# Upload glossaries CSV to GC bucket and update glossaries in Google Translate API
# (GitHub Actions workflow definition; this copy appears to come from the page of run #29.)

# Publishes the glossary files kept under glossaries-for-google-translate-api/
# to a Cloud Storage bucket, then recreates the matching glossaries in the
# Google Cloud Translation API (v3) so they reflect the uploaded files.
#
# Triggers: manual dispatch, or a push to main that touches the glossary folder.
name: Upload glossaries CSV to GC bucket and update glossaries in Google Translate API
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'glossaries-for-google-translate-api/**'
jobs:
  upload_csv_glossaries_to_bucket_and_update_glossaries:
    runs-on: ubuntu-latest
    steps:
      # Resolve the op:// references below and export the resulting values as
      # environment variables for the following steps.
      - name: Load secrets from 1Password
        id: onepw_secrets
        uses: 1password/load-secrets-action@v2.0.0
        with:
          export-env: true  # Export loaded secrets as environment variables
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          GCLOUD_SERVICE_ACCOUNT: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/nyg5diiky4yheorujjdm5h3g6a/ctjnao73yan4bktrxmcafidfdy"
          GCLOUD_SERVICE_ACCOUNT_JSON: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/nyg5diiky4yheorujjdm5h3g6a/Minifyied JSON"
          GCLOUD_PROJECT_ID: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/nyg5diiky4yheorujjdm5h3g6a/Project ID"

      - name: Checkout code
        uses: actions/checkout@v4.1.4

      # Authenticate to Google Cloud with the service-account key loaded above.
      # NOTE(review): setup-gcloud v2 only installs the SDK and takes no
      # credentials, so this step is believed to be required, not superfluous.
      - name: Authorize GCP
        id: authorizegcp
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ env.GCLOUD_SERVICE_ACCOUNT_JSON }}

      # Install the gcloud CLI and gsutil used by the run steps below.
      - name: Set up Google Cloud SDK
        id: authenticategcp
        uses: google-github-actions/setup-gcloud@v2.1.0
        with:
          version: '>= 470.0.0'
          project_id: ${{ env.GCLOUD_PROJECT_ID }}
          install_components: 'gcloud,gsutil'

      # Upload the files to the GCP bucket. They are uploaded at the root of
      # the bucket; existing files are replaced without confirmation.
      - name: Upload files
        id: uploadfiles
        run: |
          glossary_dir="$GITHUB_WORKSPACE/glossaries-for-google-translate-api"
          for file in "$glossary_dir"/*; do
            # Skip anything that is not a regular file, including the literal
            # "*" pattern the shell leaves behind when the folder is empty.
            [ -f "$file" ] || continue
            filename=$(basename "$file")
            gsutil cp "$file" "gs://glossaries.mobilitydata.org/${filename}"
          done

      - name: Update glossaries
        if: ${{ success() }}
        id: updateglossaries
        run: |
          glossary_dir="$GITHUB_WORKSPACE/glossaries-for-google-translate-api"
          # Resource path of the glossary collection, and its REST endpoint.
          glossaries_name="projects/810644617646/locations/us-central1/glossaries"
          glossaries_url="https://translation.googleapis.com/v3/${glossaries_name}"

          access_token=$(gcloud auth application-default print-access-token | sed '/^$/q')
          # The pipeline's exit status is sed's, so a gcloud failure would
          # otherwise go unnoticed and every API call below would be sent
          # with an empty bearer token.
          if [ -z "$access_token" ]; then
            echo "::error::Could not obtain a Google Cloud access token"
            exit 1
          fi

          # First delete the existing glossaries in the Google Translate API.
          # Updating them would append to what already exists, which is not
          # the desired result.
          for file in "$glossary_dir"/*; do
            [ -f "$file" ] || continue
            # The glossary ID is the file name without its extension.
            filename=$(basename "$file")
            file_sans_ext="${filename%.*}"
            # Best effort on purpose: no --fail, so an HTTP error response
            # (e.g. for a glossary that does not exist yet) does not stop
            # the run.
            curl -X DELETE "${glossaries_url}/${file_sans_ext}" \
              -H "Authorization: Bearer $access_token" \
              -H "Content-Type: application/json"
          done

          # Deletion is asynchronous; wait so that we do not try to create a
          # glossary that is still being deleted.
          sleep 600  # 10 minutes

          # Create the new glossaries.
          failed=0
          for file in "$glossary_dir"/*; do
            [ -f "$file" ] || continue
            # Glossary ID = file name without extension; target language =
            # whatever follows the last "-" in that ID.
            filename=$(basename "$file")
            file_sans_ext="${filename%.*}"
            language_code="${file_sans_ext##*-}"
            # Construct the JSON payload.
            gloss_name="${glossaries_name}/${file_sans_ext}"
            gcs_source_uri="gs://glossaries.mobilitydata.org/$filename"
            json_data="{ \"name\": \"$gloss_name\", \"languagePair\": { \"sourceLanguageCode\": \"en\", \"targetLanguageCode\": \"$language_code\" } , \"inputConfig\": { \"gcsSource\": { \"inputUri\": \"$gcs_source_uri\" } } }"
            # --fail-with-body makes curl exit non-zero on an HTTP error while
            # still printing the API's error message. Keep going so that every
            # glossary is attempted, and report the failure at the end.
            if ! curl --fail-with-body -X POST "$glossaries_url" \
              -H "Authorization: Bearer $access_token" \
              -H "Content-Type: application/json" \
              -d "$json_data"; then
              echo "::error::Failed to create glossary ${file_sans_ext}"
              failed=1
            fi
          done
          exit "$failed"