From f52b71b39767fe14e3da5eefd352e62028c23143 Mon Sep 17 00:00:00 2001 From: Megan Davidson <33814653+MDavidson17@users.noreply.github.com> Date: Fri, 25 Aug 2023 16:16:54 +1200 Subject: [PATCH] feat: add thumbnail workflow for topo maps TDE-840 (#155) Thumbnail workflow created for the processing of the topo50 and 250 maps **971** topo50 thumbnails in 8m31s (including pod pending time): - [workflow](https://argo.linzaccess.com/workflows/argo/generate-thumbnails-topo50-250-44q4g?tab=workflow&nodeId=artifact:s3:s3.amazonaws.com:linz-workflow-artifacts:2023-08/22-generate-thumbnails-topo50-250-44q4g/) - `s3://linz-workflow-artifacts/2023-08/22-generate-thumbnails-topo50-250-44q4g/intermediate/` --- workflows/util/README.md | 49 ++++++++ workflows/util/create-thumbnails.yaml | 156 ++++++++++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 workflows/util/README.md create mode 100644 workflows/util/create-thumbnails.yaml diff --git a/workflows/util/README.md b/workflows/util/README.md new file mode 100644 index 00000000..573a02d6 --- /dev/null +++ b/workflows/util/README.md @@ -0,0 +1,49 @@ +# Contents: + +- [create-thumbnails](#Create-Thumbnails) + +# Create-Thumbnails + +This workflow generates thumbnails for the topo50 and topo250 GeoTIFFs and TIFFs. +The thumbnails are used on the LINZ website at the [Topo50](https://www.linz.govt.nz/products-services/maps/new-zealand-topographic-maps/topo50-map-chooser) and [Topo250](https://www.linz.govt.nz/products-services/maps/new-zealand-topographic-maps/topo250-map-chooser) Map Chooser Pages. + +Thumbnailing uses two gdal_translate steps. +Upon completion, all thumbnails will be located within the `./intermediate/` directory of the workflow in the artifacts bucket. From here the thumbnails can be moved to `s3://linz-topographic` using the publish-copy workflow. 
+ +## Workflow Input Parameters + +| Parameter | Type | Default | Description | +| ----------- | ---- | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| source | str | s3://linz-topographic-upload/maps/topo50/ | the uri (path) to the input tiffs | +| target | str | s3://linz- | the target uri (path) to copy the input tiffs and thumbnails to | +| transform | str | f | String transformation used to rename files when copying from source to target, e.g. `f.replace("text to replace", "new_text_to_use")`. Leave as `f` for no transformation. | +| copy-option | str | --no-clobber | `--no-clobber` Skip overwriting existing files. `--force` Overwrite all files. `--force-no-clobber` Overwrite only changed files, skip unchanged files. | + +## Workflow Outputs + +The output thumbnails will be located within the `./intermediate/` directory of the workflow in the artifacts bucket. + +## Workflow Description + +```mermaid +graph TD; + aws-list-->create-thumbnails; + get-location-->create-thumbnails; + create-thumbnails-->publish-source; + create-thumbnails-->publish-thumbnails; +``` + +### [aws-list](https://github.com/linz/argo-tasks/blob/master/src/commands/list/list.ts) + +Recursively loops through the provided source path and lists all the files within this location. Some listing parameters are currently hard-coded due to the current bespoke purpose of this workflow: + +- group: `"2000"` +- include: `".*.*.tif?$"` + +### [get-location](./standardising.yaml) + +Finds the output location of this workflow within the artifacts bucket. + +### [create-thumbnails](https://github.com/linz/topo-imagery/blob/master/scripts/thumbnails.py) + +Runs the bespoke gdal_translate commands to generate thumbnails of the topo50 & topo250 Maps. 
diff --git a/workflows/util/create-thumbnails.yaml b/workflows/util/create-thumbnails.yaml
new file mode 100644
index 00000000..d526eac7
--- /dev/null
+++ b/workflows/util/create-thumbnails.yaml
@@ -0,0 +1,156 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: WorkflowTemplate
+metadata:
+  name: create-thumbnails
+  namespace: argo
+spec:
+  parallelism: 50
+  nodeSelector:
+    karpenter.sh/capacity-type: "spot"
+  entrypoint: main
+  arguments:
+    parameters:
+      # FIXME: Should use camelCase or underscore?
+      - name: version-argo-tasks # tag suffix of the argo-tasks image used by aws-list and get-location
+        value: "v2"
+      - name: version-topo-imagery # tag suffix of the topo-imagery image used by thumbnails
+        value: "v3"
+      - name: source # listed by aws-list and passed as the source of publish-source
+        value: "s3://linz-topographic-upload/maps/topo50/"
+      - name: target
+        value: "s3://linz-"
+      - name: transform
+        value: "f"
+      - name: copy-option
+        value: "--no-clobber"
+  templateDefaults:
+    container:
+      imagePullPolicy: Always
+  templates:
+    - name: main # DAG: (aws-list, get-location) -> thumbnails -> (publish-source, publish-thumbnails)
+      dag:
+        tasks:
+          - name: aws-list
+            template: aws-list
+
+          - name: get-location
+            template: get-location
+
+          - name: thumbnails
+            template: thumbnails
+            arguments:
+              parameters:
+                - name: target
+                  value: "{{tasks.get-location.outputs.parameters.location}}thumbnails/"
+              artifacts:
+                - name: files
+                  from: "{{ tasks.aws-list.outputs.artifacts.files }}"
+            depends: "aws-list && get-location" # needs both the file list and the output location
+
+          - name: publish-source
+            templateRef:
+              name: publish-copy
+              template: main
+            arguments:
+              parameters:
+                - name: source
+                  value: "{{workflow.parameters.source}}"
+                - name: include
+                  value: ".*.*.tif?$"
+                - name: group
+                  value: "1000"
+                - name: group-size
+                  value: "100Gi"
+            depends: "thumbnails"
+
+          - name: publish-thumbnails
+            templateRef:
+              name: publish-copy
+              template: main
+            arguments:
+              parameters:
+                - name: source # same prefix the thumbnails task was given as its target
+                  value: "{{tasks.get-location.outputs.parameters.location}}thumbnails/"
+                - name: include
+                  value: ".jpg?$" # NOTE(review): if evaluated as a regex, "." and "g?" are looser than a literal ".jpg" suffix - confirm intent
+                - name: group
+                  value: "1000"
+                - name: group-size
+                  value: "100Gi"
+            depends: "thumbnails"
+      # END TEMPLATE `main`
+
+    - name: aws-list
+      inputs:
+      container:
+        image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" # quoted and trimmed like the other templates so stray whitespace cannot break the tag
+        command: [node, /app/index.js]
+        env:
+          - name: AWS_ROLE_CONFIG_PATH
+            value: s3://linz-bucket-config/config.json
+        args:
+          [
+            "list",
+            "--verbose",
+            "--include",
+            ".*.*.tif?$",
+            "--group",
+            "2000",
+            "--output",
+            "/tmp/file_list.json",
+            "{{workflow.parameters.source}}",
+          ]
+      outputs:
+        artifacts:
+          - name: files
+            path: /tmp/file_list.json # same path as the --output argument above
+
+    - name: thumbnails
+      retryStrategy:
+        limit: "2"
+      nodeSelector:
+        karpenter.sh/capacity-type: "spot"
+      inputs:
+        parameters:
+          - name: target
+        artifacts:
+          - name: files
+            path: /tmp/file_list.json # read back below via --from-file
+      container:
+        image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}"
+        resources:
+          requests:
+            memory: 7.8Gi
+            cpu: 15000m
+            ephemeral-storage: 3Gi
+        volumeMounts:
+          - name: ephemeral # the emptyDir volume declared under spec.volumes
+            mountPath: "/tmp"
+        command:
+          - python
+          - "/app/scripts/thumbnails.py"
+        args:
+          - "--from-file"
+          - "/tmp/file_list.json"
+          - "--target"
+          - "{{inputs.parameters.target}}"
+
+    - name: get-location # writes s3://<bucket>/<archive key with the pod name removed> to /tmp/location
+      script:
+        image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}"
+        command: [node]
+        source: |
+          const fs = require('fs');
+          const loc = JSON.parse(process.env['ARGO_TEMPLATE']).archiveLocation.s3;
+          const key = loc.key.replace('{{pod.name}}','');
+          fs.writeFileSync('/tmp/location', `s3://${loc.bucket}/${key}`);
+      outputs:
+        parameters:
+          - name: location
+            valueFrom:
+              path: "/tmp/location"
+
+  volumes:
+    - name: ephemeral
+      emptyDir: {}