Azure-compatibility #610

Open · wants to merge 11 commits into base: main
32 changes: 25 additions & 7 deletions .github/workflows/deploy_server.yaml
@@ -15,8 +15,10 @@ jobs:
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
CLOUDSDK_CORE_DISABLE_PROMPTS: 1
DRIVER_IMAGE: australia-southeast1-docker.pkg.dev/analysis-runner/images/driver
SERVER_IMAGE: australia-southeast1-docker.pkg.dev/analysis-runner/images/server
AZURE_CONTAINER_REGISTRY: cpgcommonimages.azurecr.io
GCP_CONTAINER_REGISTRY: australia-southeast1-docker.pkg.dev
DRIVER_IMAGE: analysis-runner/images/driver
SERVER_IMAGE: $GCP_CONTAINER_REGISTRY/analysis-runner/images/server

steps:
- name: "checkout analysis-runner repo"
@@ -48,21 +50,37 @@ jobs:
run: |
gcloud auth configure-docker marketplace.gcr.io,australia-southeast1-docker.pkg.dev

- name: "azure setup"
uses: azure/login@v1
with:
creds: ${{ secrets.AZURE_CREDENTIALS }}

- name: "azure docker auth"
uses: azure/docker-login@v1
with:
login-server: ${{ env.AZURE_CONTAINER_REGISTRY }}
username: ${{ secrets.AZURE_REGISTRY_USERNAME }}
password: ${{ secrets.AZURE_REGISTRY_PASSWORD }}

- name: "build driver image"
run: |
docker build -f driver/Dockerfile.hail --build-arg HAIL_SHA=$HAIL_SHA --tag $DRIVER_IMAGE:$IMAGE_TAG driver

- name: "push driver image"
- name: "tag and push gcp image [driver]"
run: |
docker image tag $DRIVER_IMAGE:$IMAGE_TAG $GCP_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest &&
docker push $GCP_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest

- name: "tag and push azure image [driver]"
run: |
docker push $DRIVER_IMAGE:$IMAGE_TAG
docker tag $DRIVER_IMAGE:$IMAGE_TAG $DRIVER_IMAGE:latest
docker push $DRIVER_IMAGE:latest
docker image tag $DRIVER_IMAGE:$IMAGE_TAG $AZURE_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest &&
docker push $AZURE_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest

- name: "build server image"
run: |
docker build --build-arg DRIVER_IMAGE=$DRIVER_IMAGE:$IMAGE_TAG --tag $SERVER_IMAGE:$IMAGE_TAG server

- name: "push server image"
- name: "push server image to gcp"
run: |
docker push $SERVER_IMAGE:$IMAGE_TAG
docker tag $SERVER_IMAGE:$IMAGE_TAG $SERVER_IMAGE:latest
1 change: 1 addition & 0 deletions .gitignore
@@ -131,3 +131,4 @@ dmypy.json
.idea/
.DS_Store
.vscode/
.*.json
2 changes: 1 addition & 1 deletion analysis_runner/cli.py
@@ -78,4 +78,4 @@ def main_from_args(args=None):


if __name__ == '__main__':
main_from_args()
main_from_args(args=sys.argv[1:])
17 changes: 15 additions & 2 deletions analysis_runner/cli_analysisrunner.py
@@ -9,7 +9,7 @@

import requests
from cpg_utils.config import read_configs
from cpg_utils.cloud import get_google_identity_token
from cpg_utils.cloud import get_google_identity_token, get_azure_identity_token
from analysis_runner.constants import get_server_endpoint
from analysis_runner.git import (
get_git_default_remote,
@@ -25,6 +25,8 @@
logger,
)

SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp', 'azure'}
DEFAULT_CLOUD_ENVIRONMENT = 'gcp'

def add_analysis_runner_args(parser=None) -> argparse.ArgumentParser:
"""
@@ -35,6 +37,15 @@ def add_analysis_runner_args(parser=None) -> argparse.ArgumentParser:

add_general_args(parser)

parser.add_argument(
'-c',
'--cloud',
required=False,
default=DEFAULT_CLOUD_ENVIRONMENT,
Contributor review comment: Should we omit the default instead of providing one here, and let the analysis-runner server decide the default?
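A possible shape for that alternative (a sketch only, not part of this diff; the server-side names are hypothetical):

parser.add_argument(
    '-c',
    '--cloud',
    required=False,
    default=None,  # no client-side default; an omitted flag is sent through as None
    choices=SUPPORTED_CLOUD_ENVIRONMENTS,
    help='Backend cloud environment to use; if omitted, the analysis-runner server picks its default',
)

# Server side (hypothetical names):
# cloud = request_body.get('cloud') or SERVER_DEFAULT_CLOUD_ENVIRONMENT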

choices=SUPPORTED_CLOUD_ENVIRONMENTS,
help=f'Backend cloud environment to use. Supported options are ({", ".join(SUPPORTED_CLOUD_ENVIRONMENTS)})',
)

parser.add_argument(
'--image',
help=(
@@ -104,6 +115,7 @@ def run_analysis_runner( # pylint: disable=too-many-arguments
commit=None,
repository=None,
cwd=None,
cloud=DEFAULT_CLOUD_ENVIRONMENT,
image=None,
cpu=None,
memory=None,
@@ -216,6 +228,7 @@ def run_analysis_runner( # pylint: disable=too-many-arguments
'script': _script,
'description': description,
'cwd': _cwd,
'cloud': cloud,
'image': image,
'cpu': cpu,
'memory': memory,
@@ -224,7 +237,7 @@ def run_analysis_runner( # pylint: disable=too-many-arguments
'config': _config,
},
headers={'Authorization': f'Bearer {_token}'},
timeout=60,
# timeout=60,
)
try:
response.raise_for_status()
9 changes: 8 additions & 1 deletion analysis_runner/constants.py
@@ -1,5 +1,8 @@
"""Constants for analysis-runner"""

import os
import distutils.util

SERVER_ENDPOINT = 'https://server-a2pko7ameq-ts.a.run.app'
SERVER_TEST_ENDPOINT = 'https://server-test-a2pko7ameq-ts.a.run.app'
ANALYSIS_RUNNER_PROJECT_ID = 'analysis-runner'
@@ -11,13 +14,17 @@
'gcloud -q auth activate-service-account --key-file=/gsa-key/key.json'
)

USE_LOCAL_SERVER = distutils.util.strtobool(os.getenv('ANALYSIS_RUNNER_LOCAL', 'False'))


def get_server_endpoint(is_test: bool = False):
"""
Get the server endpoint {production / test}
Do it in a function so it's easy to fix if the logic changes
"""
if is_test:
if USE_LOCAL_SERVER:
return 'http://localhost:8080'
elif is_test:
return SERVER_TEST_ENDPOINT

return SERVER_ENDPOINT
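
For reference, the new ANALYSIS_RUNNER_LOCAL switch can be exercised like this (a minimal sketch; the flag is read at import time, so it must be set before the module is imported):

import os

os.environ['ANALYSIS_RUNNER_LOCAL'] = 'true'  # must be set before the import below

from analysis_runner.constants import get_server_endpoint

print(get_server_endpoint())  # -> http://localhost:8080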
43 changes: 43 additions & 0 deletions examples/batch/hail_batch_job.py
@@ -0,0 +1,43 @@
#!/usr/bin/env python3

"""
Simple script to test whether the CPG infrastructure and permissions are
configured appropriately to permit running AIP.
"""

import click

from cpg_utils.config import get_config
from cpg_utils.hail_batch import remote_tmpdir
import hailtop.batch as hb


@click.command()
def main():
"""
main
"""

service_backend = hb.ServiceBackend(
billing_project=get_config()['hail']['billing_project'],
remote_tmpdir=remote_tmpdir(),
)
batch = hb.Batch(
name='Test CPG Infra',
backend=service_backend,
cancel_after_n_failures=1,
default_timeout=6000,
default_memory='highmem',
)

j = batch.new_job(name='Write the file')
j.command(f'echo "Hello World." > {j.ofile}')

k = batch.new_job(name='Read the file')
k.command(f'cat {j.ofile}')

batch.run(wait=False)


if __name__ == '__main__':
main() # pylint: disable=E1120
19 changes: 19 additions & 0 deletions examples/batch/hail_batch_job.toml
@@ -0,0 +1,19 @@
[buckets]
web_suffix = "web"
tmp_suffix = "tmp"
analysis_suffix = "analysis"

[workflow]
dataset = "thousand-genomes"
access_level = "test"
dataset_path = "cpgthousandgenomes/test"
output_prefix = "output"
path_scheme = "az"
image_registry_prefix = "cpgcommonimages.azurecr.io"

[hail]
billing_project = "fewgenomes"
bucket = "az://cpgthousandgenomes/test"

[images]
hail = "hailgenetics/hail:0.2.93"
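
To run the example above outside the analysis-runner, this TOML needs to be exposed to cpg_utils; a minimal sketch, assuming CPG_CONFIG_PATH is the environment variable cpg_utils reads:

import os

# Assumed variable name; point it at the example config before importing get_config.
os.environ['CPG_CONFIG_PATH'] = 'examples/batch/hail_batch_job.toml'

from cpg_utils.config import get_config

assert get_config()['hail']['billing_project'] == 'fewgenomes'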
12 changes: 12 additions & 0 deletions examples/batch/run_analysis.sh
@@ -0,0 +1,12 @@
#!/bin/bash

analysis-runner \
--dataset thousand-genomes \
--description 'Test script for batch on Azure' \
--output-dir test \
--cloud azure \
--access-level test \
--config examples/batch/hail_batch_job.toml \
--image cpg_workflows:latest \
examples/batch/hail_batch_job.py \
test
4 changes: 3 additions & 1 deletion server/Dockerfile
@@ -19,6 +19,8 @@ EXPOSE $PORT
COPY main.py cromwell.py util.py ./

# Prepare the Hail deploy config to point to the CPG domain.
COPY deploy-config.json /deploy-config/deploy-config.json
ENV HAIL_DEPLOY_CONFIG_FILE /deploy-config/deploy-config-gcp.json
COPY deploy-config-gcp.json /deploy-config/deploy-config-gcp.json
COPY deploy-config-azure.json /deploy-config/deploy-config-azure.json

CMD gunicorn --bind :$PORT --worker-class aiohttp.GunicornWebWorker main:init_func
10 changes: 5 additions & 5 deletions server/cromwell.py
@@ -3,6 +3,7 @@
Exports 'add_cromwell_routes', to add the following route to a flask API:
POST /cromwell: Posts a workflow to a cromwell_url
"""
import os
import json
from datetime import datetime

@@ -100,11 +101,6 @@ async def cromwell(request): # pylint: disable=too-many-locals
input_jsons = params.get('input_json_paths') or []
input_dict = params.get('inputs_dict')

if access_level == 'test':
workflow_output_dir = f'gs://cpg-{dataset}-test/{output_dir}'
else:
workflow_output_dir = f'gs://cpg-{dataset}-main/{output_dir}'

timestamp = datetime.now().astimezone().isoformat()

# Prepare the job's configuration and write it to a blob.
@@ -121,6 +117,10 @@
config_path = write_config(config, cloud_environment)

# This metadata dictionary gets stored at the output_dir location.
workflow_output_dir = os.path.join(
config.get('storage', {}).get('default', {}).get('default'),
output_dir
)
metadata = get_analysis_runner_metadata(
timestamp=timestamp,
dataset=dataset,
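The hard-coded gs://cpg-{dataset}-test and gs://cpg-{dataset}-main prefixes are replaced by a lookup in the merged config. For illustration, the lookup above expects a config shaped roughly like this (values are hypothetical):

import os

# Hypothetical merged config; only the keys read by the lookup above are shown.
config = {
    'storage': {
        'default': {
            'default': 'az://cpgthousandgenomes/test',  # dataset's default storage URL
        },
    },
}

workflow_output_dir = os.path.join(
    config.get('storage', {}).get('default', {}).get('default'),
    'output-dir',
)
# -> 'az://cpgthousandgenomes/test/output-dir'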
5 changes: 5 additions & 0 deletions server/deploy-config-azure.json
@@ -0,0 +1,5 @@
{
"location": "external",
"default_namespace": "default",
"domain": "azhail.populationgenomics.org.au"
}
5 changes: 5 additions & 0 deletions server/deploy-config-gcp.json
@@ -0,0 +1,5 @@
{
"location": "external",
"default_namespace": "default",
"domain": "hail.populationgenomics.org.au"
}
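
With both deploy configs baked into the image, the server could point Hail at the requested backend at runtime along these lines (a sketch under the assumption that per-request selection is wanted; the function name is hypothetical):

import os


def select_deploy_config(cloud_environment: str) -> None:
    """Point Hail at the deploy config for the requested backend ('gcp' or 'azure')."""
    # These files are copied into the image by server/Dockerfile above.
    os.environ['HAIL_DEPLOY_CONFIG_FILE'] = (
        f'/deploy-config/deploy-config-{cloud_environment}.json'
    )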
1 change: 0 additions & 1 deletion server/deploy-config.json

This file was deleted.
