Merge pull request #343 from 4dn-dcic/vpc

Vpc

SooLee authored Aug 2, 2021
2 parents d32a1f3 + 3813ff0 commit 00c2ef5
Showing 13 changed files with 196 additions and 158 deletions.
.gitignore: 3 additions, 0 deletions
@@ -1,3 +1,6 @@
+# virtualenv bin directory
+*/bin/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[odc]
awsf3-docker/Dockerfile: 10 additions, 12 deletions

@@ -38,7 +38,7 @@ RUN apt update -y && apt upgrade -y && apt install -y \
    nodejs
 
 RUN ln -s /usr/bin/python3.8 /usr/bin/python
-RUN ln -s /usr/bin/pip3 /usr/bin/pip
+#RUN ln -s /usr/bin/pip3 /usr/bin/pip
 
 WORKDIR /usr/local/bin
 
@@ -51,23 +51,21 @@ RUN apt-get update -y \
     && apt-get install -y docker-ce
 
 # singularity
-RUN wget https://golang.org/dl/go1.15.3.linux-amd64.tar.gz && \
-    tar -xzf go1.15.3.linux-amd64.tar.gz && \
-    rm go1.15.3.linux-amd64.tar.gz
-RUN export SINGULARITY_VERSION=3.3.0 && \
+RUN wget https://golang.org/dl/go1.16.6.linux-amd64.tar.gz && \
+    tar -xzf go1.16.6.linux-amd64.tar.gz && \
+    rm go1.16.6.linux-amd64.tar.gz
+RUN export SINGULARITY_VERSION=3.8.1 && \
     export PATH=/usr/local/bin/go/bin/:$PATH && \
-    wget https://github.com/sylabs/singularity/releases/download/v${SINGULARITY_VERSION}/singularity-${SINGULARITY_VERSION}.tar.gz && \
-    tar -xzf singularity-${SINGULARITY_VERSION}.tar.gz && \
-    rm singularity-${SINGULARITY_VERSION}.tar.gz && \
-    cd singularity && \
+    wget https://github.com/sylabs/singularity/releases/download/v${SINGULARITY_VERSION}/singularity-ce-${SINGULARITY_VERSION}.tar.gz && \
+    tar -xzf singularity-ce-${SINGULARITY_VERSION}.tar.gz && \
+    rm singularity-ce-${SINGULARITY_VERSION}.tar.gz && \
+    cd singularity-ce-${SINGULARITY_VERSION} && \
     ./mconfig && \
     make -C ./builddir && \
     make -C ./builddir install && \
     cd .. && \
     rm -rf go && \
-    mv singularity/singularity singularity2 && \
-    rm -rf singularity && \
-    mv singularity2 singularity
+    rm -rf singularity-ce-${SINGULARITY_VERSION}
 
 # goofys
 # RUN curl -O -L http://bit.ly/goofys-latest && chmod +x goofys-latest # latest is not latest
awsf3/__main__.py: 4 additions, 3 deletions

@@ -44,7 +44,8 @@ def args(self):
'help': "execution metadata file (output json of cwltool / cromwell)"},
{'flag': ["-m", "--md5file"], 'help': "text file storing md5 values for output files"},
{'flag': ["-o", "--output-json"], 'help': "output postrun json file"},
{'flag': ["-L", "--language"], 'help': "language", 'default': "cwl_v1"}],
{'flag': ["-L", "--language"], 'help': "language", 'default': "cwl_v1"},
{'flag': ["-u", "--endpoint-url"], 'help': "s3 vpc endpoint url"}],
'upload_postrun_json':
[{'flag': ["-i", "--input-json"], 'help': "input postrun json file to upload to s3"}],
'update_postrun_json_final':
@@ -66,8 +67,8 @@ def update_postrun_json_init(input_json, output_json):
     utils.update_postrun_json_init(input_json, output_json)
 
 
-def update_postrun_json_upload_output(input_json, execution_metadata_file, md5file, output_json, language):
-    utils.update_postrun_json_upload_output(input_json, execution_metadata_file, md5file, output_json, language)
+def update_postrun_json_upload_output(input_json, execution_metadata_file, md5file, output_json, language, endpoint_url):
+    utils.update_postrun_json_upload_output(input_json, execution_metadata_file, md5file, output_json, language, endpoint_url=endpoint_url)
 
 
 def upload_postrun_json(input_json):
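
For illustration, a minimal sketch of what the new -u/--endpoint-url option looks like once rendered into argparse. The {'flag': ..., 'help': ...} dicts above are Tibanna's own convention; the parser below and the URL value are hypothetical stand-ins, not the project's actual parser-building code.

import argparse

# Hypothetical parser mirroring the flag dicts above; the sample URL is a
# placeholder, not a real VPC endpoint.
parser = argparse.ArgumentParser(prog="awsf3")
parser.add_argument("-L", "--language", help="language", default="cwl_v1")
parser.add_argument("-u", "--endpoint-url", help="s3 vpc endpoint url")
args = parser.parse_args(["-u", "https://vpce-EXAMPLE.s3.us-east-1.vpce.amazonaws.com"])
print(args.endpoint_url)  # forwarded to utils.update_postrun_json_upload_output
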
awsf3/target.py: 5 additions, 2 deletions

@@ -111,12 +111,15 @@ def unzip_source(self):
                 yield {'name': content_file_name, 'content': z.open(content_file_name).read()}
         yield None
 
-    def upload_to_s3(self, encrypt_s3_upload=False):
+    def upload_to_s3(self, encrypt_s3_upload=False, endpoint_url=None):
         """upload target to s3, source can be either a file or a directory."""
         if not self.is_valid:
             raise Exception('Upload Error: source / dest must be specified first')
         if not self.s3:
-            self.s3 = boto3.client('s3')
+            if endpoint_url:
+                self.s3 = boto3.client('s3', endpoint_url=endpoint_url)
+            else:
+                self.s3 = boto3.client('s3')
         err_msg = "failed to upload output file %s to %s. %s"
         upload_extra_args = {}
         if encrypt_s3_upload:
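
The heart of this file's change is the optional endpoint_url argument to boto3.client. Below is a self-contained sketch of the same branching, assuming a hypothetical VPC endpoint URL and bucket name; endpoint_url is a standard boto3 client option that routes S3 calls through the given endpoint instead of the public one.

import boto3

def make_s3_client(endpoint_url=None):
    # Same branch as Target.upload_to_s3 above: use the S3 VPC endpoint
    # when one is configured, otherwise fall back to the default endpoint.
    if endpoint_url:
        return boto3.client('s3', endpoint_url=endpoint_url)
    return boto3.client('s3')

# Hypothetical endpoint and bucket, for illustration only.
s3 = make_s3_client('https://bucket.vpce-EXAMPLE.s3.us-east-1.vpce.amazonaws.com')
s3.upload_file('out.txt', 'my-output-bucket', 'results/out.txt')
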
awsf3/utils.py: 16 additions, 16 deletions

@@ -123,7 +123,7 @@ def determine_key_type(bucket, key, profile):
         # The file itself may be a prefix of another file (e.v. abc.vcf.gz vs abc.vcf.gz.tbi)
         # but it doesn't matter.
         else:
-            return 'File'
+            return 'File'
     else:
         # data_file is a folder
         return 'Folder'
@@ -217,7 +217,7 @@ def download_workflow():
         return
     local_wfdir = os.environ.get('LOCAL_WFDIR')
     subprocess.call(['mkdir', '-p', local_wfdir])
-
+
     if language in ['wdl', 'wdl_v1', 'wdl_draft2']:
         main_wf = os.environ.get('MAIN_WDL', '')
         wf_files = os.environ.get('WDL_FILES', '')
@@ -239,10 +239,10 @@ def download_workflow():
             wf_files = [wf_files]
         wf_files.append(main_wf)
     wf_url = wf_url.rstrip('/')
-
+
     print("main workflow file: %s" % main_wf)
     print("workflow files: " + str(wf_files))
-
+
     s3 = boto3.client('s3')
     for wf_file in wf_files:
         target = "%s/%s" % (local_wfdir, wf_file)
@@ -262,7 +262,7 @@ def download_workflow():
             targetdir = re.sub('[^/]+$', '', target)
             subprocess.call(["mkdir", "-p", targetdir])
         s3.download_file(Bucket=bucket_name, Key=key, Filename=target)
-
+
 
 def read_md5file(md5file):
     with open(md5file, 'r') as md5_f:
@@ -340,7 +340,7 @@ def update_postrun_json_init(json_old, json_new):
 
 
 def update_postrun_json_upload_output(json_old, execution_metadata_file, md5file, json_new,
-                                      language='cwl_v1', strict=True, upload=True):
+                                      language='cwl_v1', strict=True, upload=True, endpoint_url=None):
     """Update postrun json with output files.
     if strict is set false, it does not check execution metadata is required for cwl/wdl."""
     # read old json file and prepare postrunjson skeleton
@@ -362,18 +362,18 @@ def update_postrun_json_upload_output(json_old, execution_metadata_file, md5file
 
     # upload output to S3 (this also updates postrun json)
     if upload:
-        upload_output(prj)
+        upload_output(prj, endpoint_url=endpoint_url)
 
     # write to new json file
     write_postrun_json(json_new, prj)
 
 
-def upload_output(prj):
+def upload_output(prj, endpoint_url=None):
     # parsing output_target and uploading output files to output target
-    upload_to_output_target(prj.Job.Output, prj.config.encrypt_s3_upload)
+    upload_to_output_target(prj.Job.Output, prj.config.encrypt_s3_upload, endpoint_url=endpoint_url)
 
 
-def upload_to_output_target(prj_out, encrypt_s3_upload=False):
+def upload_to_output_target(prj_out, encrypt_s3_upload=False, endpoint_url=None):
     # parsing output_target and uploading output files to output target
     output_bucket = prj_out.output_bucket_directory
     output_argnames = prj_out.output_files.keys()
@@ -388,7 +388,7 @@ def upload_to_output_target(prj_out, encrypt_s3_upload=False):
             target.parse_custom_target(k, output_target[k])
             if target.is_valid:
                 print("Target is valid. Uploading..")
-                target.upload_to_s3(encrypt_s3_upload=encrypt_s3_upload)
+                target.upload_to_s3(encrypt_s3_upload=encrypt_s3_upload, endpoint_url=endpoint_url)
             else:
                 raise Exception("Invalid target %s -> %s: failed to upload" % k, output_target[k])
         else:
@@ -397,17 +397,17 @@ def upload_to_output_target(prj_out, encrypt_s3_upload=False):
             target.parse_cwl_target(k, output_target.get(k, ''), prj_out.output_files)
             if target.is_valid:
                 print("Target is valid. Uploading..")
-                target.upload_to_s3(encrypt_s3_upload=encrypt_s3_upload)
+                target.upload_to_s3(encrypt_s3_upload=encrypt_s3_upload, endpoint_url=endpoint_url)
                 prj_out.output_files[k].add_target(target.dest)
 
                 # upload secondary files
                 secondary_output_files = prj_out.output_files[k].secondaryFiles
                 if secondary_output_files:
                     stlist = SecondaryTargetList(output_bucket)
                     stlist.parse_target_values(prj_out.secondary_output_target.get(k, []))
                     stlist.reorder_by_source([sf.path for sf in secondary_output_files])
                     for st in stlist.secondary_targets:
-                        st.upload_to_s3(encrypt_s3_upload=encrypt_s3_upload)
+                        st.upload_to_s3(encrypt_s3_upload=encrypt_s3_upload, endpoint_url=endpoint_url)
                     for i, sf in enumerate(secondary_output_files):
                         sf.add_target(stlist.secondary_targets[i].dest)
             else:
@@ -424,9 +424,9 @@ def update_postrun_json_final(json_old, json_new, logfile=None):
"""Update postrun json with status, time stamps, parsed commands,
input/tmp/output sizes"""
prj = read_postrun_json(json_old)

postrun_json_final(prj, logfile=logfile)

# write to new json file
write_postrun_json(json_new, prj)

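
Taken together, the utils.py changes thread endpoint_url from update_postrun_json_upload_output down through upload_output and upload_to_output_target to each Target.upload_to_s3 call. A hedged usage sketch of the updated entry point follows; every file name and the endpoint URL below are placeholders.

from awsf3 import utils

utils.update_postrun_json_upload_output(
    'postrun.json',             # json_old: postrun json from the previous step
    'execution_metadata.json',  # output json of cwltool / cromwell
    'md5sums.txt',              # md5file
    'postrun.updated.json',     # json_new
    language='cwl_v1',
    endpoint_url='https://vpce-EXAMPLE.s3.us-east-1.vpce.amazonaws.com',
)
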
tibanna/__main__.py: 18 additions, 4 deletions

@@ -274,6 +274,12 @@ def args(self):
'action': "store_true",
'help': "Do not delete public access block from buckets" +
"(this way postrunjson and metrics reports will not be public)"},
{'flag': ["-t", "--subnets"],
'nargs': '+',
'help': "subnet IDs"},
{'flag': ["-r", "--security-groups"],
'nargs': '+',
'help': "security groups"},
{'flag': ["-q", "--quiet"],
'action': "store_true",
'help': "minimize standard output from deployment"}],
@@ -286,6 +292,12 @@ def args(self):
{'flag': ["-g", "--usergroup"],
'default': '',
'help': "Tibanna usergroup for the AWS Lambda function"},
{'flag': ["-t", "--subnets"],
'nargs': '+',
'help': "subnet IDs"},
{'flag': ["-r", "--security-groups"],
'nargs': '+',
'help': "security groups"},
{'flag': ["-q", "--quiet"],
'action': "store_true",
'help': "minimize standard output from deployment"}],
@@ -368,11 +380,12 @@ def args(self):
 }
 
 
-def deploy_core(name, suffix=None, usergroup='', quiet=False):
+def deploy_core(name, suffix=None, usergroup='', quiet=False, subnets=None, security_groups=None):
     """
     New method of deploying packaged lambdas (BETA)
     """
-    API().deploy_core(name=name, suffix=suffix, usergroup=usergroup, quiet=quiet)
+    API().deploy_core(name=name, suffix=suffix, usergroup=usergroup, subnets=subnets,
+                      security_groups=security_groups, quiet=quiet)
 
 
 def run_workflow(input_json, sfn=TIBANNA_DEFAULT_STEP_FUNCTION_NAME, jobid='', do_not_open_browser=False, sleep=3):
@@ -396,11 +409,12 @@ def setup_tibanna_env(buckets='', usergroup_tag='default', no_randomize=False,
 
 def deploy_unicorn(suffix=None, no_setup=False, buckets='',
                    no_setenv=False, usergroup='', do_not_delete_public_access_block=False,
-                   deploy_costupdater=False, quiet=False):
+                   deploy_costupdater=False, subnets=None, security_groups=None, quiet=False):
     """deploy tibanna unicorn to AWS cloud"""
     API().deploy_unicorn(suffix=suffix, no_setup=no_setup, buckets=buckets, no_setenv=no_setenv,
                          usergroup=usergroup, do_not_delete_public_access_block=do_not_delete_public_access_block,
-                         deploy_costupdater=deploy_costupdater, quiet=quiet)
+                         deploy_costupdater=deploy_costupdater, subnets=subnets, security_groups=security_groups,
+                         quiet=quiet)
 
 
 def add_user(user, usergroup):
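
A sketch of deploying with the new VPC options from Python; the import path is assumed from this module's own API() calls, and the subnet and security-group IDs are hypothetical placeholders.

from tibanna.core import API

# Hypothetical IDs; with subnets and security groups set, the deployed
# Lambda functions are attached to the given VPC resources.
API().deploy_unicorn(
    usergroup='my_usergroup',
    subnets=['subnet-0aaa1111bbb222ccc'],
    security_groups=['sg-0ddd3333eee444fff'],
)
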
tibanna/_version.py: 1 addition, 1 deletion

@@ -1,4 +1,4 @@
"""Version information."""

# The following line *must* be the last in the module, exactly as formatted:
__version__ = "1.3.1"
__version__ = "1.4.1"