From 9df7c88803232b7b2a710a9d0d0d41f8e32ff5a6 Mon Sep 17 00:00:00 2001 From: EddieLF Date: Thu, 19 Sep 2024 10:17:04 +1000 Subject: [PATCH 1/4] Add readgroup level stats config option for picard CollectMultipleMetrics job --- configs/defaults/seqr_loader.toml | 1 + cpg_workflows/jobs/picard.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/configs/defaults/seqr_loader.toml b/configs/defaults/seqr_loader.toml index 8a268ed71..d3452b5c0 100644 --- a/configs/defaults/seqr_loader.toml +++ b/configs/defaults/seqr_loader.toml @@ -67,6 +67,7 @@ indel_filter_level = 99.0 [cramqc] assume_sorted = true num_pcs = 4 +readgroup_metrics = false [qc_thresholds.genome.min] "MEDIAN_COVERAGE" = 10 diff --git a/cpg_workflows/jobs/picard.py b/cpg_workflows/jobs/picard.py index 4feedbc6c..c3c659185 100644 --- a/cpg_workflows/jobs/picard.py +++ b/cpg_workflows/jobs/picard.py @@ -6,7 +6,7 @@ from hailtop.batch.job import Job from cpg_utils import Path -from cpg_utils.config import get_config, image_path, reference_path +from cpg_utils.config import config_retrieve, get_config, image_path, reference_path from cpg_utils.hail_batch import command, fasta_res_group from cpg_workflows.filetypes import CramPath from cpg_workflows.resources import ( @@ -297,6 +297,13 @@ def picard_collect_metrics( sorted_output = get_config()['cramqc']['assume_sorted'] assert cram_path.index_path + + if config_retrieve(['workflow', 'cramqc', 'readgroup_metrics'], default=False): + readgroup_metrics = """ \\ + METRIC_ACCUMULATION_LEVEL=READ_GROUP """ + else: + readgroup_metrics = "" + cmd = f"""\ CRAM=$BATCH_TMPDIR/{cram_path.path.name} CRAI=$BATCH_TMPDIR/{cram_path.index_path.name} @@ -319,7 +326,7 @@ def picard_collect_metrics( PROGRAM=CollectBaseDistributionByCycle \\ PROGRAM=CollectQualityYieldMetrics \\ METRIC_ACCUMULATION_LEVEL=null \\ - METRIC_ACCUMULATION_LEVEL=SAMPLE + METRIC_ACCUMULATION_LEVEL=SAMPLE {readgroup_metrics} ls $BATCH_TMPDIR/ cp $BATCH_TMPDIR/prefix.alignment_summary_metrics {j.out_alignment_summary_metrics} From cfd1fbd059361b197a819845f8b07ffe0a16452f Mon Sep 17 00:00:00 2001 From: EddieLF Date: Thu, 19 Sep 2024 10:23:23 +1000 Subject: [PATCH 2/4] Fix config retrieve --- cpg_workflows/jobs/picard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpg_workflows/jobs/picard.py b/cpg_workflows/jobs/picard.py index c3c659185..e095386d1 100644 --- a/cpg_workflows/jobs/picard.py +++ b/cpg_workflows/jobs/picard.py @@ -298,7 +298,7 @@ def picard_collect_metrics( assert cram_path.index_path - if config_retrieve(['workflow', 'cramqc', 'readgroup_metrics'], default=False): + if config_retrieve(['cramqc', 'readgroup_metrics'], default=False): readgroup_metrics = """ \\ METRIC_ACCUMULATION_LEVEL=READ_GROUP """ else: From dfe5b5058ac0f0cc7c301243f2e7cbc2f7c02288 Mon Sep 17 00:00:00 2001 From: EddieLF Date: Thu, 19 Sep 2024 13:55:48 +1000 Subject: [PATCH 3/4] =?UTF-8?q?Bump=20version:=201.27.3=20=E2=86=92=201.27?= =?UTF-8?q?.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- .github/workflows/docker.yaml | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4f50c5fa6..a7af89e0b 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.27.3 +current_version = 1.27.4 commit = True tag = False diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index c20c19a54..f95790283 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -15,7 +15,7 @@ permissions: contents: read env: - VERSION: 1.27.3 + VERSION: 1.27.4 jobs: docker: diff --git a/setup.py b/setup.py index 158b23a5c..80a6294c3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='cpg-workflows', # This tag is automatically updated by bumpversion - version='1.27.3', + version='1.27.4', description='CPG workflows for Hail Batch', long_description=open('README.md').read(), long_description_content_type='text/markdown', From de4486bfd50b4d0d058947bafb40fdfeaa3f5b3e Mon Sep 17 00:00:00 2001 From: EddieLF Date: Thu, 19 Sep 2024 14:22:50 +1000 Subject: [PATCH 4/4] Choose to accumulate metrics at sample or rg level, not both --- cpg_workflows/jobs/picard.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpg_workflows/jobs/picard.py b/cpg_workflows/jobs/picard.py index e095386d1..bfe8ff2f9 100644 --- a/cpg_workflows/jobs/picard.py +++ b/cpg_workflows/jobs/picard.py @@ -299,10 +299,9 @@ def picard_collect_metrics( assert cram_path.index_path if config_retrieve(['cramqc', 'readgroup_metrics'], default=False): - readgroup_metrics = """ \\ - METRIC_ACCUMULATION_LEVEL=READ_GROUP """ + readgroup_metrics = "METRIC_ACCUMULATION_LEVEL=READ_GROUP" else: - readgroup_metrics = "" + readgroup_metrics = "METRIC_ACCUMULATION_LEVEL=SAMPLE" cmd = f"""\ CRAM=$BATCH_TMPDIR/{cram_path.path.name} @@ -326,7 +325,7 @@ def picard_collect_metrics( PROGRAM=CollectBaseDistributionByCycle \\ PROGRAM=CollectQualityYieldMetrics \\ METRIC_ACCUMULATION_LEVEL=null \\ - METRIC_ACCUMULATION_LEVEL=SAMPLE {readgroup_metrics} + {readgroup_metrics} ls $BATCH_TMPDIR/ cp $BATCH_TMPDIR/prefix.alignment_summary_metrics {j.out_alignment_summary_metrics}