From 0222e809de21ed0b6778ac4023c8bd5e22b32224 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Mon, 17 Jun 2024 13:37:37 -0500 Subject: [PATCH 1/3] feat(data-pipelines): add new LoF curation csvs to pipeline input --- .../gnomad_v2/gnomad_v2_lof_curation.py | 2 ++ .../src/data_pipeline/pipelines/genes.py | 2 +- .../gnomad_v2_lof_curation_results.py | 24 +++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/data-pipeline/src/data_pipeline/datasets/gnomad_v2/gnomad_v2_lof_curation.py b/data-pipeline/src/data_pipeline/datasets/gnomad_v2/gnomad_v2_lof_curation.py index e271bf825..c7411d10e 100644 --- a/data-pipeline/src/data_pipeline/datasets/gnomad_v2/gnomad_v2_lof_curation.py +++ b/data-pipeline/src/data_pipeline/datasets/gnomad_v2/gnomad_v2_lof_curation.py @@ -22,6 +22,8 @@ "conflicting_evidence": "Uncertain", "insufficient_evidence": "Uncertain", "uncertain": "Uncertain", + "uncertain_lof": "Uncertain", + "likey_lof": "Likely LoF", "likely_lof": "Likely LoF", "likely_not_lof": "Likely not LoF", "lof": "LoF", diff --git a/data-pipeline/src/data_pipeline/pipelines/genes.py b/data-pipeline/src/data_pipeline/pipelines/genes.py index 8b77dae19..58d9c79fb 100644 --- a/data-pipeline/src/data_pipeline/pipelines/genes.py +++ b/data-pipeline/src/data_pipeline/pipelines/genes.py @@ -220,7 +220,7 @@ "prepare_gnomad_v4_constraint", prepare_gnomad_v4_constraint, f"/{constraint_subdir}/gnomad_v4_constraint.ht", - {"path": "gs://gcp-public-data--gnomad/release/v4.1/constraint/gnomad.v4.1.constraint_metrics.ht"}, + {"path": "gs://gcp-public-data--gnomad/release/4.1/constraint/gnomad.v4.1.constraint_metrics.ht"}, ) pipeline.add_task( diff --git a/data-pipeline/src/data_pipeline/pipelines/gnomad_v2_lof_curation_results.py b/data-pipeline/src/data_pipeline/pipelines/gnomad_v2_lof_curation_results.py index 7264a3a91..e001ae4cb 100644 --- a/data-pipeline/src/data_pipeline/pipelines/gnomad_v2_lof_curation_results.py +++ b/data-pipeline/src/data_pipeline/pipelines/gnomad_v2_lof_curation_results.py @@ -16,6 +16,30 @@ # If a result for a variant/gene pair is present in more than one file, # the result in the first file in this list takes precedence. "curation_result_paths": [ + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/ABCC8_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/ADA2_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/CBS_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/CFTR_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/EYS_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/GAMT_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/GBE1_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/HADH_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/HPS1_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/HPS3_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/HPS4_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/MFSD8_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/PCDH15_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/PLA2G6_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/POLR3A_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/POLR3B_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/POMGNT1_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/SEPN1_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/SLC13A5_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/SMARCB1_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/SMC3_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/TANGO2_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/WWOX_final_results_gnomAD.csv", + "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v2/gnomAD_chets_final_results_gnomAD.csv", "gs://gcp-public-data--gnomad/truth-sets/source/lof-curation/NSD1_curation_results.csv", "gs://gcp-public-data--gnomad/truth-sets/source/lof-curation/gnomAD_addendum_curation_results.csv", "gs://gcp-public-data--gnomad/truth-sets/source/lof-curation/metabolic_conditions_genes_curation_results.csv", From d70a4d8c15ddde83d35b217f98c749b1dbb56042 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Mon, 17 Jun 2024 14:52:56 -0500 Subject: [PATCH 2/3] refactor(graphql-api): reuse index definition in lof curation queries --- graphql-api/src/queries/lof-curation-result-queries.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/graphql-api/src/queries/lof-curation-result-queries.ts b/graphql-api/src/queries/lof-curation-result-queries.ts index e638e82c7..1e8981d01 100644 --- a/graphql-api/src/queries/lof-curation-result-queries.ts +++ b/graphql-api/src/queries/lof-curation-result-queries.ts @@ -1,10 +1,12 @@ +const GNOMAD_V2_LOF_CURATION_RESULTS_INDEX = 'gnomad_v2_lof_curation_results' + // ================================================================================================ // Variant query // ================================================================================================ export const fetchLofCurationResultsByVariant = async (esClient: any, variantId: any) => { const response = await esClient.search({ - index: 'gnomad_v2_lof_curation_results', + index: GNOMAD_V2_LOF_CURATION_RESULTS_INDEX, type: '_doc', body: { query: { @@ -29,7 +31,7 @@ export const fetchLofCurationResultsByVariant = async (esClient: any, variantId: export const fetchLofCurationResultsByGene = async (esClient: any, gene: any) => { const response = await esClient.search({ - index: 'gnomad_v2_lof_curation_results', + index: GNOMAD_V2_LOF_CURATION_RESULTS_INDEX, type: '_doc', size: 1000, body: { @@ -54,7 +56,7 @@ export const fetchLofCurationResultsByGene = async (esClient: any, gene: any) => export const fetchLofCurationResultsByRegion = async (esClient: any, region: any) => { const response = await esClient.search({ - index: 'gnomad_v2_lof_curation_results', + index: GNOMAD_V2_LOF_CURATION_RESULTS_INDEX, type: '_doc', size: 1000, body: { From 115203a5ae7e79ea46db6dd98d9231ff7c6e7c1d Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Fri, 21 Jun 2024 11:33:11 -0500 Subject: [PATCH 3/3] chore(browser): remove note about name and logo from policies --- browser/about/policies/policies.md | 2 +- browser/src/__snapshots__/PoliciesPage.spec.tsx.snap | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/browser/about/policies/policies.md b/browser/about/policies/policies.md index a39550910..9a14bbfd1 100644 --- a/browser/about/policies/policies.md +++ b/browser/about/policies/policies.md @@ -8,7 +8,7 @@ While we hope gnomAD exists for decades to come, we recognize the importance of ## gnomAD Open Science Policy -The gnomAD team has a firm commitment to open science. This includes, but is not limited to, making our data and code open-source, posting pre-prints, and prioritizing publishing in journals that support open access. However, we ask that you refrain from using the name "gnomAD"/"Genome Aggregation Database" or the gnomAD logo without permission. +The gnomAD team has a firm commitment to open science. This includes, but is not limited to, making our data and code open-source, posting pre-prints, and prioritizing publishing in journals that support open access. ## Data Generation diff --git a/browser/src/__snapshots__/PoliciesPage.spec.tsx.snap b/browser/src/__snapshots__/PoliciesPage.spec.tsx.snap index 8a5a32d68..0fcdb3ca1 100644 --- a/browser/src/__snapshots__/PoliciesPage.spec.tsx.snap +++ b/browser/src/__snapshots__/PoliciesPage.spec.tsx.snap @@ -235,7 +235,7 @@ While we hope gnomAD exists for decades to come, we recognize the importance of ## gnomAD Open Science Policy -The gnomAD team has a firm commitment to open science. This includes, but is not limited to, making our data and code open-source, posting pre-prints, and prioritizing publishing in journals that support open access. However, we ask that you refrain from using the name "gnomAD"/"Genome Aggregation Database" or the gnomAD logo without permission. +The gnomAD team has a firm commitment to open science. This includes, but is not limited to, making our data and code open-source, posting pre-prints, and prioritizing publishing in journals that support open access. ## Data Generation