diff --git a/data-pipeline/src/data_pipeline/data_types/gene.py b/data-pipeline/src/data_pipeline/data_types/gene.py
index 393d043b8..958484be7 100644
--- a/data-pipeline/src/data_pipeline/data_types/gene.py
+++ b/data-pipeline/src/data_pipeline/data_types/gene.py
@@ -111,6 +111,23 @@ def collect_gene_exons(gene_exons):
     return exons
 
 
+def reject_par_y_genes(genes_path=None):
+    """Return the genes table without rows whose gene version ends in "_PAR_Y".
+
+    Args:
+        genes_path: Location passed to ``hl.read_table``.
+            NOTE(review): the ``None`` default is handed to ``hl.read_table``
+            unchanged; callers are expected to supply a real path.
+
+    Returns:
+        The table read from ``genes_path``, keeping only rows whose
+        ``gene_version`` does not end with ``"_PAR_Y"``.
+    """
+    genes = hl.read_table(genes_path)
+    is_par_y = genes.gene_version.endswith("_PAR_Y")
+    # `~` negates the Hail boolean expression directly.
+    return genes.filter(~is_par_y)
+
+
 ###############################################
 # Transcripts                                 #
 ###############################################
diff --git a/data-pipeline/src/data_pipeline/pipelines/genes.py b/data-pipeline/src/data_pipeline/pipelines/genes.py
index 54d9f894e..2ceddad60 100644
--- a/data-pipeline/src/data_pipeline/pipelines/genes.py
+++ b/data-pipeline/src/data_pipeline/pipelines/genes.py
@@ -24,6 +24,7 @@
     prepare_heterozygous_variant_cooccurrence_counts,
     prepare_homozygous_variant_cooccurrence_counts,
 )
+from data_pipeline.data_types.gene import reject_par_y_genes
 
 pipeline = Pipeline()
 
@@ -317,6 +318,15 @@ def annotate_with_preferred_transcript(table_path):
     },
 )
 
+pipeline.add_task(
+    "annotate_grch38_genes_step_5",
+    reject_par_y_genes,
+    "/genes/genes_grch38_annotated_5.ht",
+    {
+        "genes_path": pipeline.get_task("annotate_grch38_genes_step_4"),
+    },
+)
+
 ###############################################
 # Extract transcripts
 ###############################################
@@ -357,7 +367,7 @@ def annotate_with_preferred_transcript(table_path):
 pipeline.set_outputs(
     {
         "genes_grch37": "annotate_grch37_genes_step_5",
-        "genes_grch38": "annotate_grch38_genes_step_4",
+        "genes_grch38": "annotate_grch38_genes_step_5",
         "base_transcripts_grch37": "extract_grch37_transcripts",
         "base_transcripts_grch38": "extract_grch38_transcripts",
         "transcripts_grch37": "annotate_grch37_transcripts",