diff --git a/data-pipeline/src/data_pipeline/data_types/gene.py b/data-pipeline/src/data_pipeline/data_types/gene.py
index 958484be7..97940403a 100644
--- a/data-pipeline/src/data_pipeline/data_types/gene.py
+++ b/data-pipeline/src/data_pipeline/data_types/gene.py
@@ -226,6 +226,19 @@ def import_hgnc(path):
     return ds
 
 
+def prepare_table_for_release(genes_path):
+    """Load a genes table and strip its global fields for public release.
+
+    The pipeline tasks that call this pass the output of an upstream task as
+    `genes_path` (presumably a native Hail Table, `.ht`), so the table is
+    loaded with `hl.read_table` rather than the text-file parser `hl.import_table`.
+    """
+    ds = hl.read_table(genes_path)
+    # select_globals() with no arguments keeps none of the global fields.
+    ds = ds.select_globals()
+    return ds
+
+
 def prepare_genes(gencode_path, hgnc_path, reference_genome):
     genes = import_gencode(gencode_path, reference_genome)
 
diff --git a/data-pipeline/src/data_pipeline/pipelines/genes.py b/data-pipeline/src/data_pipeline/pipelines/genes.py
index 2968c0260..76a0b12f3 100644
--- a/data-pipeline/src/data_pipeline/pipelines/genes.py
+++ b/data-pipeline/src/data_pipeline/pipelines/genes.py
@@ -4,7 +4,7 @@
 
 from data_pipeline.helpers import annotate_table
 
-from data_pipeline.data_types.gene import prepare_genes
+from data_pipeline.data_types.gene import prepare_genes, prepare_table_for_release
 from data_pipeline.data_types.canonical_transcript import get_canonical_transcripts
 from data_pipeline.data_types.mane_select_transcript import import_mane_select_transcripts
 from data_pipeline.data_types.transcript import (
@@ -322,6 +322,15 @@ def annotate_with_preferred_transcript(table_path):
     },
 )
 
+pipeline.add_task(
+    "prepare_grch37_genes_table_for_public_release",
+    prepare_table_for_release,
+    f"/{genes_subdir}/genes_grch37_public_release.ht",
+    {
+        "genes_path": pipeline.get_task("annotate_grch37_genes_step_5"),
+    },
+)
+
 pipeline.add_task(
     "annotate_grch38_genes_step_1",
     annotate_table,
@@ -385,6 +394,16 @@ def annotate_with_constraint(genes_path, constraint_path):
     },
 )
 
+
+pipeline.add_task(
+    "prepare_grch38_genes_table_for_public_release",
+    prepare_table_for_release,
+    f"/{genes_subdir}/genes_grch38_public_release.ht",
+    {
+        "genes_path": pipeline.get_task("remove_constraint_for_release"),
+    },
+)
+
 ###############################################
 # Extract transcripts
 ###############################################