Skip to content

Commit

Permalink
bonus checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
MattWellie committed Oct 21, 2024
1 parent eae1e36 commit 06bc53c
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
29 changes: 16 additions & 13 deletions src/talos/RunHailFiltering.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
MAX_PARTITIONS = 10000


def annotate_talos_clinvar(mt: hl.MatrixTable, clinvar: str) -> hl.MatrixTable:
def annotate_clinvarbitration(mt: hl.MatrixTable, clinvar: str) -> hl.MatrixTable:
"""
Don't allow these annotations to be missing
- Talos has been co-developed with ClinvArbitration, a ClinVar re-summary effort
Expand Down Expand Up @@ -852,6 +852,14 @@ def main(
mt = hl.read_matrix_table(mt_path)
get_logger().info(f'Loaded annotated MT from {mt_path}, size: {mt.count_rows()}, partitions: {mt.n_partitions()}')

# lookups for required fields all delegated to the hail_audit file
if not (
fields_audit(mt=mt, base_fields=BASE_FIELDS_REQUIRED, nested_fields=FIELDS_REQUIRED)
and vep_audit(mt=mt, expected_fields=VEP_TX_FIELDS_REQUIRED)
):
mt.describe()
raise KeyError('Fields were missing from the input Matrix')

# repartition if required - local Hail with finite resources has struggled with some really high (~120k) partitions
# this creates a local duplicate of the input data with far smaller partition counts, for less processing overhead
if mt.n_partitions() > MAX_PARTITIONS:
Expand All @@ -861,14 +869,6 @@ def main(
get_logger().info('Trying to write the result locally, might need more space on disk...')
mt = generate_a_checkpoint(mt, f'{checkpoint}_reparitioned')

# lookups for required fields all delegated to the hail_audit file
if not (
fields_audit(mt=mt, base_fields=BASE_FIELDS_REQUIRED, nested_fields=FIELDS_REQUIRED)
and vep_audit(mt=mt, expected_fields=VEP_TX_FIELDS_REQUIRED)
):
mt.describe()
raise KeyError('Fields were missing from the input Matrix')

# subset to currently considered samples
mt = subselect_mt_to_pedigree(mt, pedigree=pedigree)

Expand All @@ -878,15 +878,15 @@ def main(
# remove any rows which have no genes of interest
mt = remove_variants_outside_gene_roi(mt=mt, green_genes=green_expression)

if checkpoint:
mt = generate_a_checkpoint(mt, f'{checkpoint}_green_genes')

# swap out the default clinvar annotations with private clinvar
mt = annotate_talos_clinvar(mt=mt, clinvar=clinvar)
mt = annotate_clinvarbitration(mt=mt, clinvar=clinvar)

# remove common-in-gnomad variants (also includes ClinVar annotation)
mt = filter_to_population_rare(mt=mt)

if checkpoint:
mt = generate_a_checkpoint(mt, f'{checkpoint}_data')

# filter out quality failures
mt = filter_on_quality_flags(mt=mt)

Expand All @@ -902,6 +902,9 @@ def main(
# split each gene annotation onto separate rows, filter to green genes (PanelApp ROI)
mt = split_rows_by_gene_and_filter_to_green(mt=mt, green_genes=green_expression)

if checkpoint:
mt = generate_a_checkpoint(mt, f'{checkpoint}_green_and_clean')

# add Labels to the MT
# current logic is to apply 1, 2, 3, and 5, then 4 (de novo)
# for cat. 4, pre-filter the variants by tx-consequential or C5==1
Expand Down
4 changes: 2 additions & 2 deletions test/test_hail_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
annotate_category_3,
annotate_category_5,
annotate_category_6,
annotate_talos_clinvar,
annotate_clinvarbitration,
filter_to_categorised,
filter_to_population_rare,
green_from_panelapp,
Expand Down Expand Up @@ -306,7 +306,7 @@ def test_annotate_talos_clinvar(rating, stars, rows, regular, strong, tmp_path,
table_path = str(tmp_path / 'anno.ht')
table.write(table_path)

returned_table = annotate_talos_clinvar(make_a_mt, clinvar=table_path)
returned_table = annotate_clinvarbitration(make_a_mt, clinvar=table_path)
assert returned_table.count_rows() == rows
assert len([x for x in returned_table.info.clinvar_talos.collect() if x == 1]) == regular
assert len([x for x in returned_table.info.clinvar_talos_strong.collect() if x == 1]) == strong

0 comments on commit 06bc53c

Please sign in to comment.