Skip to content

Commit

Permalink
Fix loading of local ancestry with one top-level ancestry group
Browse files Browse the repository at this point in the history
  • Loading branch information
phildarnowsky-broad committed Oct 15, 2024
1 parent 6625333 commit f656414
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from data_pipeline.data_types.variant import variant_id

# Schema of a single per-group record: a string `id` plus int32 `ac` and `an`
# fields. It has the same field names as the struct built for each local
# ancestry group key in prepare_local_ancestry.
# NOTE(review): `hl` is presumably the Hail import located above this view — confirm.
data_item = hl.tstruct(id=hl.tstr, ac=hl.tint32, an=hl.tint32)
# Array-of-records type. It is passed as the dtype of the empty-list literal
# that replaces a missing `genome` / `genome_1` value after the outer join in
# prepare_local_ancestry, so the two arrays can always be concatenated.
data_array = hl.tarray(data_item)


def prepare_local_ancestry(sources):
result = None
Expand All @@ -15,24 +18,29 @@ def prepare_local_ancestry(sources):

ds = ds.select(
variant_id=variant_id(ds.locus, ds.alleles),
populations=hl.struct(
genome=hl.array(
[
hl.struct(
id=f"{ancestry_group_id}_{key}", ac=ds.info[f"AC_{vcf_key}"], an=ds.info[f"AN_{vcf_key}"]
)
for key, vcf_key in local_ancestry_group_keys
]
),
genome=hl.array(
[
hl.struct(id=f"{ancestry_group_id}_{key}", ac=ds.info[f"AC_{vcf_key}"], an=ds.info[f"AN_{vcf_key}"])
for key, vcf_key in local_ancestry_group_keys
]
),
)
ds = ds.key_by(ds.variant_id)
ds = ds.select(ds.genome, ds.locus, ds.alleles)

if result is None:
result = ds
else:
result = result.join(ds, how="outer")
result = result.transmute(
genome=hl.or_else(result.genome, hl.literal([], dtype=data_array)),
genome_1=hl.or_else(result.genome_1, hl.literal([], dtype=data_array)),
locus=hl.or_else(result.locus, result.locus_1),
alleles=hl.or_else(result.alleles, result.alleles_1),
)
result = result.transmute(
populations=hl.struct(genome=result.populations.genome.extend(ds[result.variant_id].populations.genome))
genome=result.genome.extend(result.genome_1),
)

return result
shaped_result = result.transmute(populations=hl.struct(genome=result.genome))
return shaped_result
2 changes: 1 addition & 1 deletion graphql-api/src/queries/local-ancestry-queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { DATASET_LABELS } from '../datasets'
import { UserVisibleError } from '../errors'

const LOCAL_ANCESTRY_INDICES = {
gnomad_r3: 'gnomad_v3_local_ancestry-2024-10-04--18-38',
gnomad_r3: 'gnomad_v3_local_ancestry-2024-10-11--20-51',
}

export const fetchLocalAncestryPopulationsByVariant = async (
Expand Down

0 comments on commit f656414

Please sign in to comment.