diff --git a/data-pipeline/src/data_pipeline/datasets/gnomad_v3/gnomad_v3_local_ancestry.py b/data-pipeline/src/data_pipeline/datasets/gnomad_v3/gnomad_v3_local_ancestry.py index 451f1c4b9..b94e80bd5 100644 --- a/data-pipeline/src/data_pipeline/datasets/gnomad_v3/gnomad_v3_local_ancestry.py +++ b/data-pipeline/src/data_pipeline/datasets/gnomad_v3/gnomad_v3_local_ancestry.py @@ -2,6 +2,9 @@ from data_pipeline.data_types.variant import variant_id +data_item = hl.tstruct(id=hl.tstr, ac=hl.tint32, an=hl.tint32) +data_array = hl.tarray(data_item) + def prepare_local_ancestry(sources): result = None @@ -15,24 +18,29 @@ def prepare_local_ancestry(sources): ds = ds.select( variant_id=variant_id(ds.locus, ds.alleles), - populations=hl.struct( - genome=hl.array( - [ - hl.struct( - id=f"{ancestry_group_id}_{key}", ac=ds.info[f"AC_{vcf_key}"], an=ds.info[f"AN_{vcf_key}"] - ) - for key, vcf_key in local_ancestry_group_keys - ] - ), + genome=hl.array( + [ + hl.struct(id=f"{ancestry_group_id}_{key}", ac=ds.info[f"AC_{vcf_key}"], an=ds.info[f"AN_{vcf_key}"]) + for key, vcf_key in local_ancestry_group_keys + ] ), ) ds = ds.key_by(ds.variant_id) + ds = ds.select(ds.genome, ds.locus, ds.alleles) if result is None: result = ds else: + result = result.join(ds, how="outer") + result = result.transmute( + genome=hl.or_else(result.genome, hl.literal([], dtype=data_array)), + genome_1=hl.or_else(result.genome_1, hl.literal([], dtype=data_array)), + locus=hl.or_else(result.locus, result.locus_1), + alleles=hl.or_else(result.alleles, result.alleles_1), + ) result = result.transmute( - populations=hl.struct(genome=result.populations.genome.extend(ds[result.variant_id].populations.genome)) + genome=result.genome.extend(result.genome_1), ) - return result + shaped_result = result.transmute(populations=hl.struct(genome=result.genome)) + return shaped_result diff --git a/graphql-api/src/queries/local-ancestry-queries.ts b/graphql-api/src/queries/local-ancestry-queries.ts index f8bb1a76b..e9bf3ff54 100644 --- a/graphql-api/src/queries/local-ancestry-queries.ts +++ b/graphql-api/src/queries/local-ancestry-queries.ts @@ -2,7 +2,7 @@ import { DATASET_LABELS } from '../datasets' import { UserVisibleError } from '../errors' const LOCAL_ANCESTRY_INDICES = { - gnomad_r3: 'gnomad_v3_local_ancestry-2024-10-04--18-38', + gnomad_r3: 'gnomad_v3_local_ancestry-2024-10-11--20-51', } export const fetchLocalAncestryPopulationsByVariant = async (