Skip to content

Commit

Permalink
Use unmasked ref fasta if 'hgdp' is specified in the dataset param (#224)
Browse files Browse the repository at this point in the history
* try unmasked fasta for hgdp

* Update str_iterative_eh_runner.py

* check if 'hgdp' in dataset

* Update get_cis_numpy_files.py

* remove \n from 'sex' variable
  • Loading branch information
hopedisastro authored Jun 4, 2024
1 parent d44310f commit 21781ee
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion str/associatr/get_cis_numpy_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def extract_genotypes(vcf_file, loci):
for record in vcf_reader(f'{chrom}:{pos}-{pos}'):
if record.CHROM == chrom and record.POS == pos:
gt = record.gt_types
gt[gt == 3] = 2 #HOM ALT is coded as 3; change it to 2
gt[gt == 3] = 2 # HOM ALT is coded as 3; change it to 2
results[locus] = gt
break

Expand Down
6 changes: 5 additions & 1 deletion str/runners/str_iterative_eh_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ def main(
b = get_batch()

# Reference fasta
ref_fasta = str(reference_path('broad/ref_fasta'))
if 'hgdp' in dataset:
ref_fasta = 'gs://cpg-common-main/references/hg38/v0/Homo_sapiens_assembly38.fasta'
else:
ref_fasta = str(reference_path('broad/ref_fasta'))
ref = b.read_input_group(
**dict(
base=ref_fasta,
Expand All @@ -112,6 +115,7 @@ def main(
split_line = line.split(',')
cpg_id = split_line[0]
sex = split_line[2]
sex = sex.replace('\n', '')
if cpg_id == 's': # header line
continue
if sex == 'XY':
Expand Down

0 comments on commit 21781ee

Please sign in to comment.