Skip to content

Commit

Permalink
Merge pull request #45 from msk-access/hotfix-sexmismatch-nan-noregions
Browse files Browse the repository at this point in the history
Hotfix sexmismatch nan noregions
  • Loading branch information
murphycj2 authored Jun 8, 2021
2 parents 5820383 + f75abc6 commit f9bb131
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 11 deletions.
2 changes: 1 addition & 1 deletion biometrics/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.11
0.2.12
8 changes: 4 additions & 4 deletions biometrics/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def _parse_bed_file(self):
# only keep Y chrom regions
self.regions = self.regions[self.regions[0].isin(['Y', 'chrY'])]
if len(self.regions) == 0:
print('There are not Y chromosome regions!')
print('There are no Y chromosome regions. Cannot determine if there is a sex mismatch.')

self.regions.columns = range(self.regions.shape[1])

Expand Down Expand Up @@ -87,9 +87,9 @@ def _extract_regions(self, sample):
'end': end,
'count': count})

region_counts = pd.DataFrame(region_counts)

sample.region_counts = region_counts
if len(region_counts) > 0:
region_counts = pd.DataFrame(region_counts)
sample.region_counts = region_counts

return sample

Expand Down
4 changes: 2 additions & 2 deletions biometrics/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def __init__(self, sample_name=None, sample_bam=None, sample_group=None,

if self.sample_name is not None:
if db is not None:
self.extraction_file = os.path.join(db, self.sample_name + '.pk')
self.extraction_file = os.path.join(db, self.sample_name + '.pickle')
else:
self.extraction_file = self.sample_name + '.pk'
self.extraction_file = self.sample_name + '.pickle'

def save_to_file(self):

Expand Down
69 changes: 65 additions & 4 deletions tests/test_biometrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from unittest import TestCase
from unittest import mock

import pandas as pd
from biometrics.biometrics import get_samples, run_minor_contamination, run_major_contamination
from biometrics.cli import get_args
from biometrics.extract import Extract
Expand Down Expand Up @@ -93,6 +94,10 @@ def test_extract_sample(self):
self.assertIsNotNone(samples['test_sample1'].pileup, msg='Sample pileup was not loaded correctly.')
self.assertEqual(samples['test_sample1'].pileup.shape[0], 15, msg='Did not find pileup for 4 variants. Found: {}.'.format(samples['test_sample1'].pileup))

self.assertIsNotNone(
samples['test_sample1'].region_counts,
msg='Sample bed file was not loaded correctly.')


class TestLoadData(TestCase):
"""Tests load data by sample name in `biometrics` package."""
Expand All @@ -110,7 +115,7 @@ class TestLoadData(TestCase):
database=os.path.join(CUR_DIR, 'test_data/'),
vcf=None,
fafile=None,
bed=None,
bed=os.path.join(CUR_DIR, 'test_data/test.bed'),
min_mapping_quality=None,
min_base_quality=None,
min_coverage=None,
Expand Down Expand Up @@ -149,8 +154,8 @@ class TestLoadDataPickle(TestCase):
return_value=argparse.Namespace(
subparser_name='extract',
input=[
os.path.join(CUR_DIR, 'test_data/test_sample1.pk'),
os.path.join(CUR_DIR, 'test_data/test_sample2.pk')],
os.path.join(CUR_DIR, 'test_data/test_sample1.pickle'),
os.path.join(CUR_DIR, 'test_data/test_sample2.pickle')],
sample_bam=None,
sample_name=None,
sample_type=None,
Expand Down Expand Up @@ -206,7 +211,7 @@ class TestDownstreamTools(TestCase):
database=os.path.join(CUR_DIR, 'test_data/'),
vcf=None,
fafile=None,
bed=None,
bed=os.path.join(CUR_DIR, 'test_data/test.bed'),
min_mapping_quality=None,
min_base_quality=None,
min_coverage=None,
Expand Down Expand Up @@ -297,3 +302,59 @@ def test_sexmismatch(self):

self.assertEqual(set(results['expected_sex']), set(['M']), msg='Expected all samples to have an expected sex of M.')
self.assertEqual(set(results['predicted_sex']), set(['M']), msg='Expected all samples to not have a sex mismatch.')


class TestNASexMismatch(TestCase):
    """Check the sex-mismatch result when the BED file has no Y regions.

    The fixture points ``bed`` at ``test_data/test-noY.bed``; the predicted
    sex reported for the first result row is then expected to be missing
    (NaN).
    """

    @mock.patch(
        'argparse.ArgumentParser.parse_args',
        return_value=argparse.Namespace(
            subparser_name='extract',
            input=None,
            sample_bam=[
                os.path.join(CUR_DIR, 'test_data/test_sample1_golden.bam'),
                os.path.join(CUR_DIR, 'test_data/test_sample2_golden.bam')],
            sample_name=['test_sample1', 'test_sample2'],
            sample_type=['tumor', 'tumor'],
            sample_group=['patient1', 'patient1'],
            sample_sex=['M', 'M'],
            database=os.path.join(CUR_DIR, 'test_data/'),
            vcf=os.path.join(CUR_DIR, 'test_data/test.vcf'),
            fafile=os.path.join(CUR_DIR, 'test_data/ref.fasta'),
            bed=os.path.join(CUR_DIR, 'test_data/test-noY.bed'),
            min_mapping_quality=1,
            min_base_quality=1,
            min_coverage=10,
            minor_threshold=0.002,
            major_threshold=0.6,
            discordance_threshold=0.05,
            coverage_threshold=50,
            min_homozygous_thresh=0.1,
            zmin=None,
            zmax=None,
            outdir='.',
            json=None,
            plot=False,
            default_genotype=None,
            overwrite=True,
            no_db_compare=False,
            prefix='test',
            version=False,
            threads=1))
    def setUp(self, mock_args):
        """Store the arguments returned by ``get_args()``.

        ``argparse.ArgumentParser.parse_args`` is patched for the duration of
        this call, so ``get_args()`` presumably hands back the fixed
        extract-mode namespace declared in the decorator.
        """

        self.args = get_args()

    def test_sexmismatch_noY(self):
        """Extract the samples, run detection, and expect a NaN prediction."""

        # Same call order as before: build the extractor, load the samples,
        # then run the extraction over them.
        region_extractor = Extract(self.args)
        loaded_samples = get_samples(self.args, extraction_mode=True)
        extracted_samples = region_extractor.extract(loaded_samples)

        detector = SexMismatch(self.args.coverage_threshold)
        mismatch_table = detector.detect_mismatch(extracted_samples)

        # Only the row labelled 0 is inspected, exactly as in the original.
        first_prediction = mismatch_table.at[0, 'predicted_sex']
        self.assertTrue(
            pd.isna(first_prediction),
            msg='Predicted sample sex should have been nan.')
1 change: 1 addition & 0 deletions tests/test_data/test-noY.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
X 1 200

0 comments on commit f9bb131

Please sign in to comment.