From 183045a6b6b32e31ef47715c3affa01f2c438039 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 8 Jun 2021 17:10:47 -0400 Subject: [PATCH 1/2] print NA if no y chrom, add unit tests --- biometrics/extract.py | 8 ++--- biometrics/sample.py | 4 +-- tests/test_biometrics.py | 69 +++++++++++++++++++++++++++++++++--- tests/test_data/test-noY.bed | 1 + 4 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 tests/test_data/test-noY.bed diff --git a/biometrics/extract.py b/biometrics/extract.py index 0f87125..1b62d65 100644 --- a/biometrics/extract.py +++ b/biometrics/extract.py @@ -55,7 +55,7 @@ def _parse_bed_file(self): # only keep Y chrom regions self.regions = self.regions[self.regions[0].isin(['Y', 'chrY'])] if len(self.regions) == 0: - print('There are not Y chromosome regions!') + print('There are no Y chromosome regions. Cannot determine if there is a sex mismatch.') self.regions.columns = range(self.regions.shape[1]) @@ -87,9 +87,9 @@ def _extract_regions(self, sample): 'end': end, 'count': count}) - region_counts = pd.DataFrame(region_counts) - - sample.region_counts = region_counts + if len(region_counts) > 0: + region_counts = pd.DataFrame(region_counts) + sample.region_counts = region_counts return sample diff --git a/biometrics/sample.py b/biometrics/sample.py index e633038..2a7fe2b 100644 --- a/biometrics/sample.py +++ b/biometrics/sample.py @@ -29,9 +29,9 @@ def __init__(self, sample_name=None, sample_bam=None, sample_group=None, if self.sample_name is not None: if db is not None: - self.extraction_file = os.path.join(db, self.sample_name + '.pk') + self.extraction_file = os.path.join(db, self.sample_name + '.pickle') else: - self.extraction_file = self.sample_name + '.pk' + self.extraction_file = self.sample_name + '.pickle' def save_to_file(self): diff --git a/tests/test_biometrics.py b/tests/test_biometrics.py index cc4136e..26b5757 100644 --- a/tests/test_biometrics.py +++ b/tests/test_biometrics.py @@ -8,6 +8,7 @@ from unittest import TestCase from unittest import mock +import pandas as pd from biometrics.biometrics import get_samples, run_minor_contamination, run_major_contamination from biometrics.cli import get_args from biometrics.extract import Extract @@ -93,6 +94,10 @@ def test_extract_sample(self): self.assertIsNotNone(samples['test_sample1'].pileup, msg='Sample pileup was not loaded correctly.') self.assertEqual(samples['test_sample1'].pileup.shape[0], 15, msg='Did not find pileup for 4 variants. Found: {}.'.format(samples['test_sample1'].pileup)) + self.assertIsNotNone( + samples['test_sample1'].region_counts, + msg='Sample bed file was not loaded correctly.') + class TestLoadData(TestCase): """Tests load data by sample name in `biometrics` package.""" @@ -110,7 +115,7 @@ class TestLoadData(TestCase): database=os.path.join(CUR_DIR, 'test_data/'), vcf=None, fafile=None, - bed=None, + bed=os.path.join(CUR_DIR, 'test_data/test.bed'), min_mapping_quality=None, min_base_quality=None, min_coverage=None, @@ -149,8 +154,8 @@ class TestLoadDataPickle(TestCase): return_value=argparse.Namespace( subparser_name='extract', input=[ - os.path.join(CUR_DIR, 'test_data/test_sample1.pk'), - os.path.join(CUR_DIR, 'test_data/test_sample2.pk')], + os.path.join(CUR_DIR, 'test_data/test_sample1.pickle'), + os.path.join(CUR_DIR, 'test_data/test_sample2.pickle')], sample_bam=None, sample_name=None, sample_type=None, @@ -206,7 +211,7 @@ class TestDownstreamTools(TestCase): database=os.path.join(CUR_DIR, 'test_data/'), vcf=None, fafile=None, - bed=None, + bed=os.path.join(CUR_DIR, 'test_data/test.bed'), min_mapping_quality=None, min_base_quality=None, min_coverage=None, @@ -297,3 +302,59 @@ def test_sexmismatch(self): self.assertEqual(set(results['expected_sex']), set(['M']), msg='Expected all samples to have an expected sex of M.') self.assertEqual(set(results['predicted_sex']), set(['M']), msg='Expected all samples to not have a sex mismatch.') + + +class TestNASexMismatch(TestCase): + """Test that sex mismatch returns NA if no Y chrom regions.""" + + @mock.patch( + 'argparse.ArgumentParser.parse_args', + return_value=argparse.Namespace( + subparser_name='extract', + input=None, + sample_bam=[ + os.path.join(CUR_DIR, 'test_data/test_sample1_golden.bam'), + os.path.join(CUR_DIR, 'test_data/test_sample2_golden.bam')], + sample_name=['test_sample1', 'test_sample2'], + sample_type=['tumor', 'tumor'], + sample_group=['patient1', 'patient1'], + sample_sex=['M', 'M'], + database=os.path.join(CUR_DIR, 'test_data/'), + vcf=os.path.join(CUR_DIR, 'test_data/test.vcf'), + fafile=os.path.join(CUR_DIR, 'test_data/ref.fasta'), + bed=os.path.join(CUR_DIR, 'test_data/test-noY.bed'), + min_mapping_quality=1, + min_base_quality=1, + min_coverage=10, + minor_threshold=0.002, + major_threshold=0.6, + discordance_threshold=0.05, + coverage_threshold=50, + min_homozygous_thresh=0.1, + zmin=None, + zmax=None, + outdir='.', + json=None, + plot=False, + default_genotype=None, + overwrite=True, + no_db_compare=False, + prefix='test', + version=False, + threads=1)) + def setUp(self, mock_args): + """Set up test fixtures, if any.""" + + self.args = get_args() + + def test_sexmismatch_noY(self): + + extractor = Extract(self.args) + samples = get_samples(self.args, extraction_mode=True) + samples = extractor.extract(samples) + + sex_mismatch = SexMismatch(self.args.coverage_threshold) + results = sex_mismatch.detect_mismatch(samples) + + self.assertTrue( + pd.isna(results.at[0, 'predicted_sex']), msg='Predicted sample sex should have been nan.') diff --git a/tests/test_data/test-noY.bed b/tests/test_data/test-noY.bed new file mode 100644 index 0000000..b651713 --- /dev/null +++ b/tests/test_data/test-noY.bed @@ -0,0 +1 @@ +X 1 200 From f75abc6b9a1fda7d1fb8155b2d81b408b4034368 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 8 Jun 2021 17:10:59 -0400 Subject: [PATCH 2/2] Update VERSION --- biometrics/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biometrics/VERSION b/biometrics/VERSION index d3b5ba4..f2722b1 100644 --- a/biometrics/VERSION +++ b/biometrics/VERSION @@ -1 +1 @@ -0.2.11 +0.2.12