Skip to content

Commit

Permalink
Update species probes (#177)
Browse files Browse the repository at this point in the history
* Add option --ncbi_names; handle new tb probes/hierarchy

* Remove hard-coded panel fix (crashes mykrobe predict)

* Add option --dump_species_covgs

* Run black

* Update changelog
  • Loading branch information
martinghunt authored Sep 27, 2023
1 parent 4c52a48 commit 1bf2270
Show file tree
Hide file tree
Showing 6 changed files with 311 additions and 98 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,24 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- When using `--debug` flag, log species probe coverage/depth info and also each
time a probe is rejected due to low coverage and/or depth.

- Added flag `--ncbi_names`. This is for soon to be updated tb probes for
species calling, where the JSON file will also report alternative NCBI taxon
names, as well as the default GTBD names.

- Added option `--dump_species_covgs`, to dump a JSON file of all the species
probe coverage information. This includes the raw coverage info from mccortex,
before it is aggregated into a single call that you see in the usual
mykrobe output.

### Changed

- The tb species probes are going to be updated after the next release of
mykrobe. Code changed to handle these new probes, specifically where a node in
the taxon tree has no probes. If a child node is called as present from the
reads, then push that call up to the parent node. Code still works as normal
on the existing (soon to be old) panels.


## [0.12.2]

### Fixed
Expand Down
11 changes: 11 additions & 0 deletions src/mykrobe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def __call__(self, parser, namespace, values, option_string=None):
action="store_true",
default=False,
)
genotyping_mixin.add_argument(
"--dump_species_covgs",
help="Dump species probes coverage information to a JSON file",
metavar="FILENAME",
)
genotyping_mixin.add_argument(
"-e",
"--expected_error_rate",
Expand Down Expand Up @@ -174,6 +179,12 @@ def __call__(self, parser, namespace, values, option_string=None):
help="File path to save output file as. Default is to stdout",
default="",
)
genotyping_mixin.add_argument(
"--ncbi_names",
help="Report NCBI species names in addiition to the usual species names in the JSON output. Only applies when the species is tb",
action="store_true",
default=False,
)


panels_mixin = argparse.ArgumentParser(add_help=False)
Expand Down
31 changes: 25 additions & 6 deletions src/mykrobe/cmds/amr.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,20 +167,24 @@ def ref_data_from_args(args):
"var_to_res_json": species_dir.json_file("amr"),
"hierarchy_json": species_dir.json_file("hierarchy"),
"lineage_json": species_dir.json_file("lineage"),
"ncbi_names_json": species_dir.json_file("ncbi_names"),
"kmer": species_dir.kmer(),
"version": species_dir.version(),
"species_phylo_group": species_dir.species_phylo_group(),
}

if ref_data["lineage_json"] is None:
ref_data["lineage_dict"] = None
else:
ref_data["lineage_dict"] = load_json(ref_data["lineage_json"])
for key in ["lineage", "ncbi_names"]:
if ref_data[f"{key}_json"] is None:
ref_data[f"{key}_dict"] = None
else:
ref_data[f"{key}_dict"] = load_json(ref_data[f"{key}_json"])

return ref_data


def detect_species_and_get_depths(cov_parser, hierarchy_json, wanted_phylo_group):
def detect_species_and_get_depths(
cov_parser, hierarchy_json, wanted_phylo_group, probe_cov_json=None
):
depths = []
if wanted_phylo_group is None:
return {}, depths
Expand All @@ -193,6 +197,7 @@ def detect_species_and_get_depths(cov_parser, hierarchy_json, wanted_phylo_group
species_covgs=cov_parser.covgs["species"],
lineage_covgs=cov_parser.covgs.get("sub-species", {}),
hierarchy_json_file=hierarchy_json,
probe_cov_json_file=probe_cov_json,
)
phylogenetics = species_predictor.run()

Expand Down Expand Up @@ -249,6 +254,13 @@ def fix_X_amino_acid_variants(sample_json):
fix_amino_acid_X_variants_keys(sample_json["variant_calls"])


def add_ncbi_species_names_to_phylo_dict(phylo, ncbi_names):
if "species" not in phylo or ncbi_names is None:
return
for species, species_d in phylo["species"].items():
species_d["ncbi_names"] = ncbi_names.get(species, "UNKNOWN")


def run(parser, args):
logger.info(f"Start runnning mykrobe predict. Command line: {' '.join(sys.argv)}")
base_json = {args.sample: {}}
Expand Down Expand Up @@ -298,7 +310,10 @@ def run(parser, args):
depths = [cp.estimate_depth()]
else:
phylogenetics, depths = detect_species_and_get_depths(
cp, ref_data["hierarchy_json"], ref_data["species_phylo_group"]
cp,
ref_data["hierarchy_json"],
ref_data["species_phylo_group"],
probe_cov_json=args.dump_species_covgs,
)

# Genotype
Expand Down Expand Up @@ -450,5 +465,9 @@ def run(parser, args):

logger.info("Progress: writing output")
fix_X_amino_acid_variants(base_json[args.sample])
if args.ncbi_names and ref_data["ncbi_names_dict"] is not None:
add_ncbi_species_names_to_phylo_dict(
base_json[args.sample]["phylogenetics"], ref_data["ncbi_names_dict"]
)
write_outputs(args, base_json)
logger.info("Progress: finished")
Loading

0 comments on commit 1bf2270

Please sign in to comment.