Skip to content

Commit

Permalink
Checkpoint for validation
Browse files Browse the repository at this point in the history
  • Loading branch information
mobiusklein committed Aug 24, 2024
1 parent f77fce9 commit 7ccc7ec
Show file tree
Hide file tree
Showing 12 changed files with 273 additions and 136 deletions.
1 change: 1 addition & 0 deletions mzspeclib/annotation.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
"""Re-export all of the :mod:`mzpaf` annotation parsing machinery"""
from mzpaf.annotation import *
1 change: 1 addition & 0 deletions mzspeclib/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,7 @@ def apply(self, target: Attributed):
group_ids[term.group_id] += 1
target.remove_attribute(key, group_identifier=term.group_id)
else:
term = terms
if term.group_id:
group_ids[term.group_id] += 1
target.remove_attribute(key, group_identifier=term.group_id)
Expand Down
13 changes: 7 additions & 6 deletions mzspeclib/backends/diann.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,12 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: int =
group_identifier=protein_group_id
)
if "ProteinName" in descr:
analyte.add_attribute(
"MS:1000886|protein name",
descr["ProteinName"],
group_identifier=protein_group_id
)
if descr['ProteinName']:
analyte.add_attribute(
"MS:1000886|protein name",
descr["ProteinName"],
group_identifier=protein_group_id
)

for key in self._custom_analyte_keys:
if key in descr:
Expand Down Expand Up @@ -210,7 +211,7 @@ def _generate_peaks(self, batch: List[Dict[str, Any]]) -> List[Tuple[float, floa

loss_type = row['FragmentLossType']
if loss_type != NO_LOSS:
loss_type = ['-' + loss_type]
loss_type = annotation.NeutralName.parse('-' + loss_type)
else:
loss_type = None

Expand Down
10 changes: 7 additions & 3 deletions mzspeclib/backends/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
else:
raise ValueError("Must provide either spectrum_number or spectrum_name argument")
data = self.buffer[SPECTRA_KEY][offset]
spectrum = self._make_spectrum_from_payload(data)
spectrum = self._make_spectrum_from_payload(data, offset)
return spectrum

def get_cluster(self, cluster_number: int) -> SpectrumCluster:
Expand Down Expand Up @@ -237,8 +237,12 @@ def _make_cluster_from_payload(self, data: Dict[str, Any]) -> SpectrumCluster:
data[ELEMENT_ATTRIBUTES_KEY], cluster, AttributeSetTypes.cluster)
return cluster

def _make_spectrum_from_payload(self, data: Dict) -> Spectrum:
def _make_spectrum_from_payload(self, data: Dict, index: int = None) -> Spectrum:
spectrum = self._new_spectrum()

if index is not None:
spectrum.index = index

self._fill_attributes(
data[ELEMENT_ATTRIBUTES_KEY],
spectrum,
Expand Down Expand Up @@ -295,7 +299,7 @@ def read(self):
n = len(self.buffer[SPECTRA_KEY])
for offset in range(n):
data = self.buffer[SPECTRA_KEY][offset]
spectrum = self._make_spectrum_from_payload(data)
spectrum = self._make_spectrum_from_payload(data, offset)
yield spectrum


Expand Down
12 changes: 6 additions & 6 deletions mzspeclib/backends/msp.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,7 @@ def __init__(self, keys: Dict[str, Any]):
self.keys = keys

def handle(self, key: str, value: Any, container: Attributed) -> bool:
try:
trans_key = self.keys[key]
except KeyError:
breakpoint()
trans_key = self.keys[key]
if value is None:
if isinstance(trans_key, list):
k, v = trans_key
Expand Down Expand Up @@ -2011,7 +2008,10 @@ def _format_value(self, value):
return str(value)

def _proforma_to_mods(self, proforma_seq: str) -> str:
parsed = proforma.ProForma.parse(proforma_seq)
if isinstance(proforma_seq, proforma.ProForma):
parsed = proforma_seq
else:
parsed = proforma.ProForma.parse(proforma_seq)
mods = [(i, tok) for i, tok in enumerate(parsed) if tok[1]]
if mods:
tokens = []
Expand Down Expand Up @@ -2122,7 +2122,7 @@ def _format_annotation(self, annot: annotation.IonAnnotationBase):
if not parts:
return "?"
if annot.neutral_losses:
f = annotation.combine_formula(annot.neutral_losses)
f = annotation.NeutralName.combine(annot.neutral_losses)
if f[0] not in ("-", "+"):
f = "+" + f
parts.append(f)
Expand Down
8 changes: 4 additions & 4 deletions mzspeclib/backends/spectronaut.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from mzspeclib import annotation
from mzspeclib.analyte import Analyte
from mzspeclib.backends.base import LIBRARY_NAME_TERM, _CSVSpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
from mzspeclib.backends.utils import open_stream, urlify
from mzspeclib.backends.utils import open_stream, urlify, try_cast
from mzspeclib.spectrum import Spectrum, SPECTRUM_NAME


Expand Down Expand Up @@ -196,7 +196,7 @@ def _generate_peaks(self, batch: List[Dict[str, Any]]) -> List[Tuple[float, floa

loss_type = row['FragmentLossType']
if loss_type != NO_LOSS:
loss_type = ['-' + loss_type]
loss_type = annotation.NeutralName.parse('-' + loss_type)
else:
loss_type = None

Expand Down Expand Up @@ -241,9 +241,9 @@ def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analy
group_identifier=protein_group_id
)

if "OrganismId" in description:
if "OrganismId" in description and description["OrganismId"] is not None:
analyte.add_attribute_group([
["MS:1001467|taxonomy: NCBI TaxID", f"NCBITaxon:{description['OrganismId']}|{description['Organisms']}"],
["MS:1001467|taxonomy: NCBI TaxID", try_cast(description['OrganismId'])],
["MS:1001469|taxonomy: scientific name", description['Organisms']],
])

Expand Down
6 changes: 3 additions & 3 deletions mzspeclib/backends/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@
r"^(?P<term>(?P<term_accession>\S+:(?:\d|X)+)\|(?P<term_name>[^=]+))"
)
key_value_term_pattern = re.compile(
r"^(?P<term>(?P<term_accession>[A-Za-z0-9:.]+:(?:\d|X)+)\|(?P<term_name>[^=]+?))\s*=\s*(?P<value>.+)"
r"^(?P<term>(?P<term_accession>[A-Za-z0-9:.]+:(?:\d|X)+)\|(?P<term_name>[^=]+?))\s*=(\s*(?P<value>.+))?"
)
grouped_key_value_term_pattern = re.compile(
r"^\[(?P<group_id>\d+)\](?P<term>(?P<term_accession>\S+:(?:\d|X)+)\|(?P<term_name>[^=]+?))\s*=\s*(?P<value>.+)"
r"^\[(?P<group_id>\d+)\](?P<term>(?P<term_accession>\S+:(?:\d|X)+)\|(?P<term_name>[^=]+?))\s*=(\s*(?P<value>.+))?"
)
# Matches an unsigned decimal number at the start of a string. The dot is
# escaped so that only a literal decimal point may join the integer and
# fractional digits; an unescaped "." would accept any character there.
float_number = re.compile(r"^\d+(\.\d+)?")

Expand Down Expand Up @@ -162,7 +162,7 @@ def _parse_attribute_into(
def real_line_number_or_nothing(self):
message = f" on line {self.line_number + self.start_line_number}"
if self.spectrum_index is not None:
message += f" in spectrum {self.spectrum_index}"
message += f" in spectrum index {self.spectrum_index}"
message += f" in state {self.state}"
return message

Expand Down
2 changes: 0 additions & 2 deletions mzspeclib/backends/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,6 @@ def truncate(self, size=None):
raise io.UnsupportedOperation("Read-only")

def close(self):
    """Close the internal buffer and then the wrapped stream.

    Returns
    -------
    The return value of ``self.stream.close()``.
    """
    # Debugging leftovers (a ``print`` and a ``breakpoint()`` call) were
    # removed: they halted every caller in the debugger on close.
    try:
        self.buffer.close()
    finally:
        # Release the underlying stream even if closing the buffer failed.
        result = self.stream.close()
    return result

Expand Down
29 changes: 24 additions & 5 deletions mzspeclib/tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,13 @@ def _display_tree(tree, indent: int=0):


@click.group(context_settings=CONTEXT_SETTINGS)
def main():
@click.option("-d", "--debug-logging", is_flag=True, help="Enable debug logging")
def main(debug_logging=False):
"""A collection of utilities for inspecting and manipulating spectral libraries."""
format_string = '[%(asctime)s] %(levelname).1s | %(name)s | %(message)s'
format_string = '[%(asctime)s] %(levelname).1s | %(name)s | %(filename)s:%(funcName)s:%(lineno)d | %(message)s'

logging.basicConfig(
level='INFO',
level='INFO' if not debug_logging else "DEBUG",
stream=sys.stderr,
format=format_string,
datefmt="%H:%M:%S")
Expand All @@ -55,6 +56,8 @@ def main():
handler.setFormatter(
fmtr
)
# if debug_logging:
# sys.excepthook = debug_hook


@main.command("describe", short_help=("Produce a minimal textual description"
Expand Down Expand Up @@ -191,6 +194,8 @@ def validate(inpath, profiles=None, input_format=None):

logger.info(f"Loading validators...")
chain = validator.get_validator_for("base")
chain |= validator.ControlledVocabularyAttributeValidator()
chain |= validator.get_object_validator_for("base")
chain |= validator.get_object_validator_for("peak_annotations")
for profile in profiles:
if profile is None:
Expand All @@ -208,9 +213,23 @@ def validate(inpath, profiles=None, input_format=None):
by_level[message.requirement_level].append(message)

for level, bucket in sorted(by_level.items()):
logger.info(f"Found {len(bucket)} violations for {level.name.upper()} rules")
log_level = logging.WARN
if level == RequirementLevel.may:
log_level = logging.DEBUG
logger.log(log_level, f"Found {len(bucket)} violations for {level.name.upper()} rules")
for err in bucket:
logger.warn(f"... {err.message}")
logger.log(log_level, f"... {err.message}")


def debug_hook(type, value, tb):
    """Exception hook that starts a post-mortem debugger on a terminal.

    The parameters follow the ``sys.excepthook`` calling convention
    (exception class, exception instance, traceback), so the parameter
    names are kept as-is even though ``type`` shadows the builtin.

    When ``sys.stderr`` is not attached to a TTY a notice is printed and the
    exception is passed to the default ``sys.__excepthook__``. Otherwise the
    traceback is printed and ``pdb.post_mortem`` is started on it.
    """
    if not sys.stderr.isatty():
        # This branch runs when there is NO interactive terminal; the notice
        # previously claimed the opposite.
        click.secho("Not running interactively, not starting debugger", fg="yellow")
        sys.__excepthook__(type, value, tb)
    else:
        # Imported lazily: only needed when a debugger session is started.
        import pdb
        import traceback
        traceback.print_exception(type, value, tb)
        pdb.post_mortem(tb)


if __name__ == "__main__":
Expand Down
39 changes: 38 additions & 1 deletion mzspeclib/validate/object_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

if TYPE_CHECKING:
from .validator import ValidatorBase

from mzspeclib.spectrum_library import SpectrumLibrary

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
Expand All @@ -38,13 +38,50 @@ def __init__(self, id, path: str, requirement_level: RequirementLevel=Requiremen
self.path = path
self.requirement_level = requirement_level

def __eq__(self, other):
    """Compare two rules by ``id``, ``path`` and ``requirement_level``.

    Two rules are equal when all three properties match and one object's
    class is the same as, or a subclass of, the other's. ``None`` is never
    equal. Objects that do not expose the compared attributes yield
    ``NotImplemented`` so Python can fall back to its default comparison
    instead of raising ``AttributeError``.
    """
    if other is None:
        return False
    try:
        same_props = (
            self.id == other.id
            and self.path == other.path
            and self.requirement_level == other.requirement_level
        )
    except AttributeError:
        # ``other`` is not rule-like; defer to the reflected comparison.
        return NotImplemented
    if not same_props:
        return False
    return isinstance(other, self.__class__) or isinstance(self, other.__class__)

def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    are_equal = self == other
    return not are_equal

def __hash__(self):
    """Hash the rule using only its ``id``."""
    identifier = self.id
    return hash(identifier)

def __call__(self, obj: Attributed, path: str, identifier_path: Tuple, validator_context: "ValidatorBase") -> bool:
    """Make the rule callable by forwarding all arguments to ``self.validate``.

    Returns whatever ``self.validate`` returns.
    """
    outcome = self.validate(obj, path, identifier_path, validator_context)
    return outcome

def validate(self, obj: Attributed, path: str, identifier_path: Tuple, validator_context: "ValidatorBase") -> bool:
    """Check ``obj`` against this rule; this implementation is a placeholder.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()


class LibraryFormatVersionFirstRule(ScopedObjectRuleBase):
    """Rule checking that the library's first attribute is the format version.

    The first entry of ``obj.attributes`` must have the key
    ``MS:1003186|library format version``.
    """

    def __init__(self, requirement_level: RequirementLevel = RequirementLevel.must):
        super().__init__("Library_format_version_first_rule", "/Library", requirement_level=requirement_level)

    def validate(self, obj: "SpectrumLibrary", path: str, identifier_path: Tuple, validator_context: "ValidatorBase") -> bool:
        """Report a warning and return ``False`` when the rule is violated.

        Returns ``True`` only when the first attribute of ``obj`` carries the
        library format version key. A library with no attributes is treated
        as a violation rather than raising ``StopIteration``.
        """
        # ``None`` default avoids StopIteration on an empty attribute collection.
        attr = next(iter(obj.attributes), None)
        if attr is not None and attr.key == "MS:1003186|library format version":
            return True

        if attr is None:
            message = (
                "The library has no attributes, so the first attribute is not "
                "'MS:1003186|library format version'"
            )
        else:
            message = f"The first attribute of the library is not 'MS:1003186|library format version': {attr}"
        validator_context.add_warning(
            obj,
            path,
            identifier_path,
            self,
            attr,
            self.requirement_level,
            message,
        )
        # The violation was reported, so the rule must not claim success.
        return False


class SpectrumPeakAnnotationRule(ScopedObjectRuleBase):
def __init__(self, requirement_level: RequirementLevel=RequirementLevel.should):
super().__init__(
Expand Down
3 changes: 3 additions & 0 deletions mzspeclib/validate/semantic_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,9 @@ class ScopedSemanticRule:
condition: Optional[AttributeSemanticRule] = dataclasses.field(default=None)
notes: Optional[str] = dataclasses.field(default=None)

def __hash__(self):
    """Hash the rule using only its ``id``."""
    rule_id = self.id
    return hash(rule_id)

def find_all_children_of(self, attribute_rule: AttributeSemanticRule, obj: Attributed, validator_context: "ValidatorBase") -> Tuple:
result = []
for attrib in validator_context.walk_terms_for(attribute_rule.accession):
Expand Down
Loading

0 comments on commit 7ccc7ec

Please sign in to comment.