From bd168a9ad8b63ec13e316171943de72ca588bddd Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 9 Jan 2024 20:49:57 +0000 Subject: [PATCH] Fix cutoff documentation of DistanceCalculator classes (#474) * Fix cutoff documentation of DistanceCalculator classes * Fix incompatibility with adjustText 1.0 * Update CHANGELOG * Update CHANGELOG * Adjusttext compatibility with both new and old versions * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 6 ++++++ src/scirpy/ir_dist/__init__.py | 16 ++++++++++------ src/scirpy/ir_dist/metrics.py | 18 +++++------------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12ee44099..457d1d4c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,12 @@ and this project adheres to [Semantic Versioning][]. in `pp.ir_dist` now uses the `FastAlignmentDistanceCalculator` with only the lenght-based filter activated. Using the `"fastalignment"` activates the heuristic, which is significantly faster, but results in some false-negatives. +### Documentation + +- The default values of the distance calculator classes in `ir_dist.metrics` was unclear. The default value is now + set in the classes. In `pp.ir_dist` and `ir_dist.sequence_dist`, no cutoff argument is passed to the metrics + objects, unless one is explicitly specified (previously `None` was passed by default). + ## v0.14.0 ### Breaking changes diff --git a/src/scirpy/ir_dist/__init__.py b/src/scirpy/ir_dist/__init__.py index 6b13a760b..379e94d40 100644 --- a/src/scirpy/ir_dist/__init__.py +++ b/src/scirpy/ir_dist/__init__.py @@ -51,7 +51,7 @@ def IrNeighbors(*args, **kwargs): * `alignment` -- Distance based on pairwise sequence alignments using the BLOSUM62 matrix. This option is incompatible with nucleotide sequences. See :class:`~scirpy.ir_dist.metrics.FastAlignmentDistanceCalculator`. - * `fastalignment` -- Distance based on pairwise sequence alignments using the + * `fastalignment` -- Distance based on pairwise sequence alignments using the BLOSUM62 matrix. Faster implementation of `alignment` with some loss. This option is incompatible with nucleotide sequences. See :class:`~scirpy.ir_dist.metrics.FastAlignmentDistanceCalculator`. @@ -82,18 +82,22 @@ def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_ metric = "identity" cutoff = 0 + # Let's rely on the default set by the class if cutoff is None + if cutoff is not None: + kwargs["cutoff"] = cutoff + if isinstance(metric, metrics.DistanceCalculator): dist_calc = metric elif metric == "alignment": - dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, estimated_penalty=0, **kwargs) + dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, estimated_penalty=0, **kwargs) elif metric == "fastalignment": - dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs) + dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, **kwargs) elif metric == "identity": - dist_calc = metrics.IdentityDistanceCalculator(cutoff=cutoff, **kwargs) + dist_calc = metrics.IdentityDistanceCalculator(**kwargs) elif metric == "levenshtein": - dist_calc = metrics.LevenshteinDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs) + dist_calc = metrics.LevenshteinDistanceCalculator(n_jobs=n_jobs, **kwargs) elif metric == "hamming": - dist_calc = metrics.HammingDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs) + dist_calc = metrics.HammingDistanceCalculator(n_jobs=n_jobs, **kwargs) else: raise ValueError("Invalid distance metric.") diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py index 6f1a46d19..b433fbf63 100644 --- a/src/scirpy/ir_dist/metrics.py +++ b/src/scirpy/ir_dist/metrics.py @@ -243,7 +243,7 @@ class IdentityDistanceCalculator(DistanceCalculator): will be ignored and is always 0. """ - def __init__(self, cutoff: Union[int, None] = 0): + def __init__(self, cutoff: int = 0): cutoff = 0 super().__init__(cutoff) @@ -295,9 +295,7 @@ class LevenshteinDistanceCalculator(ParallelDistanceCalculator): {params} """ - def __init__(self, cutoff: Union[None, int] = None, **kwargs): - if cutoff is None: - cutoff = 2 + def __init__(self, cutoff: int = 2, **kwargs): super().__init__(cutoff, **kwargs) def _compute_block(self, seqs1, seqs2, origin): @@ -344,9 +342,7 @@ class HammingDistanceCalculator(ParallelDistanceCalculator): {params} """ - def __init__(self, cutoff: Union[None, int] = None, **kwargs): - if cutoff is None: - cutoff = 2 + def __init__(self, cutoff: int = 2, **kwargs): super().__init__(cutoff, **kwargs) def _compute_block(self, seqs1, seqs2, origin): @@ -420,7 +416,7 @@ class AlignmentDistanceCalculator(ParallelDistanceCalculator): ) def __init__( self, - cutoff: Union[None, int] = None, + cutoff: int = 10, *, n_jobs: Union[int, None] = None, block_size: int = 50, @@ -428,8 +424,6 @@ def __init__( gap_open: int = 11, gap_extend: int = 11, ): - if cutoff is None: - cutoff = 10 super().__init__(cutoff, n_jobs=n_jobs, block_size=block_size) self.subst_mat = subst_mat self.gap_open = gap_open @@ -558,7 +552,7 @@ class FastAlignmentDistanceCalculator(ParallelDistanceCalculator): def __init__( self, - cutoff: Union[None, int] = None, + cutoff: int = 10, *, n_jobs: Union[int, None] = None, block_size: int = 50, @@ -567,8 +561,6 @@ def __init__( gap_extend: int = 11, estimated_penalty: float = None, ): - if cutoff is None: - cutoff = 10 super().__init__(cutoff, n_jobs=n_jobs, block_size=block_size) self.subst_mat = subst_mat self.gap_open = gap_open