Skip to content

Commit

Permalink
Fix cutoff documentation of DistanceCalculator classes (#474)
Browse files Browse the repository at this point in the history
* Fix cutoff documentation of DistanceCalculator classes

* Fix incompatibility with adjustText 1.0

* Update CHANGELOG

* Update CHANGELOG

* Adjusttext compatibility with both new and old versions

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
grst and pre-commit-ci[bot] authored Jan 9, 2024
1 parent 759bc44 commit bd168a9
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 19 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ and this project adheres to [Semantic Versioning][].
in `pp.ir_dist` now uses the `FastAlignmentDistanceCalculator` with only the lenght-based filter activated.
Using the `"fastalignment"` activates the heuristic, which is significantly faster, but results in some false-negatives.

### Documentation

- The default values of the distance calculator classes in `ir_dist.metrics` was unclear. The default value is now
set in the classes. In `pp.ir_dist` and `ir_dist.sequence_dist`, no cutoff argument is passed to the metrics
objects, unless one is explicitly specified (previously `None` was passed by default).

## v0.14.0

### Breaking changes
Expand Down
16 changes: 10 additions & 6 deletions src/scirpy/ir_dist/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def IrNeighbors(*args, **kwargs):
* `alignment` -- Distance based on pairwise sequence alignments using the
BLOSUM62 matrix. This option is incompatible with nucleotide sequences.
See :class:`~scirpy.ir_dist.metrics.FastAlignmentDistanceCalculator`.
* `fastalignment` -- Distance based on pairwise sequence alignments using the
* `fastalignment` -- Distance based on pairwise sequence alignments using the
BLOSUM62 matrix. Faster implementation of `alignment` with some loss.
This option is incompatible with nucleotide sequences.
See :class:`~scirpy.ir_dist.metrics.FastAlignmentDistanceCalculator`.
Expand Down Expand Up @@ -82,18 +82,22 @@ def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_
metric = "identity"
cutoff = 0

# Let's rely on the default set by the class if cutoff is None
if cutoff is not None:
kwargs["cutoff"] = cutoff

if isinstance(metric, metrics.DistanceCalculator):
dist_calc = metric
elif metric == "alignment":
dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, estimated_penalty=0, **kwargs)
dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, estimated_penalty=0, **kwargs)
elif metric == "fastalignment":
dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, **kwargs)
elif metric == "identity":
dist_calc = metrics.IdentityDistanceCalculator(cutoff=cutoff, **kwargs)
dist_calc = metrics.IdentityDistanceCalculator(**kwargs)
elif metric == "levenshtein":
dist_calc = metrics.LevenshteinDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
dist_calc = metrics.LevenshteinDistanceCalculator(n_jobs=n_jobs, **kwargs)
elif metric == "hamming":
dist_calc = metrics.HammingDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
dist_calc = metrics.HammingDistanceCalculator(n_jobs=n_jobs, **kwargs)
else:
raise ValueError("Invalid distance metric.")

Expand Down
18 changes: 5 additions & 13 deletions src/scirpy/ir_dist/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ class IdentityDistanceCalculator(DistanceCalculator):
will be ignored and is always 0.
"""

def __init__(self, cutoff: Union[int, None] = 0):
def __init__(self, cutoff: int = 0):
cutoff = 0
super().__init__(cutoff)

Expand Down Expand Up @@ -295,9 +295,7 @@ class LevenshteinDistanceCalculator(ParallelDistanceCalculator):
{params}
"""

def __init__(self, cutoff: Union[None, int] = None, **kwargs):
if cutoff is None:
cutoff = 2
def __init__(self, cutoff: int = 2, **kwargs):
super().__init__(cutoff, **kwargs)

def _compute_block(self, seqs1, seqs2, origin):
Expand Down Expand Up @@ -344,9 +342,7 @@ class HammingDistanceCalculator(ParallelDistanceCalculator):
{params}
"""

def __init__(self, cutoff: Union[None, int] = None, **kwargs):
if cutoff is None:
cutoff = 2
def __init__(self, cutoff: int = 2, **kwargs):
super().__init__(cutoff, **kwargs)

def _compute_block(self, seqs1, seqs2, origin):
Expand Down Expand Up @@ -420,16 +416,14 @@ class AlignmentDistanceCalculator(ParallelDistanceCalculator):
)
def __init__(
self,
cutoff: Union[None, int] = None,
cutoff: int = 10,
*,
n_jobs: Union[int, None] = None,
block_size: int = 50,
subst_mat: str = "blosum62",
gap_open: int = 11,
gap_extend: int = 11,
):
if cutoff is None:
cutoff = 10
super().__init__(cutoff, n_jobs=n_jobs, block_size=block_size)
self.subst_mat = subst_mat
self.gap_open = gap_open
Expand Down Expand Up @@ -558,7 +552,7 @@ class FastAlignmentDistanceCalculator(ParallelDistanceCalculator):

def __init__(
self,
cutoff: Union[None, int] = None,
cutoff: int = 10,
*,
n_jobs: Union[int, None] = None,
block_size: int = 50,
Expand All @@ -567,8 +561,6 @@ def __init__(
gap_extend: int = 11,
estimated_penalty: float = None,
):
if cutoff is None:
cutoff = 10
super().__init__(cutoff, n_jobs=n_jobs, block_size=block_size)
self.subst_mat = subst_mat
self.gap_open = gap_open
Expand Down

0 comments on commit bd168a9

Please sign in to comment.