diff --git a/src/pixelator/graph/constants.py b/src/pixelator/graph/constants.py index 1477d0a9..f1d436f1 100644 --- a/src/pixelator/graph/constants.py +++ b/src/pixelator/graph/constants.py @@ -5,3 +5,6 @@ MIN_PIXELS_TO_REFINE = 100 LEIDEN_RESOLUTION = 0.01 +RELATIVE_ANNOTATE_RESOLUTION = ( + 0.5 # A lower resolution is used for annotation of potential doublets +) diff --git a/src/pixelator/pixeldataset/utils.py b/src/pixelator/pixeldataset/utils.py index 9be27a83..c3c00bd6 100644 --- a/src/pixelator/pixeldataset/utils.py +++ b/src/pixelator/pixeldataset/utils.py @@ -16,7 +16,7 @@ from graspologic.partition import leiden from pixelator.graph import components_metrics -from pixelator.graph.constants import LEIDEN_RESOLUTION +from pixelator.graph.constants import LEIDEN_RESOLUTION, RELATIVE_ANNOTATE_RESOLUTION from pixelator.statistics import ( clr_transformation, log1p_transformation, @@ -233,7 +233,7 @@ def _compute_sub_communities( ) component_communities_dict = leiden( edgelist_tuple, - resolution=0.5 * LEIDEN_RESOLUTION, + resolution=RELATIVE_ANNOTATE_RESOLUTION * LEIDEN_RESOLUTION, random_seed=42, ) component_communities = pd.Series(component_communities_dict) @@ -246,7 +246,10 @@ def _assess_doublet(component_edgelist: pd.DataFrame) -> tuple[bool, int]: A component is a potential doublet if a) it has more than one community and b) the second largest community is at least 20% of the size of the largest - community. (If the other communities are smaller they are assumed to be debries.) + community. A lower resolution is used for annotation of potential doublets + than for component recovery in the graph phase. The reduction factor for the + annotation resolution is set by RELATIVE_ANNOTATE_RESOLUTION (currently 0.5). + """ component_communities = _compute_sub_communities(component_edgelist) component_community_sizes = component_communities.value_counts().sort_values(