Skip to content

Commit

Permalink
Merge branch 'maintenance/v0.1.x' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
kevin931 committed Jul 10, 2023
2 parents 190d475 + aec8da7 commit 4143105
Show file tree
Hide file tree
Showing 14 changed files with 723 additions and 35 deletions.
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright 2022 Cytomulate Developers
Copyright 2022-2023 Cytomulate Developers

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
Expand Down
Binary file added assets/pycytodata_flowchart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion cytomulate/cell_graph_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def generate_trajectories(self,
cell_types: dict
A dictionary of CellType objects
kwargs:
Extra parameters needed for non-default path generation algorithms
Extra parameters needed for non-default path generation algorithms, which
are passed to ``cytomulate.utilities.trajectories``.
"""
edges = self.graph.edges
for e in edges:
Expand Down
41 changes: 30 additions & 11 deletions cytomulate/creation/cytof_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,28 @@ def __init__(self,
n_markers: int = 20,
n_trees: int = 2,
background_noise_model: Optional[Union[Callable, dict]] = None) -> None:
"""Initialize the CreationCytofData object
"""The Creation Mode object for Cytomulate.
This class serves as a starting point for the Creation Mode of Cytomulate. The constructor
defines the key parameters of the simulation, including the number of batches, cell types,
protein markers, and trees. The number of cells is defined later at a sampling step.
Parameters
----------
n_batches: int
Number of batches to be simulated
Number of batches to be simulated. All the other parameters apply to every batch.
n_types: int
Number of cell types to be simulated
Number of cell types to be simulated.
n_markers: int
Number of markers (columns) to be used
Number of protein markers (columns) to be simulated.
n_trees: int
Number of trees in the cell graph
Number of trees in the cell graph. Each tree encapsulates and represents the relationship
between cell types. Note that it is not necessary to add trajectory in complex simulation
even though trees are used in general.
background_noise_model: Callable or dict
The function used to generate background noise. It should only take one input: size
The function used to generate background noise. It should only take one input: size. In the
cases of multiple batches with different noise models, a dictionary with batch number as keys
and function as value is used.
"""
super().__init__(n_batches, background_noise_model)

Expand All @@ -63,18 +71,26 @@ def initialize_cell_types(self,
scale: float = 0.5,
n_components: int = 1,
variance_mode: float = 0.01) -> None:
"""Initialize cell type objects
"""Initialize cell type models.
This method initializes the models for each cell type. Namely, a Gaussian Mixture Model
is generated for each cell type at this stage according to the parameters specified.
Parameters
----------
L: int
Number of levels of expressions
Number of levels of expressions. The levels are used to differentiate between cell types
whose expressions for the same marker may be different. We recommend at least 2, but
not too many.
scale: float
The scale parameter used in generating expression levels
The scale parameter used in generating expression levels' mean, which comes from a
truncated normal distribution on the positive reals. The ``scale`` is the standard
deviation of the distribution. When the scale is large, the levels of expressions
are more spread out, and vice versa.
n_components: int
Number of components in a GMM
Number of components in a GMM.
variance_mode: float
The mode of the variance of the inverse wishart distribution
The mode of the variance of the inverse wishart distribution.
"""
# We first generate high expression levels and low expression levels
# Truncated normals are used to ensure the ordering
Expand All @@ -91,6 +107,9 @@ def initialize_cell_types(self,
def generate_cell_graph(self, **kwargs) -> None:
"""Generate cell differentiation paths
This method is part of complex simulation's cellular trajectory simulation. It
generates differentiation paths, which will be used at the sampling stage.
Parameters
----------
kwargs:
Expand Down
33 changes: 24 additions & 9 deletions cytomulate/cytof_data_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ class GeneralCytofData:
def __init__(self,
n_batches: int = 1,
background_noise_model: Optional[Union[Callable, dict]] = None) -> None:
"""Initialize the GeneralCytofData object
"""Initialize the GeneralCytofData object.
This is the base class for `CreationCytofData` and `EmulationCytofData`, both of
which inherit most of the methods. This class provides functionalities including
sampling and complex simulations.
Parameters
----------
Expand Down Expand Up @@ -53,12 +57,18 @@ def __init__(self,

def generate_cell_abundances(self,
is_random: bool = True) -> None:
"""Generate cell abundances
"""Generate cell abundances.
This method generates the cell abundances for each batch. The probability
of each cell type can be either random or fixed with equal probabilities.
See `is_random` parameter for details.
Parameters
----------
is_random: bool
Whether the cell abundances should be randomly generated
Whether the cell abundances should be randomly generated. If
`True`, the abundance of each cell type is sampled from a Dirichlet
distribution. If `False`, then all cell types have equal probability.
"""
if is_random:
# If randomly generate cell abundances,
Expand Down Expand Up @@ -218,13 +228,16 @@ def sample_one_batch(self,
Number of samples
cell_abundances: dict or None
A dictionary whose keys are the cell labels. The corresponding values should be
either the actual number of events for each cell type or the probability of each cell type
either the actual number of events for each cell type or the probability of each cell type.
If this is not provided, the one stored in the object will be used. Defaults to `None`.
batch: int
The index of the batch for which we want to draw samples
The index of the batch for which we want to draw samples. Defaults to 0.
beta_alpha: float or int
The alpha parameter of the beta distribution
The alpha parameter of the beta distribution, which should be constrained to the positive reals.
Defaults to 0.4.
beta_beta: float or int
The beta parameter of the beta distribution
The beta parameter of the beta distribution, which should be constrained to the positive reals.
Defaults to 1.0.
Returns
-------
Expand Down Expand Up @@ -355,9 +368,11 @@ def sample(self,
It can be a plain dictionary whose keys are the cell labels. The corresponding values should be
either the actual number of events for each cell type or the probability of each cell type
beta_alpha: float, int, or dict
The alpha parameters of the beta distribution
The alpha parameters of the beta distribution, which should be constrained to the positive reals.
Defaults to 0.4.
beta_beta: float, int, or dict
The beta parameters of the beta distribution
The beta parameters of the beta distribution, which should be constrained to the positive reals.
Defaults to 1.0.
Returns
-------
Expand Down
48 changes: 41 additions & 7 deletions cytomulate/emulation/cytof_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,20 @@
from cytomulate.cytof_data_general import GeneralCytofData

# Typing
from typing import Union, Optional, Callable
from typing import Union, Optional, Callable, Tuple, List


class EmulationCytofData(GeneralCytofData):
def __init__(self,
n_batches: int = 1,
background_noise_model: Optional[Union[Callable, dict]] = None,
bead_label: Optional[Union[str, int]] = None) -> None:
"""Initialize the EmulationCytofData object
"""The Emulation Mode object for Cytomulate.
This class serves as a starting point for the Emulation Mode of Cytomulate. The constructor
defines the key parameters of the simulation, including the number of batches. Unlike the
Creation mode, other parameters such as the number of protein markers are fixed from the
dataset rather than user-specified. The number of cells is defined later at a sampling step.
Parameters
----------
Expand All @@ -46,21 +51,33 @@ def initialize_cell_types(self,
labels: np.ndarray,
max_components: int = 9,
min_components: int = 1,
covariance_types: Union[list, tuple] = ("full", "tied", "diag", "spherical")) -> None:
covariance_types: Union[List[str], Tuple[str]] = ("full", "tied", "diag", "spherical")) -> None:
"""Initialize cell type models by fitting Gaussian mixtures
This method fits the GMM models for each cell type. Namely, a Gaussian Mixture Model
is generated for each cell type at this stage according to the parameters specified.
An extensive model selection procedure based on the Bayesian Information Criterion (BIC)
is performed when multiple possibilities of components and covariance types are
specified. See details in `max_components` and `covariance_types`.
Parameters
----------
expression_matrix: np.ndarray
A matrix containing the expression levels of cell events
labels: np.ndarray
A vector of cell type labels
max_components: int
Used for Gaussian mixture model selection. The maximal number of components for a Gaussian mixture
The maximal number of components for a Gaussian mixture. Used for Gaussian mixture model selection.
This must be greater than or equal to `min_components`. If `max_components` equals `min_components`,
the exact number will be used for fitting. Otherwise, a model selection procedure will ensue using
Bayesian Information Criterion.
min_components: int
Used for Gaussian mixture model selection. The minimal number of components for a Gaussian mxitrue
The minimal number of components for a Gaussian mixture. Used for Gaussian mixture model selection.
This must be smaller than or equal to `max_components`. See `max_components` for details on model
selection.
covariance_types: list or tuple
Used for Gaussian mixture model selection. The candidate types of covariances
The candidate types of covariances used for Gaussian mixture model selection. If only one is specified,
no model selection will be performed based on the covariance structure.
"""
self.n_markers = np.shape(expression_matrix)[1]

Expand Down Expand Up @@ -92,6 +109,9 @@ def generate_cell_graph(self,
graph_topology: str = "forest",
**kwargs) -> None:
"""Generate a cell graph as well as differentiation paths
This method is part of complex simulation's cellular trajectory simulation. It
generates differentiation paths, which will be used at the sampling stage.
Parameters
----------
Expand All @@ -110,12 +130,26 @@ def generate_cell_abundances(self,
is_random: bool = True) -> None:
"""Generate cell abundances
Generate the cell abundances for all cell types: namely, the amount
of cells in each cell type. This method supports either data-based
cell abundance or randomly-generated cell abundance. In the latter
case, each cell type's probability can be further randomized.
Parameters
----------
use_observed: bool
Whether the cell abundances should use the observed ones
is_random: bool
Whether the cell abundances should be randomly generated
In the case that `use_observed` is `False`, whether the cell abundances'
probability should be randomly generated. If `True`, the abundance of each
cell type is sampled from a Dirichlet distribution. If `False`, then all cell
types have equal probability.
Note
-----
If you wish to use the default observed cell abundance from the data,
it is not necessary to call this method. Otherwise, you should always
set ``use_observed`` to ``False``.
"""
if use_observed:
for b in range(self.n_batches):
Expand Down
4 changes: 4 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@ For more details, **read our tutorials and documentations linked below!** Or try
:maxdepth: 1
:caption: Tutorial

tutorial/emulation
tutorial/creation
tutorial/cli
tutorial/complex
tutorial/visualization
tutorial/pycytodata
tutorial/benchmark

.. toctree::
:maxdepth: 1
Expand Down
4 changes: 2 additions & 2 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ Now, you can have the option to output your simulation results in a ``PyCytoData

.. note::

``PyCytoData`` requires ``Python>=3.7``, which is more strict than ``cytomulate``.
``PyCytoData`` requires ``Python>=3.7``, which is stricter than ``cytomulate``.
If you are still running an older version, please consider upgrading.

.. image:: ../../../assets/pycytodata.jpg
.. image:: ../../assets/pycytodata.jpg
:width: 600
:alt: PyCytoData Alliance
2 changes: 1 addition & 1 deletion docs/source/license.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Our project is officially licensed under the MIT license:
The MIT License (MIT)
Copyright 2022 Cytomulate Developers
Copyright 2022-2023 Cytomulate Developers
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
Expand Down
6 changes: 3 additions & 3 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ this is because Cytomulate can accomodate multiple samples as indexed by
the dictionary keys. Of course, you can proceed to extract the expression
matrix and then work with it in downstream analyses.

``PyCytoData`` Output
PyCytoData Output
------------------------

For those of you who are familiar with ``PyCytoData`` or want a cleaner interface
Expand All @@ -82,7 +82,7 @@ to work with, Cytomulate can output a ``PyCytoData`` object.

``PyCytoData`` is required to be installed for this to work. Since it is an
optional dependency, read our `Installation Guide <https://cytomulate.readthedocs.io/en/dev/installation.html>`_
for further details. Once installed, ``PyCytoData`` is fully compatible.
for further details. Once installed, ``PyCytoData`` is fully compatible with ``Cytomulate``.


To do this, simply use the following method instead:
Expand Down Expand Up @@ -237,7 +237,7 @@ Now, let's look at our outputs:
'Erythroblast'], dtype='<U17')}
``PyCytoData`` Output
PyCytoData Output
------------------------

If you have fallen in love with ``PyCytoData``, good news: the emulation mode is compatible with
Expand Down
3 changes: 3 additions & 0 deletions docs/source/tutorial/benchmark.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*************************
Benchmark Analyses
*************************
Loading

0 comments on commit 4143105

Please sign in to comment.