Skip to content

Commit

Permalink
Merge pull request #127 from MannLabs/multicova
Browse files Browse the repository at this point in the history
Release 0.4.3
  • Loading branch information
elena-krismer authored Mar 4, 2023
2 parents a73f9ff + 057bdef commit 55bd1ae
Show file tree
Hide file tree
Showing 21 changed files with 59 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.2
current_version = 0.4.3
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

# 0.4.3
* FIX loading datasets with non-string column names (column labels are now coerced to strings)
* ADD log2-transformation

# 0.4.2
* ADD option compare_preprocessing_modes
* update to streamlit 1.19 with new caching functions
Expand Down
4 changes: 3 additions & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def create_matrix(self):
"""

regex_find_intensity_columns = self.intensity_column.replace("[sample]", ".*")

df = self.rawinput
df = df.set_index(self.index_column)
df = df.filter(regex=(regex_find_intensity_columns), axis=1)
Expand Down Expand Up @@ -159,6 +159,7 @@ def load_metadata(self, file_path):

# check whether sample labeling matches protein data
# warnings.warn("WARNING: Sample names do not match sample labelling in protein data")
df.columns = df.columns.astype(str)
self.metadata = df

def _save_dataset_info(self):
Expand All @@ -168,6 +169,7 @@ def _save_dataset_info(self):
"Matrix: Number of ProteinIDs/ProteinGroups": self.mat.shape[1],
"Matrix: Number of samples": self.mat.shape[0],
"Intensity used for analysis": self.intensity_column,
"Log2-transformed": False,
"Normalization": None,
"Imputation": None,
"Contaminations have been removed": False,
Expand Down
16 changes: 12 additions & 4 deletions alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def _imputation(self, method):
@ignore_warning(UserWarning)
@ignore_warning(RuntimeWarning)
def _normalization(self, method):

if method == "zscore":
scaler = sklearn.preprocessing.StandardScaler()
normalized_array = scaler.fit_transform(self.mat.values)
Expand All @@ -153,11 +153,10 @@ def _normalization(self, method):
"Choose from 'zscore', 'quantile', 'linear' normalization. or 'vst' for variance stabilization transformation"
)

# TODO logarithimic normalization

self.mat = pd.DataFrame(
normalized_array, index=self.mat.index, columns=self.mat.columns
)

self.preprocessing_info.update({"Normalization": method})

def reset_preprocessing(self):
Expand Down Expand Up @@ -193,11 +192,16 @@ def _compare_preprocessing_modes(self, func, params_for_func):
results_list.append(res)

return results_list


def _log2_transform(self):
self.mat = np.log2(self.mat + 0.1)
self.preprocessing_info.update({"Log2 Transformed": True})


@ignore_warning(RuntimeWarning)
def preprocess(
self,
log2_transform=True,
remove_contaminations=False,
subset=False,
normalization=None,
Expand Down Expand Up @@ -239,6 +243,7 @@ def preprocess(
Args:
remove_contaminations (bool, optional): remove ProteinGroups that are identified as contamination.
log2_transform (bool, optional): Log2 transform data. Default to True.
normalization (str, optional): method to normalize data: either "zscore", "quantile", "linear". Defaults to None.
remove_samples (list, optional): list with sample ids to remove. Defaults to None.
imputation (str, optional): method to impute data: either "mean", "median", "knn" or "randomforest". Defaults to None.
Expand All @@ -249,6 +254,9 @@ def preprocess(

if subset:
self.mat = self._subset()

if log2_transform:
self._log2_transform()

if normalization is not None:
self._normalization(method=normalization)
Expand Down
2 changes: 1 addition & 1 deletion alphastats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__project__ = "alphastats"
__version__ = "0.4.2"
__version__ = "0.4.3"
__license__ = "Apache"
__description__ = "An open-source Python package for Mass Spectrometry Analysis"
__author__ = "Mann Labs"
Expand Down
6 changes: 6 additions & 0 deletions alphastats/gui/pages/03_Preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ def preprocessing():
options=[True, False],
)

log2_transform = st.selectbox(
"Log2-transform dataset",
options=[True, False],
)

normalization = st.selectbox(
"Normalization", options=[None, "zscore", "quantile", "vst", "linear"]
)
Expand All @@ -42,6 +47,7 @@ def preprocessing():
if submitted:
st.session_state.dataset.preprocess(
remove_contaminations=remove_contaminations,
log2_transform=log2_transform,
subset=subset,
normalization=normalization,
imputation=imputation,
Expand Down
1 change: 1 addition & 0 deletions alphastats/loader/AlphaPeptLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
# add contamination column "Reverse"
self._add_contamination_reverse_column()
self._add_contamination_column()
self._read_all_columns_as_string()
# make ProteinGroup column
self.rawinput["ProteinGroup"] = self.rawinput[self.index_column].map(
self._standardize_protein_group_column
Expand Down
5 changes: 4 additions & 1 deletion alphastats/loader/BaseLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(self, file, intensity_column, index_column, sep):
self.ptm_df = None
self._add_contamination_column()
self._check_if_columns_are_present()
self._read_all_columns_as_string()

def _check_if_columns_are_present(self):
"""check if given columns present in rawinput"""
Expand All @@ -46,9 +47,11 @@ def _check_if_columns_are_present(self):
"FragPipe Format: https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#combined_proteintsv"
"MaxQuant Format: http://www.coxdocs.org/doku.php?id=maxquant:table:proteingrouptable"
)

def _read_all_columns_as_string(self):
self.rawinput.columns = self.rawinput.columns.astype(str)

def _check_if_indexcolumn_is_unique(self):
# TODO make own duplicates functions to have less dependencies
duplicated_values = list(duplicates(self.rawinput[self.index_column].to_list()))
if len(duplicated_values) > 0:
# error or warning, duplicates could be resolved with preprocessing/filtering
Expand Down
1 change: 1 addition & 0 deletions alphastats/loader/DIANNLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(
self._remove_filepath_from_name()
self._add_tag_to_sample_columns()
self._add_contamination_column()
self._read_all_columns_as_string()

def _add_tag_to_sample_columns(self):
"""
Expand Down
1 change: 1 addition & 0 deletions alphastats/loader/MaxQuantLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
self.confidence_column = confidence_column
self.software = "MaxQuant"
self._set_filter_columns_to_true_false()
self._read_all_columns_as_string()

if gene_names_column in self.rawinput.columns.to_list():
self.gene_names = gene_names_column
Expand Down
1 change: 1 addition & 0 deletions alphastats/loader/SpectronautLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(

self._reshape_spectronaut(sample_column=sample_column, gene_names_column=gene_names_column)
self._add_contamination_column()
self._read_all_columns_as_string()


def _reshape_spectronaut(self, sample_column, gene_names_column):
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "Elena Krismer"

# The full version, including alpha/beta/rc tags
release = "0.4.2"
release = "0.4.3"


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: alphastats
Version: 0.4.2
Version: 0.4.3
Architecture: all
Maintainer: MannLabs
Description: alphastats
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/create_installer_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_linux_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/alphastats-0.4.2-py3-none-any.whl"
pip install "../../dist/alphastats-0.4.3-py3-none-any.whl"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==5.8
Expand Down
4 changes: 2 additions & 2 deletions release/one_click_macos_gui/Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
<key>CFBundleIconFile</key>
<string>alphapeptstats_logo.icns</string>
<key>CFBundleIdentifier</key>
<string>alphastats.0.4.2</string>
<string>alphastats.0.4.3</string>
<key>CFBundleShortVersionString</key>
<string>0.4.2</string>
<string>0.4.3</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_macos_gui/create_installer_macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel

# Setting up the local package
cd release/one_click_macos_gui
pip install "../../dist/alphastats-0.4.2-py3-none-any.whl"
pip install "../../dist/alphastats-0.4.3-py3-none-any.whl"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==5.8
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_macos_gui/distribution.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<installer-script minSpecVersion="1.000000">
<title>AlphaPeptStats 0.4.0</title>
<title>AlphaPeptStats 0.4.3</title>
<background mime-type="image/png" file="alphapeptstats_logo.png" scaling="proportional"/>
<welcome file="welcome.html" mime-type="text/html" />
<conclusion file="conclusion.html" mime-type="text/html" />
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/alphastats_innoinstaller.iss
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!

#define MyAppName "AlphaPeptStats"
#define MyAppVersion "0.4.2"
#define MyAppVersion "0.4.3"
#define MyAppPublisher "MannLabs"
#define MyAppURL "https://github.com/MannLabs/alphapeptstats"
#define MyAppExeName "alphastats_gui.exe"
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/create_installer_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_windows_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/alphastats-0.4.2-py3-none-any.whl"
pip install "../../dist/alphastats-0.4.3-py3-none-any.whl"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==5.8
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def create_pip_wheel():
requirements = get_requirements()
setuptools.setup(
name="alphastats",
version="0.4.2",
version="0.4.3",
license="Apache",
description="An open-source Python package for Mass Spectrometry Analysis",
long_description=get_long_description(),
Expand Down
35 changes: 14 additions & 21 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def test_preprocess_remove_samples(self):
def test_preprocess_normalize_zscore(self):
self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]})
# zscore Normalization
self.obj.preprocess(normalization="zscore")
self.obj.preprocess(log2_transform=False,normalization="zscore")
expected_mat = pd.DataFrame(
{
"a": [-1.33630621, 1.06904497, 0.26726124],
Expand All @@ -270,7 +270,7 @@ def test_preprocess_normalize_zscore(self):
def test_preprocess_normalize_quantile(self):
self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]})
# Quantile Normalization
self.obj.preprocess(normalization="quantile")
self.obj.preprocess(log2_transform=False,normalization="quantile")
expected_mat = pd.DataFrame(
{"a": [0.0, 1.0, 0.5], "b": [1.0, 0.0, 0.0], "c": [0.0, 1.0, 0.5]}
)
Expand All @@ -279,7 +279,7 @@ def test_preprocess_normalize_quantile(self):
def test_preprocess_normalize_linear(self):
self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]})
# Linear Normalization
self.obj.preprocess(normalization="linear")
self.obj.preprocess(log2_transform=False,normalization="linear")
expected_mat = pd.DataFrame(
{
"a": [0.37139068, 0.42107596, 0.40824829],
Expand All @@ -292,7 +292,7 @@ def test_preprocess_normalize_linear(self):
def test_preprocess_normalize_vst(self):
self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]})
# Linear Normalization
self.obj.preprocess(normalization="vst")
self.obj.preprocess(log2_transform=False,normalization="vst")
expected_mat = pd.DataFrame(
{
"a": [-1.30773413, 1.12010046, 0.18763367],
Expand All @@ -306,7 +306,7 @@ def test_preprocess_imputation_mean_values(self):
self.obj.mat = pd.DataFrame(
{"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]}
)
self.obj.preprocess(imputation="mean")
self.obj.preprocess(log2_transform=False,imputation="mean")
expected_mat = pd.DataFrame(
{"a": [2.0, 3.0, 4.0], "b": [5.0, 4.0, 4.0], "c": [10.0, 10.0, 10.0]}
)
Expand All @@ -316,7 +316,7 @@ def test_preprocess_imputation_median_values(self):
self.obj.mat = pd.DataFrame(
{"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]}
)
self.obj.preprocess(imputation="median")
self.obj.preprocess(log2_transform=False,imputation="median")
expected_mat = pd.DataFrame(
{"a": [2.0, 3.0, 4.0], "b": [5.0, 4.0, 4.0], "c": [10.0, 10.0, 10.0]}
)
Expand All @@ -326,7 +326,7 @@ def test_preprocess_imputation_knn_values(self):
self.obj.mat = pd.DataFrame(
{"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]}
)
self.obj.preprocess(imputation="knn")
self.obj.preprocess(log2_transform=False,imputation="knn")
expected_mat = pd.DataFrame(
{"a": [2.0, 3.0, 4.0], "b": [5.0, 4.0, 4.0], "c": [10.0, 10.0, 10.0]}
)
Expand All @@ -336,7 +336,7 @@ def test_preprocess_imputation_randomforest_values(self):
self.obj.mat = pd.DataFrame(
{"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]}
)
self.obj.preprocess(imputation="randomforest")
self.obj.preprocess(log2_transform=False,imputation="randomforest")
expected_mat = pd.DataFrame(
{
"a": [2.00000000e00, -9.22337204e12, 4.00000000e00],
Expand Down Expand Up @@ -372,14 +372,14 @@ def test_plot_correlation_matrix(self):
)

def test_plot_clustermap(self):
self.obj.preprocess(imputation="knn")
self.obj.preprocess(log2_transform=False, imputation="knn")
plot = self.obj.plot_clustermap()
first_row = plot.data2d.iloc[0].to_list()
expected = [487618.5371077078, 1293013.103298046]
self.assertEqual(first_row, expected)

def test_plot_clustermap_with_label_bar(self):
self.obj.preprocess(imputation="knn")
self.obj.preprocess(log2_transform=False, imputation="knn")
plot = self.obj.plot_clustermap(label_bar=self.comparison_column)
first_row = plot.data2d.iloc[0].to_list()
expected = [487618.5371077078, 1293013.103298046]
Expand Down Expand Up @@ -456,17 +456,10 @@ def test_plot_volcano_compare_preprocessing_modes(self):
group2=["1_71_F10", "1_73_F12"],
compare_preprocessing_modes=True
)
self.assertEqual(len(result_list), 9)
# check if results are different
# for idx, res in enumerate(result_list):
# for idx2, res2 in enumerate(result_list):
# if idx != idx2:
# difference = dictdiffer.diff(res.to_plotly_json(), res2.to_plotly_json())
# self.assertNotEqual(len(list(difference)), 0)

self.assertEqual(len(result_list), 9)

def test_preprocess_subset(self):
self.obj.preprocess(subset=True)
self.obj.preprocess(subset=True, log2_transform=False)
self.assertEqual(self.obj.mat.shape, (48, 1364))

@patch.object(Statistics, "tukey_test")
Expand Down Expand Up @@ -552,7 +545,7 @@ def test_plot_volcano_wald(self):
self.assertTrue(column_added)

def test_plot_volcano_sam(self):
self.obj.preprocess(imputation="knn", normalization="zscore")
self.obj.preprocess(log2_transform=False, imputation="knn", normalization="zscore")
plot = self.obj.plot_volcano(
column = "disease",
group1="type 2 diabetes mellitus",
Expand Down Expand Up @@ -758,7 +751,7 @@ def test_volcano_plot_anova(self):
plot = self.obj.plot_volcano(
column="grouping1", group1="Healthy", group2="Disease", method="anova"
)
expected_y_value = 0.09437708068494619
expected_y_value = 0.040890177695653236
y_value = plot.to_plotly_json().get("data")[0].get("y")[1]
self.assertAlmostEqual(y_value, expected_y_value)

Expand Down

0 comments on commit 55bd1ae

Please sign in to comment.