diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5ccdd01f..d055d7f3 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.0 +current_version = 0.4.0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/HISTORY.md b/HISTORY.md index 08525806..27a81c6d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,10 @@ # Changelog +# 0.4.0 +* SAM implementation from Isabel (MultiCova) +* Volcano Plot with permutation-based FDR line +* Bug fix when resetting matrix and saving info + # 0.3.0 * One Click Installer for macOS, Linux and Windows * Spectronaut support diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py index 42adb6c6..b0ff98f2 100644 --- a/alphastats/DataSet.py +++ b/alphastats/DataSet.py @@ -109,7 +109,8 @@ def _remove_misc_samples_in_metadata(self): ) def create_matrix(self): - """Creates a matrix of the Outputfile, with columns displaying features (Proteins) and + """ + Creates a matrix of the output file, with columns displaying features (Proteins) and rows the samples. """ @@ -126,7 +127,7 @@ def create_matrix(self): # remove proteins with only zero self.mat = mat.loc[:, (mat != 0).any(axis=0)] # reset preproccessing info - self._save_dataset_info() + self.preprocessing_info = self._save_dataset_info() self.preprocessed = False self.rawmat = mat diff --git a/alphastats/DataSet_Plot.py b/alphastats/DataSet_Plot.py index 932e504d..fd97baba 100644 --- a/alphastats/DataSet_Plot.py +++ b/alphastats/DataSet_Plot.py @@ -104,6 +104,8 @@ def plot_volcano( min_fc=1, alpha=0.05, draw_line=True, + perm=100, + fdr=0.05 ): """Plot Volcano Plot Args: column (str): column name in the metadata file with the two groups to compare group1 (str/list): name of group to compare needs to be present in column or list of sample names to compare group2 (str/list): name of group to compare needs to be present in column or list of sample names to compare - method (str): "anova", "wald", "ttest", Defaul ttest. + method (str): "anova", "wald", "ttest", "sam". Default: "ttest". labels (bool): Add text labels to significant Proteins, Default False. alpha(float,optional): p-value cut off. min_fc (float): Minimum fold change draw_line(boolean): whether to draw cut off lines. + perm(int,optional): number of permutations when using SAM as method. Defaults to 100. + fdr(float,optional): FDR cut off when using SAM as method. Defaults to 0.05.
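# Hedged usage sketch for the new SAM options on DataSet.plot_volcano (illustration only,
# not part of this diff; the file name, metadata column and group labels below are placeholders):
from alphastats import DataSet, MaxQuantLoader

loader = MaxQuantLoader(file="proteinGroups.txt")
ds = DataSet(loader=loader, metadata_path="metadata.xlsx", sample_column="sample")
ds.preprocess(imputation="knn", normalization="zscore")
fig = ds.plot_volcano(
    column="disease",
    group1="healthy",
    group2="disease",
    method="sam",    # permutation-based SAM test added in 0.4.0
    perm=10,         # number of permutations
    fdr=0.05,        # permutation-based FDR cut-off
    draw_line=True,  # with method="sam" this draws the FDR line instead of the p-value/fold-change cut-offs
)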
Returns: @@ -132,6 +136,8 @@ def plot_volcano( min_fc=min_fc, alpha=alpha, draw_line=draw_line, + perm=perm, + fdr=fdr ) return volcano_plot.plot diff --git a/alphastats/DataSet_Statistics.py b/alphastats/DataSet_Statistics.py index c6f85da9..7db5b4c4 100644 --- a/alphastats/DataSet_Statistics.py +++ b/alphastats/DataSet_Statistics.py @@ -105,9 +105,11 @@ def diff_expression_analysis(self, group1, group2, column=None, method="ttest"): elif method == "ttest": test = de.test.t_test(data=d, grouping=column) + #elif method == "SAM": + else: raise ValueError( - f"{method} is invalid choose between 'wald' for Wald-test and 'ttest'" + f"{method} is invalid choose between 'wald' for Wald-test, 'SAM' and 'ttest'" ) df = test.summary().rename(columns={"gene": self.index_column}) diff --git a/alphastats/__init__.py b/alphastats/__init__.py index 504e2818..ba5e2171 100644 --- a/alphastats/__init__.py +++ b/alphastats/__init__.py @@ -1,5 +1,5 @@ __project__ = "alphastats" -__version__ = "0.3.0" +__version__ = "0.4.0" __license__ = "Apache" __description__ = "An open-source Python package for Mass Spectrometry Analysis" __author__ = "Mann Labs" diff --git a/alphastats/gui/pages/04_Analysis.py b/alphastats/gui/pages/04_Analysis.py index 44fec1d8..4c8b805c 100644 --- a/alphastats/gui/pages/04_Analysis.py +++ b/alphastats/gui/pages/04_Analysis.py @@ -1,8 +1,7 @@ import streamlit as st from alphastats.gui.utils.analysis_helper import ( get_analysis, - load_options, -) # , check_if_options_are_loaded +) from alphastats.gui.utils.ui_helper import sidebar_info import alphastats.gui.utils.analysis_helper import pandas as pd @@ -139,17 +138,15 @@ def select_analysis(): if method in st.session_state.plotting_options.keys(): analysis_result = get_analysis( - method=method, options_dict=st.session_state.plotting_options ) - plot_to_display = True elif method in st.session_state.statistic_options.keys(): analysis_result = get_analysis( - method=method, options_dict=st.session_state.statistic_options - ) + method=method, options_dict=st.session_state.statistic_options + ) df_to_display = True st.markdown("") diff --git a/alphastats/gui/utils/analysis_helper.py b/alphastats/gui/utils/analysis_helper.py index 1d1e7ea0..2840af8e 100644 --- a/alphastats/gui/utils/analysis_helper.py +++ b/alphastats/gui/utils/analysis_helper.py @@ -1,4 +1,3 @@ -from tkinter import E import pandas as pd import logging import streamlit as st @@ -93,7 +92,7 @@ def gui_volcano_plot(): chosen_parameter_dict = helper_compare_two_groups() method = st.selectbox( "Differential Analysis using:", - options=["ttest", "anova", "wald"], + options=["ttest", "anova", "wald", "sam"], ) chosen_parameter_dict.update({"method": method}) @@ -116,6 +115,16 @@ def gui_volcano_plot(): "min_fc": min_fc, } + if method == "sam": + perm = st.number_input( + label="Number of Permutations", min_value=1, max_value=1000, value=10 + ) + fdr = st.number_input( + label="FDR cut off", min_value=0.005, max_value=0.1, value=0.050 + ) + chosen_parameter_dict.update({"perm": perm, "fdr": fdr}) + + submitted = st.button("Submit") if submitted: diff --git a/alphastats/multicova/__init__.py b/alphastats/multicova/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/alphastats/multicova/multicova.py b/alphastats/multicova/multicova.py new file mode 100644 index 00000000..bc6a0c34 --- /dev/null +++ b/alphastats/multicova/multicova.py @@ -0,0 +1,791 @@ +import numpy as np +import pandas as pd +import numba as nb +import numba_stats as nbs +import plotly.express as 
px +import plotly.graph_objects as go +from sklearn.linear_model import LinearRegression +from scipy import stats +import statsmodels.api as sm +from collections import Counter +import multiprocessing +from joblib import Parallel, delayed +import random +import math +import swifter +from sklearn.preprocessing import StandardScaler +from statsmodels.stats.multitest import multipletests +import os +from pathlib import Path + + +def get_std(x): + """ + Function to calculate the sample standard deviation. + """ + std_x = np.sqrt(np.sum((abs(x - x.mean())**2)/(len(x)-1))) + return std_x + + +def perform_ttest(x, y, s0): + """ + Function to perform an independent two-sample t-test including s0 + adjustment. + Assumptions: Equal or unequal sample sizes with similar variances. + """ + mean_x = np.mean(x) + mean_y = np.mean(y) + # Get fold-change + fc = mean_x-mean_y + n_x = len(x) + n_y = len(y) + # pooled standard deviation + # assumes that the two distributions have the same variance + sp = np.sqrt((((n_x-1)*get_std(x)**2) + ((n_y-1)*(get_std(y)**2)))/(n_x+n_y-2)) + # Get t-values + tval = fc/(sp * (np.sqrt((1/n_x)+(1/n_y)))) + tval_s0 = fc/((sp * (np.sqrt((1/n_x)+(1/n_y))))+s0) + # Get p-values + pval = 2*(1-stats.t.cdf(np.abs(tval), n_x+n_y-2)) + pval_s0 = 2*(1-stats.t.cdf(np.abs(tval_s0), n_x+n_y-2)) + return [fc, tval, pval, tval_s0, pval_s0] + + +def permutate_vars(X, n_rand, seed=42): + """ + Function to randomly permutate an array `n_rand` times. + """ + x_perm_idx = generate_perms(n=len(X), n_rand=n_rand, seed=seed) + X_rand = list() + for i in np.arange(0, len(x_perm_idx)): + X_rand.append([X[j] for j in x_perm_idx[i]]) + return X_rand + + +def workflow_ttest(df, c1, c2, s0=1, parallelize=False): + """ + Function to perform a t-test on all rows in a data frame. + c1 specifies the column names with intensity values of the first condition. + c2 specifies the column names with intensity values of the second + condition. s0 is the tuning parameter that specifies the minimum + fold-change to be trusted. + """ + if parallelize: + res = df.swifter.apply(lambda row : perform_ttest(row[c1], row[c2], s0=s0), axis = 1) + else: + res = df.apply(lambda row : perform_ttest(row[c1], row[c2], s0=s0), axis = 1) + + res = pd.DataFrame(list(res), columns=['fc','tval','pval','tval_s0','pval_s0']) + + df_real = pd.concat([df, res], axis=1) + + return df_real + + +def workflow_permutation_tvals(df, c1, c2, s0=1, n_perm=2, parallelize=False): + """ + Function to perform a t-test on all rows in a data frame based on the + permutation of samples across conditions. + c1 specifies the column names with intensity values of the first condition. + c2 specifies the column names with intensity values of the second + condition. s0 is the tuning parameter that specifies the minimum + fold-change to be trusted. n_perm specifies the number of random + permutations to perform.
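# Quick numeric sketch of the s0 adjustment implemented above (standalone, uses only
# numpy/scipy; toy numbers, not taken from the diff): with s0 > 0 the test statistic of a
# small fold change shrinks, so near-zero effects can no longer reach tiny p-values.
import numpy as np
from scipy import stats

x = np.array([10.1, 10.3, 10.2, 10.4])
y = np.array([10.0, 10.2, 10.1, 10.3])
n_x, n_y = len(x), len(y)
fc = x.mean() - y.mean()
# pooled standard deviation, as in perform_ttest (x.std(ddof=1) equals get_std(x))
sp = np.sqrt(((n_x - 1) * x.std(ddof=1) ** 2 + (n_y - 1) * y.std(ddof=1) ** 2) / (n_x + n_y - 2))
se = sp * np.sqrt(1 / n_x + 1 / n_y)
for s0 in (0.0, 0.05, 0.5):
    tval = fc / (se + s0)
    pval = 2 * (1 - stats.t.cdf(abs(tval), n_x + n_y - 2))
    print(f"s0={s0}: t={tval:.2f}, p={pval:.3f}")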
+ """ + all_c = c1 + c2 + all_c_rand = permutate_vars(all_c, n_rand=n_perm) + res_perm = list() + for i in np.arange(0,len(all_c_rand)): + if parallelize: + res_i = df.swifter.apply(lambda row : perform_ttest(row[all_c_rand[i][0:len(c1)]], + row[all_c_rand[i][len(c1):len(c1)+len(c2)]], + s0=s0), + axis = 1) + else: + res_i = df.apply(lambda row : perform_ttest(row[all_c_rand[i][0:len(c1)]], + row[all_c_rand[i][len(c1):len(c1)+len(c2)]], + s0=s0), + axis=1) + res_i = pd.DataFrame(list(res_i), columns=['fc','tval','pval','tval_s0','pval_s0']) + res_perm.append(list(np.sort(np.abs(res_i.tval_s0.values)))) + return res_perm + + +def get_tstat_cutoff(res_real, t_perm_avg, delta): + # Extract all t-stats from the results of the 'real' data + # and sort the absolute values. + t_real = res_real.tval_s0 + t_real_abs = np.sort(np.abs(t_real)) + # print(t_real_abs) + + # Calculate the difference between the observed t-stat + # and the average random t-stat for each rank position. + t_diff = t_real_abs - t_perm_avg + # print(t_diff) + + # Find the minimum t-stat value for which the difference + # between the observed and average random t-stat is + # larger than the selected delta. + t_max = t_real_abs[t_diff > delta] + if (t_max.shape[0] == 0): + t_max = np.ceil(np.max(t_real_abs)) + else: + t_max = np.min(t_max) + return t_max + + +@nb.njit +def get_positive_count(res_real_tval_s0, t_cut): + """ + Count number of tval_s0 in res_real that are above the t_cut threshold. + """ + ts_sig = list() + for r in res_real_tval_s0: + r_abs = np.abs(r) + if r_abs >= t_cut: + ts_sig.append(r_abs) + + return len(ts_sig) + + +@nb.njit +def get_false_positive_count(res_perm, t_cut): + """ + Get median number of tval_s0 in res_perm that are above the t_cut + threshold. + """ + ts_sig = list() + for rp in res_perm: + ts_abs = np.abs(rp) + ts_max = list() + for t in ts_abs: + if t >= t_cut: + ts_max.append(t) + + ts_sig.append(len(ts_max)) + + res = np.median(np.array(ts_sig)) + + return res + + +def get_pi0(res_real, res_perm): + """ + Estimate pi0, the proportion of true null (unaffected) genes in the data + set, as follows: + (a) Compute q25; q75 = 25% and 75% points of the permuted d values + (if p = # genes, B = # permutations, there are pB such d values). + (b) Count the number of values d in the real dataset that are between q25 + and q75 (there are p values to test). pi0 = #d/0.5*p. + (c) Let pi0 = min(pi0; 1) (i.e., truncate at 1). + Documentation in: https://statweb.stanford.edu/~tibs/SAM/sam.pdf + """ + t_real = res_real["tval_s0"] + t_real_abs = np.sort(np.abs(t_real)) + + t_perm = list() + for ri in res_perm: + for r in ri: + t_perm.append(r) + t_perm_abs = np.sort(np.abs(t_perm)) + + t_perm_25 = np.percentile(t_perm_abs, 25) + t_perm_75 = np.percentile(t_perm_abs, 75) + + n_real_in_range = np.sum((t_real_abs >= t_perm_25) & (t_real_abs <= t_perm_75)) + pi0 = n_real_in_range/(0.5*len(t_real_abs)) + + # pi0 can maximally be 1 + pi0 = np.min([pi0, 1]) + + return pi0 + + +def get_fdr(n_pos, n_false_pos, pi0): + """ + Compute the FDR by dividing the number of false positives by the number of + true positives. The number of false positives are adjusted by pi0, the + proportion of true null (unaffected) genes in the data set. + """ + n = n_false_pos*pi0 + if n != 0: + if n_pos != 0: + fdr = n/n_pos + else: + fdr = 0 + else: + if n_pos > 0: + fdr = 0 + else: + fdr = np.nan + return fdr + + +def estimate_fdr_stats(res_real, res_perm, delta): + """ + Helper function for get_fdr_stats_across_deltas. 
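# Toy numbers for the FDR bookkeeping defined above (illustration only, not from the diff):
# at a given t-cut, 50 real features pass, the median permutation yields 5 passing features,
# and pi0 estimates that 80% of all features are truly unaffected.
n_pos = 50         # significant features in the real data at t_cut (get_positive_count)
n_false_pos = 5    # median significant features across permutations (get_false_positive_count)
pi0 = 0.8          # estimated fraction of truly null features (get_pi0)
fdr = (n_false_pos * pi0) / n_pos   # same rule as get_fdr: 4 expected false hits / 50 hits = 0.08
print(fdr)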
+ It computes the FDR and tval_s0 thresholds for a specified delta. + The function returns a list of the following values: + t_cut, n_pos, n_false_pos, pi0, n_false_pos_corr, fdr + """ + perm_avg = np.mean(res_perm, axis=0) + t_cut = get_tstat_cutoff(res_real, perm_avg, delta) + n_pos = get_positive_count(res_real_tval_s0=np.array(res_real.tval_s0), t_cut=t_cut) + n_false_pos = get_false_positive_count(np.array(res_perm), t_cut=t_cut) + pi0 = get_pi0(res_real, res_perm) + n_false_pos_corr = n_false_pos*pi0 + fdr = get_fdr(n_pos, n_false_pos, pi0) + return [t_cut, n_pos, n_false_pos, pi0, n_false_pos_corr, fdr] + + +def get_fdr_stats_across_deltas(res_real, res_perm): + """ + Wrapper function that starts with the res_real and res_perm to derive + the FDR and tval_s0 thresholds for a range of different deltas. + """ + res_stats = list() + for d in np.arange(0, 10, 0.01): + res_d = estimate_fdr_stats(res_real, res_perm, d) + if np.isnan(res_d[5]): + break + else: + res_stats.append(res_d) + res_stats_df = pd.DataFrame(res_stats, + columns=['t_cut', 'n_pos', 'n_false_pos', 'pi0', 'n_false_pos_corr', 'fdr']) + + return(res_stats_df) + + +def get_tstat_limit(stats, fdr=0.01): + """ + Function to get tval_s0 at the specified FDR. + """ + t_limit = np.min(stats[stats.fdr <= fdr].t_cut) + return(t_limit) + + +def annotate_fdr_significance(res_real, stats, fdr=0.01): + t_limit = np.min(stats[stats.fdr <= fdr].t_cut) + res_real['qval'] = [np.min(stats[stats.t_cut <= abs(x)].fdr) for x in res_real['tval_s0']] + res_real['FDR' + str(int(fdr*100)) + '%'] = ["sig" if abs(x) >= t_limit else "non_sig" for x in res_real['tval_s0']] + return(res_real) + + +def perform_ttest_getMaxS(fc, s, s0, n_x, n_y): + """ + Helper function to get ttest stats for specified standard errors s. + Called from within get_fdr_line. + """ + # Get t-values + tval = fc/s + tval_s0 = fc/(s+s0) + + # Get p-values + pval = 2*(1-stats.t.cdf(np.abs(tval), n_x+n_y-2)) + pval_s0 = 2*(1-stats.t.cdf(np.abs(tval_s0), n_x+n_y-2)) + + return [fc, tval, pval, tval_s0, pval_s0] + + +def get_fdr_line(t_limit, s0, n_x, n_y, plot=False, + fc_s=np.arange(0, 6, 0.01), s_s=np.arange(0.005, 6, 0.005)): + """ + Function to get the fdr line for a volcano plot as specified tval_s0 + limit, s0, n_x and n_y. + """ + pvals = np.ones(len(fc_s)) + svals = np.zeros(len(fc_s)) + for i in np.arange(0, len(fc_s)): + for j in np.arange(0, len(s_s)): + res_s = perform_ttest_getMaxS(fc=fc_s[i], s=s_s[j], s0=s0, n_x=n_x, n_y=n_y) + if res_s[3] >= t_limit: + if svals[i] < s_s[j]: + svals[i] = s_s[j] + pvals[i] = res_s[2] + + s_df = pd.DataFrame(np.array([fc_s, svals, pvals]).T, columns=['fc_s','svals','pvals']) + s_df = s_df[s_df.pvals != 1] + + s_df_neg = s_df.copy() + s_df_neg.fc_s = -s_df_neg.fc_s + + s_df = s_df.append(s_df_neg) + + if (plot): + fig = px.scatter(x=s_df.fc_s, + y=-np.log10(s_df.pvals), + template='simple_white') + fig.show() + + return(s_df) + + + +def perform_ttest_analysis(df, c1, c2, s0=1, n_perm=2, fdr=0.01, id_col='Genes', plot_fdr_line=False, parallelize=False): + """ + Workflow function for the entire T-test analysis including FDR + estimation and visualizing a volcanoe plot. 
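# Sketch of how the helpers above chain together (the same flow that perform_ttest_analysis
# wraps); the toy data frame and column names are invented for illustration, and the import
# path follows this diff's package layout (alphastats/multicova/multicova.py):
import numpy as np
import pandas as pd
from alphastats.multicova import multicova

rng = np.random.default_rng(42)
c1, c2 = ["a1", "a2", "a3", "a4"], ["b1", "b2", "b3", "b4"]
df = pd.DataFrame(rng.normal(20, 1, size=(200, 8)), columns=c1 + c2)
df.loc[:19, c1] += 2.0                     # spike in some regulated features
df["Genes"] = [f"gene_{i}" for i in range(200)]

res_real = multicova.workflow_ttest(df, c1, c2, s0=0.05)
res_perm = multicova.workflow_permutation_tvals(df, c1, c2, s0=0.05, n_perm=10)
fdr_stats = multicova.get_fdr_stats_across_deltas(res_real, res_perm)
res_real = multicova.annotate_fdr_significance(res_real, fdr_stats, fdr=0.05)
t_limit = multicova.get_tstat_limit(fdr_stats, fdr=0.05)   # tval_s0 cut-off at 5% FDR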
+ """ + ttest_res = workflow_ttest(df, c1, c2, s0, parallelize=parallelize) + ttest_perm_res = workflow_permutation_tvals(df, c1, c2, s0, n_perm, parallelize=parallelize) + ttest_stats = get_fdr_stats_across_deltas(ttest_res, ttest_perm_res) + ttest_res = annotate_fdr_significance(res_real=ttest_res, stats=ttest_stats, fdr=fdr) + t_limit = get_tstat_limit(stats=ttest_stats, fdr=fdr) + return(ttest_res, t_limit) + + +def perform_regression(y, X, s0): + """ + Function to perform multiple linear regression including the s0 parameter. + """ + # Use LinearRegression from sklearn.linear_model + lm = LinearRegression() + lm.fit(X, y) + betas = np.append(lm.intercept_, lm.coef_) + predictions = lm.predict(X) + + # Add intercept column to X dataframe + newX = pd.DataFrame({"Constant": np.ones(len(X))}).join(pd.DataFrame(X)) + + # Calculate mean squared error (MSE) of observed y vs. prediction + # MSE = squared_error/degrees_of_freedom + # degrees_of_freedom = number_of_samples - number_of_betas (including beta0 = intercept) + MSE = (sum((y-predictions)**2))/(len(newX)-len(newX.columns)) + # Calculate t-values and p-values + ts_b, ps_b = get_tstats(newX, betas, MSE) + # Adjust the MSE by the minimum fold change that we trust (s0) + MSE_s0 = MSE + s0 + # Calculate t-values and p-values + ts_b_s0, ps_b_s0 = get_tstats(newX, betas, MSE_s0) + betas_std = list() + for i in np.arange(0, len(betas)): + betas_std.append(betas[i]*(get_std(np.array(newX)[:, i])/get_std(y))) + betas_std = np.array(betas_std) + return betas, betas_std, predictions, MSE, ts_b, ps_b, MSE_s0, ts_b_s0, ps_b_s0 + + +def get_tstats(newX, betas, MSE): + # Calculate standard deviation for each beta + # This scales the error as estimated by MSE to each beta + # (error on the betas might be different for different betas depending on + # the variable) + var_b = MSE*(np.linalg.inv(np.dot(newX.T, newX)).diagonal()) + sd_b = np.sqrt(var_b) + # Get t-values + ts_b = betas/sd_b + # Get p-values + #ps_b =[2*(1-stats.t.cdf(np.abs(i),len(newX)-len(newX.columns))) for i in ts_b] + #ps_b =[2*(1-nbs.t_cdf(np.abs(i),len(newX)-len(newX.columns),0,1)) for i in ts_b] + ps_b = get_cdf(ts_b, len(newX)-len(newX.columns)) + return ts_b, ps_b + + +@nb.njit +def get_cdf(ts, df): + pvals = [0.0][1:] + for t in ts: + pvals.append(2*(1-nbs.t_cdf(np.abs(t), df, 0, 1))) + return (pvals) + + +def filter_nan(y, X): + """ + Find nan values in quantitative data y and remove corresponding samples + from both X and y. + """ + nan_mask = np.invert(np.isnan(y)) + y_new = y[nan_mask] + X_new = X[nan_mask] + return y_new, X_new + + +def get_min_vars(X): + """ + Get the minimum number of observations for each covariate. + """ + min_vars = list() + for d in np.arange(0, X.shape[1]): + test_vals = X[:, d] + n_vars = len(set(test_vals)) + if n_vars > 2: + min_vars.append(n_vars) + else: + test_vals_0 = np.append(test_vals,[0,1]) # append 0 and 1 for counter + var_count = Counter(test_vals_0) + min_vars.append(min(var_count.values())) + + return min_vars + + +def regression_workflow(y, X, s0): + y_new, X_new = filter_nan(y, X) + # Removed min_var filtering after standard scaling of variable. + # @ToDo: Include sanity check that sufficient categories are covered! 
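# Standalone cross-check of the regression statistics above (illustration only): with s0 = 0,
# the t- and p-values produced by perform_regression/get_tstats reduce to ordinary OLS results,
# so they should match statsmodels up to floating-point error on the same design matrix.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
X = rng.normal(size=(30, 2))                 # two covariates, 30 samples
y = 1.5 * X[:, 0] + rng.normal(size=30)      # only the first covariate has an effect

ols = sm.OLS(y, sm.add_constant(X)).fit()
print(ols.tvalues)   # [intercept, beta_1, beta_2]; should equal ts_b for s0 = 0
print(ols.pvalues)   # should equal ps_b from get_tstats (two-sided t, df = n - 3)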
+ # The following counts the minimum number of observations for each covariate + #min_vars = get_min_vars(X_new) + # A min(min_vars) value < 2 means essentially no values for at least one covariate were observed + # A beta of zero and p-values of 1 are returned for these cases + #if min(min_vars) < 2: + # betas, betas_std, tvals, pvals, tvals_s0, pvals_s0 = 0, 0, 0, 1, 0, 1 + #else: + # betas, betas_std, predictions, MSE, tvals, pvals, MSE_s0, tvals_s0, pvals_s0 = perform_regression(np.array(y_new), np.array(X_new), s0) + betas, betas_std, predictions, MSE, tvals, pvals, MSE_s0, tvals_s0, pvals_s0 = perform_regression(np.array(y_new), np.array(X_new), s0) + return betas, betas_std, tvals, pvals, tvals_s0, pvals_s0 + + +def regression_workflow_permutation(y, X_rand, s0): + res_rand = list() + for X_r in X_rand: + res_rand.append(regression_workflow(y, X_r, s0)) + # num_cores = multiprocessing.cpu_count() + # res_rand = Parallel(n_jobs=num_cores-1)(delayed(regression_workflow)(y=y, X=X_r, s0=s0) for X_r in X_rand) + return res_rand + + +def get_fdr_line_regression(t_limits, s0, X, plot = False, + fc_s = np.arange(0,6,0.01), s_s = np.arange(0.005,6,0.005)): + """ + Function to get the fdr line for a volcano plot as specified tval_s0 limit, s0, n_x and n_y. + """ + #pvals = [list(np.ones(len(fc_s)))] * X.shape[1] + pvals = [list(np.ones(len(fc_s))) for i in range(0,X.shape[1])] + #print(pvals) + #svals = [list(np.zeros(len(fc_s)))] * X.shape[1] + svals = [list(np.zeros(len(fc_s))) for i in range(0,X.shape[1])] + #print(svals) + for i in np.arange(0,len(fc_s)): + for j in np.arange(0,len(s_s)): + res_s = perform_ttest_getMaxS_regression(fc=fc_s[i], s=s_s[j], s0=s0, X=X) + for k in np.arange(0,X.shape[1]): + t_limit = t_limits[k] + if (res_s[k][3] >= t_limit) and (svals[k][i] < s_s[j]): + svals[k][i] = s_s[j] + pvals[k][i] = res_s[k][2] + s_df_list = list() + for k in np.arange(0, X.shape[1]): + s_df = pd.DataFrame(np.array([fc_s,svals[k],pvals[k]]).T, columns=['fc_s','svals','pvals']) + s_df = s_df[s_df.pvals != 1] + + s_df_neg = s_df.copy() + s_df_neg.fc_s = -s_df_neg.fc_s + + s_df = s_df.append(s_df_neg) + + if (plot): + fig = px.scatter(x=s_df.fc_s, + y=-np.log10(s_df.pvals), + template='simple_white') + fig.show() + + s_df_list.append(s_df) + + return(s_df_list) + + +def perform_ttest_getMaxS_regression(fc, s, s0, X): + """ + Helper function to get ttest stats for specified standard errors s. + Called from within get_fdr_line_regression. + """ + newX = pd.DataFrame({"Constant":np.ones(len(X))}).join(pd.DataFrame(X)) + fc=np.repeat(fc,newX.shape[1]) + #print(fc) + var_b = s*(np.linalg.inv(np.dot(newX.T,newX)).diagonal()) + sd_b = np.sqrt(var_b) + ts_b = fc/sd_b + #ps_b =[2*(1-stats.t.cdf(np.abs(i),len(newX)-len(newX.columns))) for i in ts_b] + ps_b = get_cdf(ts_b, len(newX)-len(newX.columns)) + var_b_s0 = (s+s0)*(np.linalg.inv(np.dot(newX.T,newX)).diagonal()) + sd_b_s0 = np.sqrt(var_b_s0) + ts_b_s0 = fc/sd_b_s0 + #ps_b_s0 =[2*(1-stats.t.cdf(np.abs(i),len(newX)-len(newX.columns))) for i in ts_b_s0] + ps_b_s0 = get_cdf(ts_b_s0, len(newX)-len(newX.columns)) + res = [[fc[x], ts_b[x], ps_b[x], ts_b_s0[x], ps_b_s0[x]] for x in np.arange(1,newX.shape[1])] + return res + +def generate_perms(n, n_rand, seed=42): + """ + Generate n_rand permutations of indeces ranging from 0 to n. + """ + np.random.seed(seed) + idx_v = np.arange(0, n) + rand_v = list() + n_rand_i = 0 + n_rand_max = math.factorial(n)-1 + if n_rand_max <= n_rand: + print("{} random permutations cannot be created. 
The maximum of n_rand={} is used instead.".format(n_rand, n_rand_max)) + n_rand = n_rand_max + while n_rand_i < n_rand: + rand_i = list(np.random.permutation(idx_v)) + if np.all(rand_i == idx_v): + next + else: + if rand_i in rand_v: + next + else: + rand_v.append(rand_i) + n_rand_i = len(rand_v) + return rand_v + + +def permutate_multi_vars(X, rand_index, n_rand, seed=42): + """ + Function to randomly permutate a multi-dimensional array X `n_rand` times + at rand_index position. + """ + x_perm_idx = generate_perms(X.shape[0], n_rand=n_rand, seed=seed) + X_rand = list() + for i in np.arange(0, len(x_perm_idx)): + idx_r = np.array(x_perm_idx[i]) + idx_a = np.tile(np.arange(0, X.shape[0]), (X.shape[1], 1)) + idx_a[rand_index] = idx_r + idx_a = np.transpose(idx_a) + X_rand.append(np.take_along_axis(X, indices=idx_a, axis=0)) + + return X_rand + + +def get_res_parallel(y_i, X, s0): + betas, betas_std, tvals, pvals, tvals_s0, pvals_s0 = regression_workflow(y=y_i, X = X, s0 = s0) + res = pd.DataFrame() + res["fc"], res["fc_std"], res["tval"], res["pval"], res["tval_s0"], res["pval_s0"] = [betas, betas_std, tvals, pvals, tvals_s0, pvals_s0] + return res + + +def get_perm_res_parallel(y_i, X_rand, s0): + res_perm_list = regression_workflow_permutation(y_i, X_rand, s0=s0) + return res_perm_list + + +def full_regression_analysis(quant_data, annotation, covariates, sample_column='sample_name', n_permutations=4, fdr=0.05, s0=0.05, seed=42): + data_cols = annotation[sample_column].values + quant_data = quant_data.dropna().reset_index(drop=True) + y = quant_data[data_cols].to_numpy().astype('float') + # @ToDo make sure that columns are sorted correctly!!! + X = np.array(annotation[covariates]) + + # standardize X: + X_scaled = StandardScaler().fit_transform(X) + + num_cores = multiprocessing.cpu_count() + res_list = Parallel(n_jobs=num_cores-1)(delayed(get_res_parallel)(y_i=y_i, X=X_scaled, s0=s0) for y_i in y) + + t_limit_dict = {} + + for test_index in np.arange(0, len(covariates)): + test_covariate = covariates[test_index] + print(test_covariate) + X_rand = permutate_multi_vars(X_scaled, rand_index=test_index, n_rand=n_permutations, seed=seed) + res_perm_list = Parallel(n_jobs=num_cores-1)(delayed(get_perm_res_parallel)(y_i=y_i, X_rand=X_rand, s0=s0) for y_i in y) + + i = test_index + 1 + res_i = [r.iloc[[i]] for r in res_list] + res_i = pd.concat(res_i) + + # get tvals_s0 list of permutations for covariate i + res_i_rand = [[0] * len(res_perm_list) for i in range(len(res_perm_list[0]))] + for a in np.arange(0,len(res_perm_list)): + for b in np.arange(0, len(res_perm_list[a])): + #print(res_perm_list[a][b][3]) + res_i_rand[b][a] = res_perm_list[a][b][4][i] + + res_i_rand = [list(np.sort(np.abs(r))) for r in res_i_rand] + fdr_d_i = get_fdr_stats_across_deltas(res_real=res_i, res_perm=res_i_rand) + res_i = annotate_fdr_significance(res_real=res_i, stats=fdr_d_i, fdr=fdr) + t_limit_i = get_tstat_limit(stats=fdr_d_i, fdr=fdr) + t_limit_dict[test_covariate] = t_limit_i + + # @ToDo: This multiple testing should potentially be done across all covariates together? 
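# Minimal standalone illustration of the permutation scheme used above: for each tested
# covariate only that column of the design matrix is shuffled, while all other covariates
# keep their original sample order (toy example, not part of the diff):
import numpy as np

X = np.column_stack([np.arange(6), np.array([0, 0, 0, 1, 1, 1])])  # 6 samples, 2 covariates
shuffled = np.random.default_rng(1).permutation(6)

X_perm = X.copy()
X_perm[:, 1] = X[shuffled, 1]   # what permutate_multi_vars does for rand_index=1
print(X_perm)                   # column 0 unchanged, column 1 permuted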
+ res_i['qval_BH'] = multipletests(res_i.pval, method='fdr_bh')[1] + res_i['BH_FDR ' + str(int(fdr*100)) + '%'] = ["sig" if abs(x)<=fdr else "non_sig" for x in res_i["qval_BH"]] + res_i['qval_BH_s0'] = multipletests(res_i.pval_s0, method='fdr_bh')[1] + res_i['BH_FDR_s0 ' + str(int(fdr*100)) + '%'] = ["sig" if abs(x)<=fdr else "non_sig" for x in res_i["qval_BH_s0"]] + + res_i = res_i.add_prefix(covariates[test_index] + "_") + + quant_data = pd.concat([quant_data.reset_index(drop=True), res_i.reset_index(drop=True)], axis=1) + + return [quant_data, t_limit_dict] + + +def add_random_covariate(annotation, name='random', n_random=50, seed=42): + annotation_test = annotation.copy() + annotation_test.reset_index(drop=True, inplace=True) + annotation_test[name] = 0 + random.seed(seed) + rand_true = random.sample(range(0, annotation_test.shape[0]), n_random) + annotation_test.loc[rand_true, name] = 1 + return annotation_test + + +def plot_evaluate_seed_and_perm(df, covariates): + config = {'toImageButtonOptions': {'format': 'svg', + 'filename': 'permutation_test', + 'scale': 1 + } + } + for c in covariates: + fig = px.line(df, x="permutations", + y=c, + line_dash="seed", + line_group="seed", + template="simple_white", + color_discrete_sequence=['lightgrey']) + fig.update_traces(mode='lines+markers') + fig.update_layout(width=620, height=350) + fig.show(config=config) + + +def evaluate_seed_and_perm(quant_data, + annotation, + covariates, + perms, + seeds, + sample_column='sample_name', + fdr=0.05, + s0=0.05): + resDF = pd.DataFrame({'seed': np.repeat(seeds, len(perms)), + 'permutations': np.tile(perms, len(seeds))}) + resDF[covariates] = pd.DataFrame([np.repeat(0, len(covariates))], + index=resDF.index) + + for i in np.arange(0,resDF.shape[0]): + res, tlim = full_regression_analysis(quant_data=quant_data, + annotation=annotation, + covariates=covariates, + n_permutations=resDF.permutations[i], + sample_column=sample_column, + fdr=fdr, + s0=s0, + seed=resDF.seed[i]) + + for c in covariates: + resDF.loc[i,c] = res[res[c + "_FDR " + str(int(fdr*100)) + "%"] == "sig"].shape[0] + plot_evaluate_seed_and_perm(resDF, covariates=covariates) + return resDF + + +def plot_evaluate_s0s(df, covariates): + for c in covariates: + fig = px.line(df, x="s0", y=c, title=c, template="simple_white", color_discrete_sequence=['lightgrey']) + fig.update_traces(mode='lines+markers') + fig.update_layout(width=800, height=500) + fig.show() + + +def evaluate_s0s(quant_data, + annotation, + covariates, + s0s, + sample_column='sample_name', + n_permutations=5, + seed=42, + fdr=0.01): + resDF = pd.DataFrame({'s0':s0s}) + resDF[covariates] = pd.DataFrame([np.repeat(0, len(covariates))], index=resDF.index) + + for i in np.arange(0,resDF.shape[0]): + res, tlim = full_regression_analysis(quant_data=quant_data, + annotation=annotation, + covariates=covariates, + sample_column=sample_column, + n_permutations=n_permutations, + fdr=fdr, + s0=resDF.s0[i], + seed=seed) + + for c in covariates: + resDF.loc[i,c] = res[res[c + "_FDR " + str(int(fdr*100)) + "%"] == "sig"].shape[0] + + plot_evaluate_s0s(resDF, covariates=covariates) + + return resDF + + +def plot_pval_dist(df, covariates, mode='separate'): + config = {'toImageButtonOptions': {'format': 'svg', + 'filename': 'pvalue_histogram', + 'scale': 1 + } + } + if (mode == 'separate'): + for c in covariates: + fig = px.histogram(df, + x=c+"_pval", + title=c, + nbins=50, + template='simple_white', + color_discrete_sequence=['lightgrey']) + fig.update_layout(width=400, height=300) + 
fig.show(config=config) + elif (mode == 'joined'): + df_cov = pd.DataFrame() + for c in covariates: + df_c = pd.DataFrame({'p-value': df[c+"_pval"], 'covariate': c}) + df_cov = pd.concat([df_cov, df_c]) + fig = px.histogram(df_cov, x='p-value', + color='covariate', + barmode='overlay', + opacity=0.6, + nbins=50, + template='simple_white') + fig.update_layout(width=620, height=350) + fig.show(config=config) + else: + raise ValueError("The mode parameter needs to be either 'separate' or 'joined'.") + + +def plot_beta_dist(df, covariates): + for c in covariates: + fig = px.histogram(df, x=c+"_fc", title=c, template='simple_white', color_discrete_sequence=['lightgrey']) + fig.update_layout(width=800, height=500) + fig.show() + + +def get_replacement_vals(df, threshold, mean_all, sd_all, ds_f): + """ + Helper function for missing value imputation. + """ + if df.percent_valid_vals == 100: + # print(100) + rep = [] + elif df.percent_valid_vals > threshold: + # print(70) + rep = np.random.normal(df.int_mean-(ds_f*df.int_sd), df.int_sd, df.invalid_vals) + else: + # print(0) + rep = np.random.normal(mean_all-(ds_f*sd_all), sd_all, df.invalid_vals) + return(rep) + +def impute_missing_values(data, percent_impute=20, percent_self_impute=70, downshift_factor=1.8): + """ + Function for missing value imputation. Proteins with less than + 'percent_impute' valid values are removed. + For proteins with less than 'percent_self_impute' valid values, global mean + and sd values are used for imputation. + For proteins with more than 'percent_self_impute' valid values, mean and sd + values for each specific protein are used. + The 'downshift_factor' determines by how many standard deviations the mean + of the imputed distribution is shifted. + """ + + data['int_mean'] = data.filter(regex=("Quantity")).mean(axis=1) + data['int_sd'] = data.filter(regex=("Quantity")).std(axis=1) + data['valid_vals'] = data.filter(regex=("Quantity")).count(axis=1) + data['invalid_vals'] = data.filter(regex=("Quantity")).shape[1]-data.filter(regex=("Quantity")).count(axis=1) + data['percent_valid_vals'] = 100/data.filter(regex=("Quantity")).shape[1]*data['valid_vals'] + + data = data[data.percent_valid_vals >= percent_impute] + + overall_mean = np.mean(data.int_mean) + overall_sd = np.mean(data.int_sd) + + nan_idx = np.where(data.isna()) + + replacement = data.apply(get_replacement_vals, threshold=percent_self_impute, mean_all=overall_mean, sd_all=overall_sd, ds_f = downshift_factor, axis=1) + replacement = [val for sublist in replacement for val in sublist] + + for i in np.arange(0, len(nan_idx[0])): + data.iloc[nan_idx[0][i], nan_idx[1][i]] = replacement[i] + + return data diff --git a/alphastats/plots/VolcanoPlot.py b/alphastats/plots/VolcanoPlot.py index eddf9bd6..6a6d323c 100644 --- a/alphastats/plots/VolcanoPlot.py +++ b/alphastats/plots/VolcanoPlot.py @@ -4,13 +4,14 @@ import numpy as np import pandas as pd import plotly.express as px +import plotly.graph_objects as go from functools import lru_cache class VolcanoPlot(PlotUtils): def __init__( - self, dataset, group1, group2, column=None, method=None, labels=None, min_fc=None, alpha=None, draw_line=None, plot=True + self, dataset, group1, group2, column=None, method=None, labels=None, min_fc=None, alpha=None, draw_line=None, plot=True, perm=100, fdr=0.05 ): self.dataset = dataset self.group1 = group1 @@ -19,11 +20,13 @@ def __init__( self.method = method self.labels = labels self.min_fc = min_fc + self.fdr = fdr self.alpha = alpha self.draw_line = draw_line self.hover_data = 
None self.res = None self.pvalue_column = None + self.perm=perm self._check_input() if plot: @@ -67,13 +70,69 @@ def _perform_differential_expression_analysis(self): elif self.method == "anova": self._anova() + + elif self.method == "sam": + self._sam() + + # elif self.method == "Multi Covariates": + # raise NotImplementedError else: raise ValueError( f"{self.method} is not available." - + "Please select from 'ttest' or 'anova' for anova with follow up tukey or 'wald' for wald-test using." + + "Please select from 'ttest', 'sam' or 'anova' for anova with follow up tukey or 'wald' for wald-test." ) + @lru_cache(maxsize=20) + def _sam_calculate_fdr_line(self): + from alphastats.multicova import multicova + + self.fdr_line= multicova.get_fdr_line( + t_limit=self.tlim_ttest, + s0=0.05, + n_x=len(list(self.dataset.metadata[self.dataset.metadata[self.column]==self.group1][self.dataset.sample])), + n_y=len(list(self.dataset.metadata[self.dataset.metadata[self.column]==self.group2][self.dataset.sample])), + fc_s = np.arange(0,np.max(np.abs(self.res.fc)),np.max(np.abs(self.res.fc))/200), + s_s = np.arange(0.005, 6, 0.0025), + plot=False + ) + + @lru_cache(maxsize=20) + def _sam(self): + from alphastats.multicova import multicova + + print( + "Calculating t-test and permutation-based FDR (SAM)... " + ) + + transposed = self.dataset.mat.transpose() + + if self.dataset.preprocessing_info["Normalization"] is None: + # needs to be log2 transformed for fold change calculations + transposed = transposed.transform(lambda x: np.log2(x)) + + transposed[self.dataset.index_column] = transposed.index + transposed = transposed.reset_index(drop=True) + + res_ttest, tlim_ttest = multicova.perform_ttest_analysis( + transposed, + c1 =list(self.dataset.metadata[self.dataset.metadata[self.column]==self.group1][self.dataset.sample]), + c2 =list(self.dataset.metadata[self.dataset.metadata[self.column]==self.group2][self.dataset.sample]), + s0=0.05, + n_perm=self.perm, + fdr=self.fdr, + id_col=self.dataset.index_column, + parallelize=True + ) + + fdr_column = "FDR" + str(int(self.fdr*100)) + "%" + self.res = res_ttest[[self.dataset.index_column, 'fc', 'tval', 'pval', 'tval_s0', 'pval_s0', 'qval']] + self.res["log2fc"] = res_ttest["fc"] + self.res["FDR"] = res_ttest[fdr_column] + self.tlim_ttest = tlim_ttest + self.pvalue_column = "pval" + + @lru_cache(maxsize=20) def _wald(self): @@ -144,22 +203,39 @@ def _add_hover_data_columns(self): def _annotate_result_df(self): + """ + convert p-values to -log10 + add color labels for up- and down-regulated proteins + """ self.res = self.res[(self.res["log2fc"] < 10) & (self.res["log2fc"] > -10)] self.res["-log10(p-value)"] = -np.log10(self.res[self.pvalue_column]) self.alpha = -np.log10(self.alpha) # add color variable to plot + + if self.method != "sam": - condition = [ - (self.res["log2fc"] < -self.min_fc) & (self.res["-log10(p-value)"] > self.alpha), - (self.res["log2fc"] > self.min_fc) & (self.res["-log10(p-value)"] > self.alpha), - ] + condition = [ + (self.res["log2fc"] < -self.min_fc) & (self.res["-log10(p-value)"] > self.alpha), + (self.res["log2fc"] > self.min_fc) & (self.res["-log10(p-value)"] > self.alpha), + ] + + else: + + condition = [ + (self.res["log2fc"] < 0) & (self.res["FDR"] == "sig"), + (self.res["log2fc"] > 0) & (self.res["FDR"] == "sig"), + ] + value = ["down", "up"] - self.res["color"] = np.select(condition, value, default="non-significant") + self.res["color"] = np.select(condition, value, default="non_sig") def _add_labels_plot(self): + """ + add gene names as hover
data if they are given + """ if self.dataset.gene_names is not None: label_column = self.dataset.gene_names @@ -167,7 +243,7 @@ def _add_labels_plot(self): label_column = self.dataset.index_column self.res["label"] = np.where( - self.res.color != "non-significant", self.res[label_column], "" + self.res.color != "non_sig", self.res[label_column], "" ) #  replace nas with empty string (can cause error when plotting with gene names) self.res["label"] = self.res["label"].fillna("") @@ -181,6 +257,9 @@ def _add_labels_plot(self): ) def _draw_lines_plot(self): + """ + draw lines using fold change and p-value cut-off, fast method + """ self.plot.add_hline( y=self.alpha, line_width=1, line_dash="dash", line_color="#8c8c8c" @@ -192,6 +271,27 @@ def _draw_lines_plot(self): x=-self.min_fc, line_width=1, line_dash="dash", line_color="#8c8c8c" ) + def _draw_fdr_line(self): + """ + Draw fdr line if SAM was applied + """ + self._sam_calculate_fdr_line() + + self.plot.add_trace(go.Scatter( + x=self.fdr_line[self.fdr_line.fc_s > 0].fc_s, + y=-np.log10(self.fdr_line[self.fdr_line.fc_s > 0].pvals), + line_color="black", + line_shape='spline', + showlegend=False) + ) + self.plot.add_trace(go.Scatter( + x=self.fdr_line[self.fdr_line.fc_s < 0].fc_s, + y=-np.log10(self.fdr_line[self.fdr_line.fc_s < 0].pvals), + line_color="black", + line_shape='spline', + showlegend=False) + ) + def _plot(self): self.plot = px.scatter( @@ -201,16 +301,20 @@ def _plot(self): color="color", hover_data=self.hover_data, ) + + # update coloring + color_dict = {"non_sig": "#404040", "up": "#B65EAF", "down": "#009599"} + self.plot = self._update_colors_plotly(self.plot, color_dict=color_dict) if self.labels: self._add_labels_plot() + if self.draw_line: - self._draw_lines_plot() + if self.method == "sam": + self._draw_fdr_line() + else: + self._draw_lines_plot() - # update coloring - color_dict = {"non-significant": "#404040", "up": "#B65EAF", "down": "#009599"} - self.plot = self._update_colors_plotly(self.plot, color_dict=color_dict) - self.plot.update_layout(showlegend=False) self.plot.update_layout(width=600, height=700) @@ -222,4 +326,6 @@ def _plot(self): preprocessing_info=self.dataset.preprocessing_info, method=self.method ) + + diff --git a/docs/conf.py b/docs/conf.py index 7319ed76..a856a32b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ author = "Elena Krismer" # The full version, including alpha/beta/rc tags -release = "0.3.0" +release = "0.4.0" # -- General configuration --------------------------------------------------- diff --git a/release/one_click_linux_gui/control b/release/one_click_linux_gui/control index e12caeec..0eb49c56 100644 --- a/release/one_click_linux_gui/control +++ b/release/one_click_linux_gui/control @@ -1,5 +1,5 @@ Package: AlphaPeptStats -Version: 0.3.0 +Version: 0.4.0 Architecture: all Maintainer: MannLabs Description: alphastats diff --git a/release/one_click_linux_gui/create_installer_linux.sh b/release/one_click_linux_gui/create_installer_linux.sh index 998acbfb..9a51821a 100644 --- a/release/one_click_linux_gui/create_installer_linux.sh +++ b/release/one_click_linux_gui/create_installer_linux.sh @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_linux_gui # Make sure you include the required extra packages and always use the stable or very-stable options! 
-pip install "../../dist/alphastats-0.3.0-py3-none-any.whl" +pip install "../../dist/alphastats-0.4.0-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==4.10 diff --git a/release/one_click_macos_gui/create_installer_macos.sh b/release/one_click_macos_gui/create_installer_macos.sh index 9ca6d490..37d88c5e 100644 --- a/release/one_click_macos_gui/create_installer_macos.sh +++ b/release/one_click_macos_gui/create_installer_macos.sh @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_macos_gui -pip install "../../dist/alphastats-0.3.0-py3-none-any.whl" +pip install "../../dist/alphastats-0.4.0-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==4.10 @@ -40,5 +40,5 @@ cp ../../LICENSE.txt Resources/LICENSE.txt cp ../logos/alphapeptstats_logo.png Resources/alphapeptstats_logo.png chmod 777 scripts/* -pkgbuild --root dist/alphastats --identifier de.mpg.biochem.alphastats.app --version 0.3.0 --install-location /Applications/AlphaPeptStats.app --scripts scripts AlphaPeptStats.pkg +pkgbuild --root dist/alphastats --identifier de.mpg.biochem.alphastats.app --version 0.4.0 --install-location /Applications/AlphaPeptStats.app --scripts scripts AlphaPeptStats.pkg productbuild --distribution distribution.xml --resources Resources --package-path AlphaPeptStats.pkg dist/alphastats_gui_installer_macos.pkg diff --git a/release/one_click_macos_gui/distribution.xml b/release/one_click_macos_gui/distribution.xml index e1d31d4f..b76dc8c9 100644 --- a/release/one_click_macos_gui/distribution.xml +++ b/release/one_click_macos_gui/distribution.xml @@ -1,6 +1,6 @@ - AlphaPeptStats 0.3.0 + AlphaPeptStats 0.4.0 diff --git a/release/one_click_windows_gui/alphastats_innoinstaller.iss b/release/one_click_windows_gui/alphastats_innoinstaller.iss index 115fe2f9..c984d3c4 100644 --- a/release/one_click_windows_gui/alphastats_innoinstaller.iss +++ b/release/one_click_windows_gui/alphastats_innoinstaller.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "AlphaPeptStats" -#define MyAppVersion "0.3.0" +#define MyAppVersion "0.4.0" #define MyAppPublisher "MannLabs" #define MyAppURL "https://github.com/MannLabs/alphapeptstats" #define MyAppExeName "alphastats_gui.exe" diff --git a/release/one_click_windows_gui/create_installer_windows.sh b/release/one_click_windows_gui/create_installer_windows.sh index 14964d1a..58be5817 100644 --- a/release/one_click_windows_gui/create_installer_windows.sh +++ b/release/one_click_windows_gui/create_installer_windows.sh @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_windows_gui # Make sure you include the required extra packages and always use the stable or very-stable options! 
-pip install "../../dist/alphastats-0.3.0-py3-none-any.whl" +pip install "../../dist/alphastats-0.4.0-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==4.10 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 18fc99fc..bcfb1bac 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -15,5 +15,7 @@ umap-learn==0.5.3 streamlit==1.17.0 tables==3.6.1 numpy==1.23.5 - - +numba==0.56.4 +numba-stats==0.5.0 +swifter==1.0.7 +click==8.0.1 \ No newline at end of file diff --git a/setup.py b/setup.py index 83c4305f..1c7beb8e 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ def create_pip_wheel(): requirements, extra_requirements = get_requirements() setuptools.setup( name="alphastats", - version="0.3.0", + version="0.4.0", license="Apache", description="An open-source Python package for Mass Spectrometry Analysis", long_description=get_long_description(), diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py index dbd851d2..822068ab 100644 --- a/tests/test_DataSet.py +++ b/tests/test_DataSet.py @@ -523,7 +523,26 @@ def test_plot_volcano_wald(self): group2 = ["1_78_G5", "1_77_G4", "1_76_G3"], method="ttest") column_added = "_comparison_column" in self.obj.metadata.columns.to_list() - self.assertTrue(column_added) + self.assertTrue(column_added) + + def test_plot_volcano_sam(self): + self.obj.preprocess(imputation="knn", normalization="zscore") + plot = self.obj.plot_volcano( + column = "disease", + group1="type 2 diabetes mellitus", + group2 ="type 2 diabetes mellitus|non-alcoholic fatty liver disease", + method="sam", + draw_line =True, + perm= 10 + ) + + # fdr lines get drawn + line_1 = plot.to_plotly_json()["data"][3].get("line").get("shape") + line_2 = plot.to_plotly_json()["data"][4].get("line").get("shape") + + self.assertEqual(line_1, "spline") + self.assertEqual(line_2, "spline") + def test_plot_clustermap_significant(self): import sys
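# Sketch of inspecting the SAM results behind the figure (assumes `ds` is a prepared DataSet
# as in the usage sketch further up; VolcanoPlot is used directly here, which is an internal
# class of this diff rather than a documented public API, so attribute names may change):
from alphastats.plots.VolcanoPlot import VolcanoPlot

vp = VolcanoPlot(
    dataset=ds, column="disease", group1="healthy", group2="disease",
    method="sam", labels=False, min_fc=1, alpha=0.05, draw_line=True,
    perm=10, fdr=0.05,
)
fig = vp.plot                            # same figure returned by DataSet.plot_volcano
sig = vp.res[vp.res["FDR"] == "sig"]     # proteins passing the permutation-based FDR cut-off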