diff --git a/DESCRIPTION b/DESCRIPTION index 47f34a1..70d062b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,7 +48,7 @@ biocViews: ImmunoOncology, MassSpectrometry, Proteomics, Software, Normalization LazyData: true URL: http://msstats.org BugReports: https://groups.google.com/forum/#!forum/msstats -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Encoding: UTF-8 NeedsCompilation: no Packaged: 2017-10-20 02:13:12 UTC; meenachoi diff --git a/NAMESPACE b/NAMESPACE index c73e7ec..5daf2bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(MSstatsSummarizationOutput) export(MSstatsSummarize) export(MSstatsSummarizeSingleLinear) export(MSstatsSummarizeSingleTMP) +export(MSstatsSummarizeWithSingleCore) export(MaxQtoMSstatsFormat) export(OpenMStoMSstatsFormat) export(OpenSWATHtoMSstatsFormat) diff --git a/R/dataProcess.R b/R/dataProcess.R index 9aefb9c..9415d8e 100755 --- a/R/dataProcess.R +++ b/R/dataProcess.R @@ -165,6 +165,8 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym num_proteins = length(protein_indices) function_environment = environment() cl = parallel::makeCluster(numberOfCores) + getOption("MSstatsLog")("INFO", + "Starting the cluster setup for summarization") parallel::clusterExport(cl, c("MSstatsSummarizeSingleTMP", "MSstatsSummarizeSingleLinear", "input", "impute", "censored_symbol", @@ -196,12 +198,69 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym parallel::stopCluster(cl) return(summarized_results) } else { - input_split = split(input, input$PROTEIN) - return(MSstatsSummarize(input_split, method, impute, censored_symbol, + return(MSstatsSummarizeWithSingleCore(input, method, impute, censored_symbol, remove50missing, equal_variance)) } } +#' Feature-level data summarization with 1 core +#' +#' @inheritParams MSstatsSummarizeWithMultipleCores +#' +#' @importFrom data.table uniqueN +#' @importFrom utils setTxtProgressBar +#' +#' @return list with one element per protein; the first element of each entry holds the run-level summary. 
+#' 
+#' @export
+#' 
+#' @examples
+#' raw = DDARawData
+#' method = "TMP"
+#' cens = "NA"
+#' impute = TRUE
+#' MSstatsConvert::MSstatsLogsSettings(FALSE)
+#' input = MSstatsPrepareForDataProcess(raw, 2, NULL)
+#' input = MSstatsNormalize(input, "EQUALIZEMEDIANS")
+#' input = MSstatsMergeFractions(input)
+#' input = MSstatsHandleMissing(input, "TMP", TRUE, "NA", 0.999)
+#' input = MSstatsSelectFeatures(input, "all")
+#' processed = getProcessed(input)
+#' input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE)
+#' summarized = MSstatsSummarizeWithSingleCore(input, method, impute, cens, FALSE, TRUE)
+#' length(summarized) # list of summarization outputs for each protein
+#' head(summarized[[1]][[1]]) # run-level summary
+#' 
+MSstatsSummarizeWithSingleCore = function(input, method, impute, censored_symbol,
+                                          remove50missing, equal_variance) {
+    # Row indices of `input` grouped by protein: each protein is summarized
+    # independently from its own subset of rows.
+    protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
+    num_proteins = length(protein_indices)
+    summarized_results = vector("list", num_proteins)
+    if (num_proteins == 0) {
+        # utils::txtProgressBar() stops unless max > min, so return the
+        # empty result before creating a bar for zero proteins.
+        return(summarized_results)
+    }
+    # `method` is constant across proteins, so resolve the branch once.
+    use_tmp = method == "TMP"
+    pb = utils::txtProgressBar(min = 0, max = num_proteins, style = 3)
+    # Close the progress bar even when summarizing a protein fails.
+    on.exit(close(pb), add = TRUE)
+    for (protein_id in seq_len(num_proteins)) {
+        single_protein = input[protein_indices[[protein_id]],]
+        summarized_results[[protein_id]] = if (use_tmp) {
+            MSstatsSummarizeSingleTMP(single_protein, impute,
+                                      censored_symbol, remove50missing)
+        } else {
+            MSstatsSummarizeSingleLinear(single_protein, equal_variance)
+        }
+        utils::setTxtProgressBar(pb, protein_id)
+    }
+    summarized_results
+}
+
 #' Feature-level data summarization
 #' 
@@ -257,6 +316,15 @@ MSstatsSummarize = function(proteins_list, method, impute, censored_symbol,
         }
         close(pb)
     }
+    msg_deprecation = 
paste("FUNCTION DEPRECATION NOTICE: We would like to", + "notify you that the MSstatsSummarize function", + "will undergo a transition process. Starting from release 3.21", + "the MSstatsSummarize function in MSstats will be deprecated", + "in favor of MSstatsSummarizeWithSingleCore.", + "Please take the necessary steps to update your codebase", + "and migrate to MSstatsSummarizeWithSingleCore before", + "release 3.21 to avoid any disruptions to your workflow.") + message(msg_deprecation) summarized_results } diff --git a/R/utils_output.R b/R/utils_output.R index 1991ffb..521fdfb 100644 --- a/R/utils_output.R +++ b/R/utils_output.R @@ -1,7 +1,7 @@ #' Post-processing output from MSstats summarization #' #' @param input `data.table` in MSstats format -#' @param summarized output of the `MSstatsSummarize` function +#' @param summarized output of the `MSstatsSummarizeWithSingleCore` function #' @param processed output of MSstatsSelectFeatures #' @param method name of the summarization method #' (`summaryMethod` parameter to `dataProcess`) @@ -32,8 +32,7 @@ #' input = MSstatsSelectFeatures(input, "all") #' processed = getProcessed(input) #' input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE) -#' input_split = split(input, input$PROTEIN) -#' summarized = MSstatsSummarize(input_split, method, impute, cens, FALSE, TRUE) +#' summarized = MSstatsSummarizeWithSingleCore(input, method, impute, cens, FALSE, TRUE) #' output = output = MSstatsSummarizationOutput(input, summarized, processed, #' method, impute, cens) #' diff --git a/R/utils_summarization.R b/R/utils_summarization.R index 211ec0d..ec7433d 100644 --- a/R/utils_summarization.R +++ b/R/utils_summarization.R @@ -63,7 +63,7 @@ #' Fit Tukey median polish #' @param input data.table with data for a single protein #' @param is_labeled logical, if TRUE, data is coming from an SRM experiment -#' @inheritParams MSstatsSummarize +#' @inheritParams MSstatsSummarizeWithSingleCore #' @return data.table #' 
@keywords internal .runTukey = function(input, is_labeled, censored_symbol, remove50missing) { diff --git a/man/MSstatsSummarizationOutput.Rd b/man/MSstatsSummarizationOutput.Rd index c20536b..280087c 100644 --- a/man/MSstatsSummarizationOutput.Rd +++ b/man/MSstatsSummarizationOutput.Rd @@ -16,7 +16,7 @@ MSstatsSummarizationOutput( \arguments{ \item{input}{`data.table` in MSstats format} -\item{summarized}{output of the `MSstatsSummarize` function} +\item{summarized}{output of the `MSstatsSummarizeWithSingleCore` function} \item{processed}{output of MSstatsSelectFeatures} @@ -53,8 +53,7 @@ input = MSstatsHandleMissing(input, "TMP", TRUE, "NA", 0.999) input = MSstatsSelectFeatures(input, "all") processed = getProcessed(input) input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE) -input_split = split(input, input$PROTEIN) -summarized = MSstatsSummarize(input_split, method, impute, cens, FALSE, TRUE) +summarized = MSstatsSummarizeWithSingleCore(input, method, impute, cens, FALSE, TRUE) output = output = MSstatsSummarizationOutput(input, summarized, processed, method, impute, cens) diff --git a/man/MSstatsSummarizeWithSingleCore.Rd b/man/MSstatsSummarizeWithSingleCore.Rd new file mode 100644 index 0000000..84b64a8 --- /dev/null +++ b/man/MSstatsSummarizeWithSingleCore.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataProcess.R +\name{MSstatsSummarizeWithSingleCore} +\alias{MSstatsSummarizeWithSingleCore} +\title{Feature-level data summarization with 1 core} +\usage{ +MSstatsSummarizeWithSingleCore( + input, + method, + impute, + censored_symbol, + remove50missing, + equal_variance +) +} +\arguments{ +\item{input}{feature-level data processed by dataProcess subfunctions} + +\item{method}{summarization method: "linear" or "TMP"} + +\item{impute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
+TRUE (default) imputes 'NA' or '0' (depending on censoredInt option) by Accelerated failure model. +FALSE uses the values assigned by cutoffCensored} + +\item{censored_symbol}{Missing values are censored or at random. +'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. +'0' uses zero intensities as censored intensity. +In this case, NA intensities are missing at random. +The output from Skyline should use '0'. +Null assumes that all NA intensities are randomly missing.} + +\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins +where every run has at least 50\% missing values for each peptide. FALSE is default.} + +\item{equal_variance}{only for summaryMethod = "linear". Default is TRUE. +Logical variable for whether the model should account for heterogeneous variation +among intensities from different features. Default is TRUE, which assumes equal +variance among intensities from features. FALSE means that we cannot assume +equal variance among intensities from features, then we will account for +heterogeneous variation from different features.} +} +\value{ +list with one element per protein; the first element of each entry holds the run-level summary. +} +\description{ +Feature-level data summarization with 1 core +} +\examples{ +raw = DDARawData +method = "TMP" +cens = "NA" +impute = TRUE +MSstatsConvert::MSstatsLogsSettings(FALSE) +input = MSstatsPrepareForDataProcess(raw, 2, NULL) +input = MSstatsNormalize(input, "EQUALIZEMEDIANS") +input = MSstatsMergeFractions(input) +input = MSstatsHandleMissing(input, "TMP", TRUE, "NA", 0.999) +input = MSstatsSelectFeatures(input, "all") +processed = getProcessed(input) +input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE) +summarized = MSstatsSummarizeWithSingleCore(input, method, impute, cens, FALSE, TRUE) +length(summarized) # list of summarization outputs for each protein +head(summarized[[1]][[1]]) # run-level summary + +}