Skip to content

Commit

Permalink
feat(groupComparison): Enable parallel processing for groupComparison…
Browse files Browse the repository at this point in the history
… function (#110)

* feat(groupComparison): Enable parallel processing for groupComparison function

* refactored single core vs multicore group comparison into separate util functions

* update documentation regarding groupComparisonWithMultipleCores util function

* fix(groupComparison): Remove return statements, define environment() variable, fixed all_proteins_id iterator

* docs: Updated docs to mention that parallel processing only works for Linux/Mac OS
  • Loading branch information
tonywu1999 authored Mar 4, 2024
1 parent e589cb0 commit 9c655f6
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 20 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Imports:
grDevices,
graphics,
methods,
statmod
statmod,
parallel
Suggests:
BiocStyle,
knitr,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ importFrom(limma,squeezeVar)
importFrom(lme4,lmer)
importFrom(marray,maPalette)
importFrom(methods,is)
importFrom(parallel,clusterExport)
importFrom(parallel,makeCluster)
importFrom(parallel,parLapply)
importFrom(parallel,stopCluster)
importFrom(plotly,add_trace)
importFrom(plotly,ggplotly)
importFrom(plotly,plot_ly)
Expand Down
38 changes: 21 additions & 17 deletions R/groupComparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#' @param save_fitted_models logical, if TRUE, fitted models will be added to
#' the output.
#' @param log_base base of the logarithm used in dataProcess.
#' @param numberOfCores Number of cores for parallel processing. When > 1,
#' a logfile named `MSstats_groupComparison_log_progress.log` is created to
#' track progress. Only works for Linux & Mac OS.
#' @inheritParams .documentFunction
#'
#' @details
Expand Down Expand Up @@ -44,7 +47,8 @@
groupComparison = function(contrast.matrix, data,
save_fitted_models = TRUE, log_base = 2,
use_log_file = TRUE, append = FALSE,
verbose = TRUE, log_file_path = NULL
verbose = TRUE, log_file_path = NULL,
numberOfCores = 1
) {
MSstatsConvert::MSstatsLogsSettings(use_log_file, append, verbose,
log_file_path,
Expand All @@ -61,7 +65,8 @@ groupComparison = function(contrast.matrix, data,
getOption("MSstatsMsg")("INFO",
" == Start to test and get inference in whole plot ...")
testing_results = MSstatsGroupComparison(split_summarized, contrast_matrix,
save_fitted_models, repeated, samples_info)
save_fitted_models, repeated, samples_info,
numberOfCores)
getOption("MSstatsLog")("INFO",
"== Comparisons for all proteins are done.")
getOption("MSstatsMsg")("INFO",
Expand Down Expand Up @@ -107,8 +112,12 @@ MSstatsPrepareForGroupComparison = function(summarization_output) {
#' @param save_fitted_models if TRUE, fitted models will be included in the output
#' @param repeated logical, output of checkRepeatedDesign function
#' @param samples_info data.table, output of getSamplesInfo function
#' @param numberOfCores Number of cores for parallel processing. When > 1,
#' a logfile named `MSstats_groupComparison_log_progress.log` is created to
#' track progress. Only works for Linux & Mac OS.
#'
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom parallel makeCluster clusterExport parLapply stopCluster
#'
#' @export
#'
Expand All @@ -130,22 +139,17 @@ MSstatsPrepareForGroupComparison = function(summarization_output) {
#' group_comparison[[2]][[3]] # NULL, because we set save_fitted_models to FALSE
#'
MSstatsGroupComparison = function(summarized_list, contrast_matrix,
save_fitted_models, repeated, samples_info) {
groups = colnames(contrast_matrix)
has_imputed = attr(summarized_list, "has_imputed")
all_proteins_id = seq_along(summarized_list)
test_results = vector("list", length(all_proteins_id))
pb = txtProgressBar(max = length(all_proteins_id), style = 3)
for (i in all_proteins_id) {
comparison_outputs = MSstatsGroupComparisonSingleProtein(
summarized_list[[i]], contrast_matrix, repeated,
groups, samples_info, save_fitted_models, has_imputed
)
test_results[[i]] = comparison_outputs
setTxtProgressBar(pb, i)
save_fitted_models, repeated, samples_info,
numberOfCores = 1) {
if (numberOfCores > 1) {
return(.groupComparisonWithMultipleCores(summarized_list, contrast_matrix,
save_fitted_models, repeated,
samples_info, numberOfCores))
} else {
return(.groupComparisonWithSingleCore(summarized_list, contrast_matrix,
save_fitted_models, repeated,
samples_info))
}
close(pb)
test_results
}


Expand Down
65 changes: 65 additions & 0 deletions R/utils_groupcomparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,68 @@ getSamplesInfo = function(summarization_output) {
result
}

#' Perform group comparison per protein in parallel
#'
#' Starts a cluster with `numberOfCores` workers, calls
#' `MSstatsGroupComparisonSingleProtein` for every element of
#' `summarized_list` through `parallel::parLapply`, and stops the cluster
#' when the function exits, whether it finishes normally or with an error.
#'
#' @param summarized_list output of MSstatsPrepareForGroupComparison
#' @param contrast_matrix contrast matrix
#' @param save_fitted_models if TRUE, fitted models will be included in the output
#' @param repeated logical, output of checkRepeatedDesign function
#' @param samples_info data.table, output of getSamplesInfo function
#' @param numberOfCores Number of cores for parallel processing.
#' A logfile named `MSstats_groupComparison_log_progress.log` is created to
#' track progress. Only works for Linux & Mac OS.
#' @return list with one element per element of `summarized_list`, each being
#' the value returned by `MSstatsGroupComparisonSingleProtein`.
#' @keywords internal
.groupComparisonWithMultipleCores = function(summarized_list, contrast_matrix,
                                             save_fitted_models, repeated, samples_info,
                                             numberOfCores) {
    groups = colnames(contrast_matrix)
    has_imputed = attr(summarized_list, "has_imputed")
    all_proteins_id = seq_along(summarized_list)
    # Single definition of the progress log so the header write and the
    # per-worker appends cannot drift apart.
    progress_log = "MSstats_groupComparison_log_progress.log"
    function_environment = environment()
    cl = parallel::makeCluster(numberOfCores)
    # Register the shutdown right after the cluster exists: without this an
    # error raised while exporting or inside parLapply would skip
    # stopCluster() and leave the worker processes running.
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl, c("MSstatsGroupComparisonSingleProtein",
                                  "contrast_matrix", "repeated", "groups",
                                  "samples_info", "save_fitted_models",
                                  "has_imputed", "progress_log"),
                            envir = function_environment)
    # Not opened in append mode, so the log starts fresh on every call.
    cat(paste0("Number of proteins to process: ", length(all_proteins_id)),
        sep = "\n", file = progress_log)
    parallel::parLapply(cl, all_proteins_id, function(i) {
        # A line is appended only for indices divisible by 100.
        if (i %% 100 == 0) {
            cat("Finished processing an additional 100 protein comparisons",
                sep = "\n", file = progress_log, append = TRUE)
        }
        MSstatsGroupComparisonSingleProtein(
            summarized_list[[i]], contrast_matrix, repeated,
            groups, samples_info, save_fitted_models, has_imputed
        )
    })
}

#' Perform group comparison per protein iteratively with a single loop
#'
#' Calls `MSstatsGroupComparisonSingleProtein` for every element of
#' `summarized_list` in order, showing a text progress bar while it runs.
#'
#' @param summarized_list output of MSstatsPrepareForGroupComparison
#' @param contrast_matrix contrast matrix
#' @param save_fitted_models if TRUE, fitted models will be included in the output
#' @param repeated logical, output of checkRepeatedDesign function
#' @param samples_info data.table, output of getSamplesInfo function
#' @return list of the same length as `summarized_list`; element `i` is the
#' value returned by `MSstatsGroupComparisonSingleProtein` for the i-th input
#' (an empty list when `summarized_list` is empty).
#' @keywords internal
.groupComparisonWithSingleCore = function(summarized_list, contrast_matrix,
                                          save_fitted_models, repeated,
                                          samples_info) {
    groups = colnames(contrast_matrix)
    has_imputed = attr(summarized_list, "has_imputed")
    all_proteins_id = seq_along(summarized_list)
    test_results = vector("list", length(all_proteins_id))
    # txtProgressBar() requires max > min, so it would throw for zero
    # proteins; return the empty result instead.
    if (length(all_proteins_id) == 0) {
        return(test_results)
    }
    pb = txtProgressBar(max = length(all_proteins_id), style = 3)
    # Close the bar even when a single-protein comparison raises an error.
    on.exit(close(pb), add = TRUE)
    for (i in all_proteins_id) {
        comparison_outputs = MSstatsGroupComparisonSingleProtein(
            summarized_list[[i]], contrast_matrix, repeated,
            groups, samples_info, save_fitted_models, has_imputed
        )
        # `[<-` with a wrapped value keeps the slot when the result is NULL;
        # `[[<-` with NULL would delete the element and shorten the output.
        test_results[i] = list(comparison_outputs)
        setTxtProgressBar(pb, i)
    }
    test_results
}

20 changes: 20 additions & 0 deletions inst/tinytest/test_groupComparison.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Setup ------------------------------------------------------------------
# Run dataProcess on SRMRawData, then build a one-row contrast named "T7-T1"
# whose columns are the GROUP levels ordered by their numeric value.
QuantData = dataProcess(SRMRawData, use_log_file = FALSE)
group_labels = levels(QuantData$ProteinLevelData$GROUP)
sorted_labels = group_labels[order(as.numeric(group_labels))]
contrast_weights = c(-1, 0, 0, 0, 0, 0, 1, 0, 0, 0)
comparison = matrix(contrast_weights, nrow = 1,
                    dimnames = list("T7-T1", sorted_labels))

# Both runs share every argument except numberOfCores.
runComparison = function(...) {
    groupComparison(contrast.matrix = comparison,
                    data = QuantData,
                    use_log_file = FALSE,
                    ...)
}

# Test groupComparison with default parameters ---------------------------
testResultDefaultComparison = runComparison()

# Test groupComparison with numberOfCores parameter ----------------------
testResultParallelComparison = runComparison(numberOfCores = 2)

# The parallel run must produce as many comparison rows as the default run.
expect_equal(nrow(testResultDefaultComparison$ComparisonResult),
             nrow(testResultParallelComparison$ComparisonResult))
7 changes: 6 additions & 1 deletion man/MSstatsGroupComparison.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/dot-groupComparisonWithMultipleCores.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/dot-groupComparisonWithSingleCore.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/groupComparison.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9c655f6

Please sign in to comment.