docs(dataProcess+groupComparison): Add documentation on output tables…

… of dataProcess and groupComparison
Vitek-Lab · Oct 31, 2024 · 16df86f · 16df86f
1 parent cf7f650
commit 16df86f
Show file tree

Hide file tree

Showing 4 changed files with 150 additions and 5 deletions.
diff --git a/R/dataProcess.R b/R/dataProcess.R
@@ -55,7 +55,45 @@
 #' @inheritParams .documentFunction
 #' 
 #' @importFrom utils sessionInfo
-#' @importFrom data.table as.data.table 
+#' @importFrom data.table as.data.table
+#' 
+#' @return A list containing:
+#' \describe{
+#'   \item{FeatureLevelData}{A data frame with feature-level information after processing. Columns include:
+#'     \describe{
+#'       \item{PROTEIN}{Identifier for the protein associated with the feature.}
+#'       \item{PEPTIDE}{Identifier for the peptide sequence.}
+#'       \item{TRANSITION}{Identifier for the transition, typically representing a specific ion pair.}
+#'       \item{FEATURE}{Unique identifier for the feature, which could be a combination of peptide and transition.}
+#'       \item{LABEL}{Specifies the isotopic labeling of peptides, notably for SRM-based experiments. "L" indicates light-labeled peptides while "H" denotes heavy-labeled peptides.}
+#'       \item{GROUP}{Experimental group identifier.}
+#'       \item{RUN}{Identifier for the specific MS run.}
+#'       \item{SUBJECT}{Subject identifier within the experimental group.}
+#'       \item{FRACTION}{Fraction identifier if fractionation was performed.}
+#'       \item{originalRUN}{Original run identifier before any processing.}
+#'       \item{censored}{Logical indicator of whether the intensity value is considered missing or below the limit of detection.}
+#'       \item{INTENSITY}{Original intensity measurement of the feature in the given run.}
+#'       \item{ABUNDANCE}{Processed abundance or intensity value after log-transformation and normalization.}
+#'       \item{newABUNDANCE}{The ABUNDANCE column with imputed values filled in for missing values. It is the column that is used for protein summarization.}
+#'       \item{predicted}{Predicted intensity values for censored data, typically derived from a statistical model.}
+#'     }
+#'   }
+#'   \item{ProteinLevelData}{A data frame with run-level summarized information for each protein. Columns include:
+#'     \describe{
+#'       \item{RUN}{Identifier for the specific MS run.}
+#'       \item{Protein}{Identifier for the protein.}
+#'       \item{LogIntensities}{Log-transformed intensities for the protein in each run.}
+#'       \item{originalRUN}{Original run identifier before any processing.}
+#'       \item{GROUP}{Experimental group identifier.}
+#'       \item{SUBJECT}{Subject identifier within the experimental group.}
+#'       \item{TotalGroupMeasurements}{Total number of feature measurements for the protein in the given group.}
+#'       \item{NumMeasuredFeatures}{Number of features measured for the protein in the given run.}
+#'       \item{MissingPercentage}{Percentage of missing feature values for the protein in the given run.}
+#'       \item{more50missing}{Logical indicator of whether more than 50 percent of the feature values are missing for the protein in the given run.}
+#'       \item{NumImputedFeature}{Number of features for which values were imputed due to missing or censored data for the protein in the given run.}
+#'     }
+#'   }
+#' }
 #' 
 #' @export
 #' 

diff --git a/R/groupComparison.R b/R/groupComparison.R
@@ -15,8 +15,42 @@
 #' The underlying model fitting functions are lm and lmer for the fixed effects model and mixed effects model, respectively.
 #' The input of this function is the quantitative data from function (dataProcess).
 #'
-#' @return list that consists of three elements: "ComparisonResult" - data.frame with results of statistical testing,
-#' "ModelQC" - data.frame with data used to fit models for group comparison and "FittedModel" - list of fitted models.
+#' @return A list with the following components:
+#' \describe{
+#'   \item{ComparisonResult}{A `data.frame` containing the results of the statistical testing for each protein. The columns include:
+#'     \describe{
+#'       \item{Protein}{The name of the protein for which the comparison is made.}
+#'       \item{Label}{The label of the comparison, typically derived from the `contrast.matrix`.}
+#'       \item{log2FC}{The log fold change between the conditions being compared. The base of the logarithm is specified by the `log_base` parameter, so the value is a log2 fold change only when `log_base` is 2.}
+#'       \item{SE}{The standard error of the log2 fold change estimate.}
+#'       \item{Tvalue}{The t-statistic value for the comparison.}
+#'       \item{DF}{The degrees of freedom associated with the t-statistic.}
+#'       \item{pvalue}{The p-value for the statistical test of the comparison.}
+#'       \item{adj.pvalue}{The adjusted p-value using the Benjamini-Hochberg method for controlling the false discovery rate.}
+#'       \item{issue}{Any issues encountered during the comparison. NA indicates no issues. "oneConditionMissing" occurs when data for one of the conditions being compared is entirely missing for a particular protein.}
+#'       \item{MissingPercentage}{The percentage of missing features for a given protein across all runs. This column is included only if missing values were imputed.}
+#'       \item{ImputationPercentage}{The percentage of features that were imputed for a given protein across all runs. This column is included only if missing values were imputed.}
+#'     }
+#'   }
+#'   \item{ModelQC}{A `data.frame` containing quality control data used to fit models for group comparison. The columns include:
+#'     \describe{
+#'       \item{RUN}{Identifier for the specific MS run.}
+#'       \item{Protein}{Identifier for the protein.}
+#'       \item{ABUNDANCE}{Summarized intensity for the protein in a given run.}
+#'       \item{originalRUN}{Original run identifier before any processing.}
+#'       \item{GROUP}{Experimental group identifier.}
+#'       \item{SUBJECT}{Subject identifier within the experimental group.}
+#'       \item{TotalGroupMeasurements}{Total number of feature measurements for the protein in the given group.}
+#'       \item{NumMeasuredFeatures}{Number of features measured for the protein in the given run.}
+#'       \item{MissingPercentage}{Percentage of missing feature values for the protein in the given run.}
+#'       \item{more50missing}{Logical indicator of whether more than 50 percent of the feature values are missing for the protein in the given run.}
+#'       \item{NumImputedFeature}{Number of features for which values were imputed due to missing or censored data for the protein in the given run.}
+#'       \item{residuals}{The differences between the observed values and the values predicted by the fitted model.}
+#'       \item{fitted}{The predicted values obtained from the model for a protein measurement for a given run in the dataset.}
+#'     }
+#'   }
+#'   \item{FittedModel}{A list of fitted models for each protein. This is included only if `save_fitted_models` is set to TRUE. Each element of the list corresponds to a protein and contains the fitted model object.}
+#' }
 #' 
 #' @export 
 #' @import lme4

diff --git a/man/dataProcess.Rd b/man/dataProcess.Rd
diff --git a/man/groupComparison.Rd b/man/groupComparison.Rd