diff --git a/.gitignore b/.gitignore index d11e9ba..06f002a 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ Treatment-Annotation*.Rmd ./*.csv CCLE_treatmentMetadata.csv +AnnotationGx.code-workspace diff --git a/AnnotationGx.code-workspace b/AnnotationGx.code-workspace index 84e94ec..6fdbfa5 100644 --- a/AnnotationGx.code-workspace +++ b/AnnotationGx.code-workspace @@ -2,6 +2,9 @@ "folders": [ { "path": "." + }, + { + "path": "../CoreGx" } ], "settings": { diff --git a/R/cellosaurus.R b/R/cellosaurus.R index e6f48cb..8fc44da 100644 --- a/R/cellosaurus.R +++ b/R/cellosaurus.R @@ -107,9 +107,9 @@ mapCell2Accession <- function( if (query_only) return(lapply(requests, function(req) req$url)) - # perform the requests + # Submit requests using parallel httr2 since cellosaurus doesnt throttle .info(funContext, "Performing Cellosaurus queries") - responses <- .perform_request_parallel(requests) + responses <- .perform_request_parallel(requests, progress = "Querying Cellosaurus...") names(responses) <- as.character(ids) # in case its an numeric ID like cosmic ids if (raw) return(responses) diff --git a/R/cellosaurus_annotations.R b/R/cellosaurus_annotations.R index 76f8af6..045e549 100644 --- a/R/cellosaurus_annotations.R +++ b/R/cellosaurus_annotations.R @@ -4,7 +4,9 @@ #' #' @param accessions The Cellosaurus accession to annotate. #' @param to A character vector specifying the types of annotations to retrieve. Possible values include "id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", and "dt". -#' +#' @param query_only A logical value indicating whether to only return the query string. +#' @param raw A logical value indicating whether to return the raw response. +#' #' @return A data frame containing the annotations for the cell line. #' #' @examples @@ -14,10 +16,13 @@ #' @export annotateCellAccession <- function( accessions, - to = c("id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", "dt") + to = c("id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", "dt"), + query_only = FALSE, raw = FALSE ) { + funContext <- .funContext("annotateCellAccession") + .info(funContext, "Building Cellosaurus requests...") requests <- parallel::mclapply(accessions, function(accession) { .build_cellosaurus_request( query = accession, @@ -29,19 +34,26 @@ annotateCellAccession <- function( query_only = FALSE ) }) - - responses <- .perform_request_parallel(requests) + + .info(funContext, "Performing Requests...") + responses <- .perform_request_parallel(requests, progress = "Querying Cellosaurus...") names(responses) <- accessions - responses_dt <- parallel::mclapply(accessions,function(name) { + if(raw) return(responses) + + .info(funContext, "Parsing Responses...") + responses_dt <- parallel::mclapply(accessions, function(name) { resp <- responses[[name]] .parse_cellosaurus_lines(resp) |> unlist(recursive = FALSE) |> .processEntry() |> .formatSynonyms() - } - ) |> data.table::rbindlist(fill = TRUE) - - responses_dt + } + ) + names(responses_dt) <- accessions + + responses_dt <- data.table::rbindlist(responses_dt, fill = TRUE) + + return(responses_dt) } diff --git a/R/pubchem_rest.R b/R/pubchem_rest.R index 968f5a1..03831cf 100644 --- a/R/pubchem_rest.R +++ b/R/pubchem_rest.R @@ -21,10 +21,10 @@ #' @export getPubchemCompound <- function( ids, from = "cid", to = "property", properties = c("Title", "InChIKey"), - raw = FALSE, query_only = FALSE, output = "JSON", ...) { - funContext <- .funContext("getPubchemCompound") - + raw = FALSE, query_only = FALSE, output = "JSON", ... +) { + funContext <- .funContext("getPubchemCompound") to_ <- if (to == "property") { checkmate::assert_atomic(properties, all.missing = FALSE) checkmate::assert_character(properties) @@ -33,33 +33,37 @@ getPubchemCompound <- function( to } + .info(funContext, "Building PubChem REST queries...") requests <- lapply(ids, function(x) { .build_pubchem_rest_query( id = x, domain = "compound", namespace = from, operation = to_, output = output, raw = raw, query_only = query_only, ... ) }) - if (query_only) { - return(requests) - } + if (query_only) return(requests) tryCatch({ - resps_raw <- httr2::req_perform_sequential(requests, on_error = "continue") + .info(funContext, "Retrieving compound information...") + resps_raw <- httr2::req_perform_sequential( + requests, + on_error = "continue", + progress = "Querying PubCHEM REST API...." + ) + names(resps_raw) <- ids }, error = function(e) { .err(funContext, " An error occurred while retrieving the compound information:\n", e) }) .debug(funContext, " Number of responses: ", length(resps_raw)) - names(resps_raw) <- ids - if (raw) { - return(resps_raw) - } - + if (raw) return(resps_raw) # Parse the responses + .info(funContext, "Parsing PubChem REST responses...") resps <- .parse_pubchem_rest_responses(resps_raw) - failed <- sapply(resps_raw, httr2::resp_is_error, USE.NAMES = T) + # filter failed + # if any query failed, return the failed queries as attributes + failed <- sapply(resps_raw, httr2::resp_is_error, USE.NAMES = T) if (any(failed)) { .warn(funContext, " Some queries failed. See the 'failed' object for details.") failures <- lapply(resps_raw[failed], function(resp) { @@ -69,7 +73,9 @@ getPubchemCompound <- function( failures <- NULL } - if (from != "name") { + # Combine the responses + # might be able to just do the else part... + if (from != "name") { responses <- data.table::rbindlist(resps, fill = TRUE) } else { responses <- data.table::rbindlist(resps, idcol = from, fill = TRUE) @@ -78,7 +84,7 @@ getPubchemCompound <- function( attributes(responses)$failed <- failures - responses + return(responses) } diff --git a/R/pubchem_view_helpers.R b/R/pubchem_view_helpers.R index 01d0f68..fe8812d 100644 --- a/R/pubchem_view_helpers.R +++ b/R/pubchem_view_helpers.R @@ -49,13 +49,14 @@ ) { funContext <- .funContext(".build_pubchem_view_query") - + # Check the inputs checkmate::assert_choice( annotation, c("data", "index", "annotations", "categories", "neighbors", "literature", "structure", "image", "qr", "linkout") ) checkmate::assert_choice(record, c("compound", "substance", "assay", "cell", "gene", "protein")) + # Configure the options for the query opts_ <- list() if (!is.null(heading)) { if (record == "substance") { @@ -95,7 +96,11 @@ url |> httr2::url_build() |> - .build_request() + httr2::request() + + # url |> + # httr2::url_build() |> + # .build_request() } #' Generic function to parse one of the annotation helpers diff --git a/_pkgdown.yml b/_pkgdown.yml index e5311f8..b5fffa1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -106,6 +106,7 @@ articles: navbar: Annotating Samples contents: - Cellosaurus + - OncoTree - title: Compounds navbar: Annotating Compounds @@ -113,7 +114,6 @@ articles: - PubChemAPI - Unichem - ChEMBL - - OncoTree - title: Pipelines navbar: Pipeline Tutorials diff --git a/man/annotateCellAccession.Rd b/man/annotateCellAccession.Rd index df1e520..862c0ef 100644 --- a/man/annotateCellAccession.Rd +++ b/man/annotateCellAccession.Rd @@ -7,13 +7,19 @@ annotateCellAccession( accessions, to = c("id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", - "misspelling", "dt") + "misspelling", "dt"), + query_only = FALSE, + raw = FALSE ) } \arguments{ \item{accessions}{The Cellosaurus accession to annotate.} \item{to}{A character vector specifying the types of annotations to retrieve. Possible values include "id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", and "dt".} + +\item{query_only}{A logical value indicating whether to only return the query string.} + +\item{raw}{A logical value indicating whether to return the raw response.} } \value{ A data frame containing the annotations for the cell line.