bhklab · jjjermiah · Mar 11, 2024 · Mar 8, 2024 · Mar 11, 2024 · Mar 11, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: AnnotationGx
 Title: AnnotationGx: A package for building, updating and querying an
     annotation database for pharmaco-genomic data
-Version: 0.0.0.9076
+Version: 0.0.0.9080
 Authors@R: c(
     person("Jermiah", "Joseph", role = c("aut", "cre"),
         email = "jermiah.joseph@gmail.com"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,13 +3,18 @@
 export(annotatePubchemCompound)
 export(getChemblMechanism)
 export(getChemblResourceFields)
+export(getOncotreeMainTypes)
+export(getOncotreeTumorTypes)
+export(getOncotreeVersions)
 export(getPubchemAnnotationHeadings)
 export(getPubchemCompound)
 export(getPubchemProperties)
 export(getPubchemStatus)
+export(getUnichemSources)
 export(mapCID2Properties)
 export(mapCell2Accession)
 export(mapCompound2CID)
+export(queryUnichem)
 export(standardize_names)
 import(BiocParallel)
 import(data.table)

diff --git a/R/GuideToPharm.R b/R/GuideToPharm.R
@@ -0,0 +1,48 @@
+#' Get data from the Guide to PHARMACOLOGY Database Web Services
+#'
+#' @param ids `character()` or `integer()` Identifiers to query the web
+#'   service with. If excluded, the entire record for the specified service
+#'   is returned.
+#' @param service `character(1)` Which Guide to PHARMACOLOGY web service
+#'   to query. Defaults to 'ligands'. Other options are 'targets', 'interactions',
+#'   'diseases' and 'references'.
+#' @param id_type `character(1)` What type of identifiers are in `ids`? Defaults
+#'   to 'name', for drug name. The only other option is 'accession', which
+#'   accepts PubChem CIDs.
+#' @param ... Force subsequent parameters to be named. Not used.
+#'
+#' @return A `data.table` of query results.
+#'
+#' @details
+#' The API reference documentation can be found here:
+#' https://www.guidetopharmacology.org/webServices.jsp
+#'
+#' There is also a Python interface available for querying this API. See:
+#' https://github.com/samirelanduk/pygtop
+#'
+#' @importFrom data.table data.table as.data.table rbindlist setnames
+#' @importFrom jsonlite fromJSON
+#' @importFrom httr RETRY GET status_code
+#'
+#' @export
+# getGuideToPharm <- function(
+#     ids = character(),
+#     service = c("ligands", "targets", "interactions", "diseases", "references"),
+#     id_type = c("name", "accession"),
+#     ...,
+# ){
+
+
+#     checkmate::assert_atomic(ids, any.missing = FALSE, min.len = 1)
+#     checkmate::assert_character(service, len = 1)
+#     checkmate::assert_character(id_type, len = 1)
+
+#     url <- httr2::url_parse("https://www.guidetopharmacology.org/services")
+#     url$path <- .buildURL(url$path, service)
+
+#     opts <- list()
+
+#     opts[id_type] <- paste0(ids, collapse = ",")
+
+# }
+
diff --git a/R/oncotree.R b/R/oncotree.R
@@ -0,0 +1,59 @@
+
+#' Fetch one resource from the Oncotree REST API
+#'
+#' Resolves `target` to a single endpoint name, requests `api/<target>` on the
+#' Oncotree host and hands the parsed JSON body to `.asDT()`.
+#'
+#' @param target `character` Endpoint to query: one of "versions",
+#'   "mainTypes" or "tumorTypes". The first choice is used when omitted.
+#'
+#' @return The parsed response as converted by `.asDT()`.
+#'
+#' @noRd
+#' @keywords internal
+.getRequestOncotree <- function(
+    target = c("versions", "mainTypes", "tumorTypes")
+) {
+    endpoint <- match.arg(target)
+    request_url <- .buildURL("http://oncotree.mskcc.org", "api", endpoint)
+
+    request <- .build_request(request_url)
+    response <- .perform_request(request)
+    payload <- .parse_resp_json(response)
+    .asDT(payload)
+}
+#' List the Oncotree releases known to the API
+#'
+#' Thin wrapper that queries the "versions" endpoint of the Oncotree API.
+#'
+#' @return A `data.table` with the available Oncotree versions.
+#'
+#' @export
+getOncotreeVersions <- function() {
+    .getRequestOncotree("versions")
+}
+
+#' List the Oncotree main cancer types
+#'
+#' Queries the "mainTypes" endpoint and names the result's column `mainType`.
+#'
+#' @return A `data.table` containing the main types from the Oncotree database.
+#'
+#' @export
+getOncotreeMainTypes <- function() {
+    main_types <- .getRequestOncotree("mainTypes")
+    setnames(main_types, "mainType")
+    main_types
+}
+
+
+#' List the Oncotree tumor types
+#'
+#' Thin wrapper that queries the "tumorTypes" endpoint of the Oncotree API.
+#'
+#' @return A `data.table` with the tumor types from the Oncotree database.
+#'
+#' @export
+getOncotreeTumorTypes <- function() {
+    .getRequestOncotree("tumorTypes")
+}
diff --git a/R/pubchem_view.R b/R/pubchem_view.R
@@ -113,5 +113,11 @@ annotatePubchemCompound <- function(
     )
   })
 
-  parsed_responses |> unlist()
+  sapply(parsed_responses, .replace_null)
+
 }
+
+# Map NULL or empty input to NA; otherwise keep the first element, unnamed, so sapply() gets one value per item.
+.replace_null <- function(x) {
+  if (is.null(x)) NA_character_ else unname(x[1L])
+}
diff --git a/R/unichem.R b/R/unichem.R
@@ -0,0 +1,126 @@
+
+# Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices
+
+
+#' Get the list of sources in UniChem.
+#'
+#' Returns a `data.table` with the following columns:
+#' - `CompoundCount` (integer): Total of compounds provided by that source
+#' - `BaseURL` (string): Source Base URL for compounds
+#' - `Description` (string): Source database description
+#' - `LastUpdated` (string): Date in which the source database was last updated
+#' - `Name` (string): Short name of the source database
+#' - `NameLabel` (string): Machine readable label name of the source database
+#' - `NameLong` (string): Full name of the source database
+#' - `SourceID` (integer): Unique ID for the source database
+#' - `Details` (string): Notes about the source
+#' - `ReleaseDate` (string): Date in which the source database was released
+#' - `ReleaseNumber` (integer): Release number of the source database data stored in UniChem
+#' - `URL` (string): Main URL for the source
+#' - `UpdateComments` (string): Notes about the update process of that source to UniChem
+#'
+#' An error is raised through `.err()` unless the API reports "Success".
+#' @return A data.table with the list of sources in UniChem.
+#'
+#' @export
+getUnichemSources <- function() {
+    funContext <- .funContext("AnnotationGx::getUnichemSources")
+
+    response <- .build_unichem_query("sources") |>
+        .build_request() |>
+        .perform_request() |>
+        .parse_resp_json()
+
+    # isTRUE() also covers a payload without a `response` field, where a bare
+    # `!=` comparison would make `if` fail with "argument is of length zero".
+    if (!isTRUE(response$response == "Success")) {
+        .err(funContext, "Unichem API request failed.")
+    }
+
+    .debug(funContext, sprintf("Unichem sourceCount: %s", response$totalSources))
+
+    sources_dt <- .asDT(response$sources)
+
+    # API field names -> package column names (paired by position).
+    old_names <- c(
+        "UCICount", "baseIdUrl", "description", "lastUpdated", "name",
+        "nameLabel", "nameLong", "sourceID", "srcDetails", "srcReleaseDate",
+        "srcReleaseNumber", "srcUrl", "updateComments")
+    new_names <- c(
+        "CompoundCount", "BaseURL", "Description", "LastUpdated", "Name",
+        "NameLabel", "NameLong", "SourceID", "Details", "ReleaseDate",
+        "ReleaseNumber", "URL", "UpdateComments")
+    setnames(sources_dt, old_names, new_names)
+
+    # Column order of the returned table.
+    new_order <- c(
+        "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
+        "BaseURL", "URL", "Details",
+        "Description", "ReleaseNumber", "ReleaseDate", "LastUpdated",
+        "UpdateComments"
+    )
+    sources_dt[, ..new_order]
+}
+
+#' Query UniChem for a compound.
+#'
+#' Sends a compound lookup to the UniChem API and reshapes the reply.
+#'
+#' @param type `character(1)` Identifier type: "uci", "inchi", "inchikey" or "sourceID".
+#' @param compound `character` or `integer` The compound identifier to search for.
+#' @param sourceID `integer` Source ID, used when `type` is "sourceID". Defaults to `NA_integer_`.
+#' @param request_only `logical(1)` Return the request without performing it? Defaults to FALSE.
+#' @param raw `logical(1)` Return the parsed response unprocessed? Defaults to FALSE.
+#' @param ... Passed on to `.build_unichem_compound_req()`.
+#'
+#' @return A list with `External_Mappings` and `UniChem_Mappings`; the request if `request_only`, or the parsed response if `raw`.
+#'
+#' @examples
+#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
+#'
+#' @export
+queryUnichem <- function(
+    type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
+){
+    funContext <- .funContext("AnnotationGx::queryUnichem")
+    checkmate::assert_string(type)
+    checkmate::assert_atomic(compound)
+    checkmate::assert_integerish(sourceID)
+    # Both switches drive an `if`, so they must be single non-missing logicals.
+    checkmate::assert_flag(request_only)
+    checkmate::assert_flag(raw)
+
+    request <- .build_unichem_compound_req(type, compound, sourceID, ...)
+    if (request_only) return(request)
+
+    response <- request |>
+        .perform_request() |>
+        .parse_resp_json()
+    if (raw) return(response)
+
+    # isTRUE() also rejects a payload that carries no `response` field.
+    if (!isTRUE(response$response == "Success")) {
+        .err(funContext, "Unichem API request failed.")
+    }
+
+    # Mapping names to be consistent with other API calls
+    mapped_sources_dt <- .asDT(response$compounds$sources)
+    old_names <- c("compoundId", "shortName", "longName", "id", "url")
+    new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourceURL")
+    setnames(mapped_sources_dt, old = old_names, new = new_names)
+    External_Mappings <- mapped_sources_dt[, ..new_names]
+
+    UniChem_Mappings <- list(
+        UniChem.UCI = response$compounds$uci,
+        UniChem.InchiKey = response$compounds$standardInchiKey,
+        UniChem.Inchi = response$compounds$inchi$inchi,
+        UniChem.formula = response$compounds$inchi$formula,
+        UniChem.connections = response$compounds$inchi$connections,
+        UniChem.hAtoms = response$compounds$inchi$hAtoms
+    )
+
+    list(
+        External_Mappings = External_Mappings,
+        UniChem_Mappings = UniChem_Mappings
+    )
+}
diff --git a/R/unichem_helpers.R b/R/unichem_helpers.R
@@ -0,0 +1,87 @@
+#' Build a UniChem query URL
+#'
+#' Combines the path of the UniChem v1 API base URL with `endpoint` via `.buildURL()`.
+#'
+#' @param endpoint `character(1)` UniChem endpoint to query: one of "compounds", "connectivity", "images" or "sources".
+#' @param query_only `logical(1)` Return the parsed URL object instead of the URL string? Defaults to FALSE.
+#'
+#' @return The parsed URL from `httr2::url_parse()` if `query_only` is TRUE, otherwise the result of `httr2::url_build()`.
+#'
+#' @examples
+#' .build_unichem_query("sources")
+#' .build_unichem_query("connectivity", query_only = TRUE)
+#'
+#' @noRd
+#' @keywords internal
+.build_unichem_query <- function(
+    endpoint, query_only = FALSE
+) {
+    funContext <- .funContext("AnnotationGx:::.build_unichem_query")
+
+    # A single endpoint is required: the path below is built from one name.
+    valid_endpoints <- c("compounds", "connectivity", "images", "sources")
+    checkmate::assert_choice(endpoint, valid_endpoints)
+    checkmate::assert_flag(query_only)
+
+    url <- httr2::url_parse("https://www.ebi.ac.uk/unichem/api/v1")
+    url$path <- .buildURL(url$path, endpoint)
+
+    .debug(funContext, "URL: ", capture.output(show(url)))
+
+    if (query_only) return(url)
+    httr2::url_build(url)
+}
+
+
+#' Build a UniChem compound request
+#'
+#' Creates the request for the UniChem "compounds" endpoint and attaches a JSON body.
+#'
+#' @param type `character(1)` Identifier type: one of "uci", "inchi", "inchikey" or "sourceID".
+#' @param compound The compound identifier to search for.
+#' @param sourceID Single whole number naming the source; only validated and sent when `type` is "sourceID". Defaults to NULL.
+#' @param ... Additional arguments. Not used.
+#'
+#' @return The request returned by `.build_request()` with the JSON body attached.
+#'
+#' @examples
+#' .build_unichem_compound_req(type = "uci", compound = "538323")
+#' .build_unichem_compound_req(type = "sourceID", sourceID = 22, compound = "2244")
+#'
+#' @noRd
+#' @keywords internal
+.build_unichem_compound_req <- function(
+    type, compound, sourceID = NULL, ...
+){
+    funContext <- .funContext("AnnotationGx:::.build_unichem_compound_req")
+
+    # `type` is compared with `==` below, so it must be exactly one valid value.
+    valid_types <- c("uci", "inchi", "inchikey", "sourceID")
+    checkmate::assert_choice(type, valid_types)
+
+    base_url <- .build_unichem_query("compounds")
+    .debug(funContext, "Base URL: ", capture.output(show(base_url)))
+
+    body <- list(
+        type = type,
+        compound = compound
+    )
+
+    if (type == "sourceID") {
+        # The upper bound comes from a live getUnichemSources() lookup.
+        checkmate::assert_integerish(
+            x = sourceID,
+            lower = 1,
+            upper = max(getUnichemSources()$SourceID),
+            any.missing = FALSE,
+            len = 1
+        )
+        body$sourceID <- sourceID
+    }
+
+    request <- base_url |>
+        .build_request() |>
+        httr2::req_body_json(body)
+    .debug(funContext, "Request: ", capture.output(show(request)))
+    request
+}
diff --git a/man/getOncotreeMainTypes.Rd b/man/getOncotreeMainTypes.Rd
diff --git a/man/getOncotreeTumorTypes.Rd b/man/getOncotreeTumorTypes.Rd