Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added OncoTree and UniChem functionality #27

Merged
merged 8 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: AnnotationGx
Title: AnnotationGx: A package for building, updating and querying an
annotation database for pharmaco-genomic data
Version: 0.0.0.9076
Version: 0.0.0.9080
Authors@R: c(
person("Jermiah", "Joseph", role = c("aut", "cre"),
email = "jermiah.joseph@gmail.com"),
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@
export(annotatePubchemCompound)
export(getChemblMechanism)
export(getChemblResourceFields)
export(getOncotreeMainTypes)
export(getOncotreeTumorTypes)
export(getOncotreeVersions)
export(getPubchemAnnotationHeadings)
export(getPubchemCompound)
export(getPubchemProperties)
export(getPubchemStatus)
export(getUnichemSources)
export(mapCID2Properties)
export(mapCell2Accession)
export(mapCompound2CID)
export(queryUnichem)
export(standardize_names)
import(BiocParallel)
import(data.table)
Expand Down
48 changes: 48 additions & 0 deletions R/GuideToPharm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#' Get data from the Guide to PHARMACOLOGY Database Web Services
#'
#' @param ids `character()` or `integer()` Identifiers to query the web
#' service with. If excluded, the entire record for the specified service
#' is returned.
#' @param service `character(1)` Which Guide to PHARMACOLOGY web service
#' to query. Defaults to 'ligands'. Other options are 'targets', 'interactions',
#' 'diseases' and 'references'.
#' @param id_type `character(1)` What type of identifiers are in `ids`? Defaults
#' to 'name', for drug name. Other options are 'accession', which accepts
#' PubChem CIDs.
#' @param ... Force subsequent parameters to be named. Not used.
#'
#' @return A `data.table` of query results.
#'
#' @details
#' The API reference documentation can be found here:
#' https://www.guidetopharmacology.org/webServices.jsp
#'
#' There is also a Python interface available for querying this API. See:
#' https://github.com/samirelanduk/pygtop
#'
#' @importFrom data.table data.table as.data.table rbindlist setnames
#' @importFrom jsonlite fromJSON
#' @importFrom httr RETRY GET status_code
#'
#' @export
# getGuideToPharm <- function(
# ids = character(),
# service = c("ligands", "targets", "interactions", "diseases", "references"),
# id_type = c("name", "accession"),
# ...,
# ){


# checkmate::assert_atomic(ids, any.missing = FALSE, min.len = 1)
# checkmate::assert_character(service, len = 1)
# checkmate::assert_character(id_type, len = 1)

# url <- httr2::url_parse("https://www.guidetopharmacology.org/services")
# url$path <- .buildURL(url$path, service)

# opts <- list()

# opts[id_type] <- paste0(ids, collapse = ",")

# }

59 changes: 59 additions & 0 deletions R/oncotree.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@

#' Query an Oncotree API endpoint
#'
#' Builds the URL for the requested Oncotree endpoint, performs the request
#' and converts the parsed JSON body into a table.
#'
#' @param target `character(1)` Endpoint to query. One of "versions",
#'   "mainTypes" or "tumorTypes"; the first choice is used when `target`
#'   is not supplied.
#'
#' @return The parsed response after conversion by `.asDT()`.
#'
#' @noRd
#' @keywords internal
.getRequestOncotree <- function(
    target = c("versions", "mainTypes", "tumorTypes")
) {
  # Resolve the endpoint first so an invalid choice fails before any request
  endpoint <- match.arg(target)
  request_url <- .buildURL("http://oncotree.mskcc.org", "api", endpoint)

  request <- .build_request(request_url)
  response <- .perform_request(request)
  parsed <- .parse_resp_json(response)

  .asDT(parsed)
}
#' List the available Oncotree versions
#'
#' Queries the "versions" endpoint of the Oncotree API.
#'
#' @return A `data.table` with the Oncotree versions returned by the API.
#'
#' @export
getOncotreeVersions <- function() {
  versions <- .getRequestOncotree(target = "versions")
  versions
}

#' List the main types known to Oncotree
#'
#' Queries the "mainTypes" endpoint of the Oncotree API and labels the
#' resulting column `mainType`.
#'
#' @return A `data.table` with a `mainType` column.
#'
#' @export
getOncotreeMainTypes <- function() {
  main_types <- .getRequestOncotree(target = "mainTypes")

  # Single-argument form of setnames(): the value supplied replaces the
  # existing column names (by reference).
  setnames(main_types, "mainType")

  main_types
}


#' List the tumor types known to Oncotree
#'
#' Queries the "tumorTypes" endpoint of the Oncotree API.
#'
#' @return A `data.table` with the tumor types returned by the API.
#'
#' @export
getOncotreeTumorTypes <- function() {
  tumor_types <- .getRequestOncotree(target = "tumorTypes")
  tumor_types
}
8 changes: 7 additions & 1 deletion R/pubchem_view.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,11 @@ annotatePubchemCompound <- function(
)
})

parsed_responses |> unlist()
sapply(parsed_responses, .replace_null)

}

# Helper that substitutes NA_character_ for a NULL value, so that a NULL
# entry still contributes one value when this is mapped over a list of
# parsed responses.
#
# x: a single parsed value, possibly NULL.
# Returns NA_character_ when `x` is NULL, otherwise a value taken from `x`.
#
# NOTE(review): ifelse() shapes its result after the test, and `is.null(x)`
# is always length one -- so a non-NULL `x` with more than one element
# appears to be reduced to its first element here. Confirm that is intended.
.replace_null <- function(x) {
ifelse(is.null(x), NA_character_, x)
}
126 changes: 126 additions & 0 deletions R/unichem.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@

# Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices


#' Get the list of sources in UniChem.
#'
#' Returns a `data.table` with the following columns:
#' - `CompoundCount` (integer): Total number of compounds provided by that source
#' - `BaseURL` (string): Source base URL for compounds
#' - `Description` (string): Source database description
#' - `LastUpdated` (string): Date on which the source database was last updated
#' - `Name` (string): Short name of the source database
#' - `NameLabel` (string): Machine-readable label name of the source database
#' - `NameLong` (string): Full name of the source database
#' - `SourceID` (integer): Unique ID for the source database
#' - `Details` (string): Notes about the source
#' - `ReleaseDate` (string): Date on which the source database was released
#' - `ReleaseNumber` (integer): Release number of the source database data stored in UniChem
#' - `URL` (string): Main URL for the source
#' - `UpdateComments` (string): Notes about the update process of that source to UniChem
#'
#'
#' @return A data.table with the list of sources in UniChem.
#'
#' @export
getUnichemSources <- function() {
  funContext <- .funContext("AnnotationGx::getUnichemSources")

  response <- .build_unichem_query("sources") |>
    .build_request() |>
    .perform_request() |>
    .parse_resp_json()

  # isTRUE() keeps a missing `response` field on the error path:
  # `NULL != "Success"` is logical(0), which would make a bare `if` stop
  # with "argument is of length zero" before .err() is ever reached.
  if (!isTRUE(response$response == "Success")) {
    .err(funContext, "Unichem API request failed.")
  }

  .debug(funContext, sprintf("Unichem sourceCount: %s", response$totalSources))

  sources_dt <- .asDT(response$sources)

  # API field name -> column name exposed by this package
  column_map <- c(
    UCICount = "CompoundCount",
    baseIdUrl = "BaseURL",
    description = "Description",
    lastUpdated = "LastUpdated",
    name = "Name",
    nameLabel = "NameLabel",
    nameLong = "NameLong",
    sourceID = "SourceID",
    srcDetails = "Details",
    srcReleaseDate = "ReleaseDate",
    srcReleaseNumber = "ReleaseNumber",
    srcUrl = "URL",
    updateComments = "UpdateComments"
  )
  setnames(sources_dt, old = names(column_map), new = unname(column_map))

  # Order in which the columns are returned to the caller
  new_order <- c(
    "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
    "BaseURL", "URL", "Details",
    "Description", "ReleaseNumber", "ReleaseDate", "LastUpdated",
    "UpdateComments"
  )

  # `with = FALSE` selects by the character vector without relying on the
  # `..` prefix, which static checks report as an undefined global.
  sources_dt[, new_order, with = FALSE]
}

#' Query UniChem for a compound.
#'
#' This function queries the UniChem API for a compound based on the provided
#' parameters.
#'
#' @param type `character(1)` The type of compound identifier to search for.
#'   Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound `character` or `integer` The compound identifier to search
#'   for.
#' @param sourceID `integer(1)` The source ID to search for if the type is
#'   "sourceID". Defaults to `NA_integer_`.
#' @param request_only `logical(1)` Whether to return the request only,
#'   without performing it. Defaults to FALSE.
#' @param raw `logical(1)` Whether to return the parsed response as is,
#'   without reshaping it. Defaults to FALSE.
#' @param ... Additional arguments passed on to the request builder.
#'
#' @return When `request_only` is TRUE, the request object. When `raw` is
#'   TRUE, the parsed response. Otherwise a list with two elements:
#'   `External_Mappings` (a table with columns `compoundID`, `Name`,
#'   `NameLong`, `sourceID` and `sourcURL`) and `UniChem_Mappings` (a list of
#'   the UniChem identifiers and InChI details of the compound).
#'
#' @examples
#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
#'
#' @export
queryUnichem <- function(
    type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
) {
  funContext <- .funContext("AnnotationGx::queryUnichem")

  checkmate::assert_string(type)
  checkmate::assert_atomic(compound)
  checkmate::assert_integerish(sourceID)
  # Both switches drive a scalar `if`, so require a single non-missing value
  checkmate::assert_flag(request_only)
  checkmate::assert_flag(raw)

  request <- .build_unichem_compound_req(type, compound, sourceID, ...)
  if (request_only) {
    return(request)
  }

  response <- request |>
    .perform_request() |>
    .parse_resp_json()

  if (raw) {
    return(response)
  }

  # isTRUE() keeps a missing `response` field on the error path instead of
  # failing inside `if` with "argument is of length zero".
  if (!isTRUE(response$response == "Success")) {
    .err(funContext, "Unichem API request failed.")
  }

  # Mapping names to be consistent with other API calls
  mapped_sources_dt <- .asDT(response$compounds$sources)
  old_names <- c("compoundId", "shortName", "longName", "id", "url")
  # NOTE(review): "sourcURL" looks like a typo for "sourceURL"; it is kept
  # because the column name is part of the returned value.
  new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
  setnames(mapped_sources_dt, old = old_names, new = new_names)

  External_Mappings <- mapped_sources_dt[, new_names, with = FALSE]

  inchi <- response$compounds$inchi
  UniChem_Mappings <- list(
    UniChem.UCI = response$compounds$uci,
    UniChem.InchiKey = response$compounds$standardInchiKey,
    UniChem.Inchi = inchi$inchi,
    UniChem.formula = inchi$formula,
    UniChem.connections = inchi$connections,
    UniChem.hAtoms = inchi$hAtoms
  )

  list(
    External_Mappings = External_Mappings,
    UniChem_Mappings = UniChem_Mappings
  )
}
87 changes: 87 additions & 0 deletions R/unichem_helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#' Assemble the URL of a UniChem API endpoint
#'
#' Appends the requested endpoint to the UniChem v1 API base path.
#'
#' @param endpoint The UniChem endpoint to query. Must be drawn from
#'   "compounds", "connectivity", "images" and "sources".
#' @param query_only Logical. When TRUE the parsed URL object is returned
#'   instead of the URL string (default: FALSE).
#'
#' @return The parsed `httr2` URL object if `query_only` is TRUE, otherwise
#'   the result of `httr2::url_build()` on it.
#'
#' @examples
#' .build_unichem_query("sources")
#' .build_unichem_query("connectivity", query_only = TRUE)
#'
#' @noRd
#' @keywords internal
.build_unichem_query <- function(
    endpoint, query_only = FALSE
) {
  funContext <- .funContext("AnnotationGx:::.build_unichem_query")

  checkmate::assert_subset(
    endpoint,
    c("compounds", "connectivity", "images", "sources")
  )

  parsed_url <- httr2::url_parse("https://www.ebi.ac.uk/unichem/api/v1")
  parsed_url$path <- .buildURL(parsed_url$path, endpoint)

  .debug(funContext, "URL: ", capture.output(show(parsed_url)))

  if (query_only) {
    parsed_url
  } else {
    httr2::url_build(parsed_url)
  }
}


#' Build a UniChem compound request
#'
#' This function builds a UniChem compound request based on the provided
#' parameters. The identifier type, the compound and (for "sourceID" queries)
#' the source ID are sent as the JSON body of the request.
#'
#' @param type `character(1)` The type of compound identifier to search for.
#'   Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound The compound identifier to search for.
#' @param sourceID The source ID to search for if the type is "sourceID".
#'   Defaults to NULL. Only validated and used when `type` is "sourceID", in
#'   which case it must be a single non-missing whole number between 1 and
#'   the largest `SourceID` reported by `getUnichemSources()`.
#' @param ... Additional arguments. Currently unused.
#'
#' @return A `httr2_request` request object for the UniChem compound query.
#'
#' @examples
#' .build_unichem_compound_req(type = "uci", compound = "538323")
#' .build_unichem_compound_req(type = "sourceID", sourceID = 22, compound = "2244")
#'
#' @noRd
#' @keywords internal
.build_unichem_compound_req <- function(
    type, compound, sourceID = NULL, ...
) {
  funContext <- .funContext("AnnotationGx:::.build_unichem_compound_req")

  # `type` drives a scalar `if` below, so require exactly one valid choice
  valid_types <- c("uci", "inchi", "inchikey", "sourceID")
  checkmate::assert_choice(type, valid_types)

  base_url <- .build_unichem_query("compounds")

  .debug(funContext, "Base URL: ", capture.output(show(base_url)))

  body <- list(
    type = type,
    compound = compound
  )

  if (type == "sourceID") {
    # The upper bound is looked up from the current list of UniChem sources.
    # any.missing = FALSE rejects NA, so a missing source ID fails here with
    # a clear message instead of being sent to the API.
    checkmate::assert_integerish(
      x = sourceID,
      lower = 1,
      upper = max(getUnichemSources()$SourceID),
      len = 1,
      any.missing = FALSE
    )
    body$sourceID <- sourceID
  }

  request <- base_url |>
    .build_request() |>
    httr2::req_body_json(body)

  .debug(funContext, "Request: ", capture.output(show(request)))
  request
}
14 changes: 14 additions & 0 deletions man/getOncotreeMainTypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/getOncotreeTumorTypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading