bhklab · jjjermiah · Mar 15, 2024 · Mar 15, 2024 · Mar 15, 2024 · Mar 15, 2024
diff --git a/.github/ISSUE_TEMPLATE/issue_template.md b/.github/ISSUE_TEMPLATE/issue_template.md
@@ -0,0 +1,58 @@
+---
+name: Bug Report
+about: Describe the bug in detail
+title: "[BUG] A short description of the bug"
+labels: ''
+assignees: ''
+---
+
+**Note**. Update the issue title to concisely describe the bug.
+
+## Describe the bug
+
+Please provide a clear and concise description of what the bug is.
+
+### Provide a minimal reproducible example (reprex)
+
+Where possible, illustrate the bug with a minimal reproducible example. This is
+not required, but it is easy to create one with the
+[`reprex` package](https://reprex.tidyverse.org/articles/learn-reprex.html).
+
+For tips on creating a reprex, see this
+[StackOverflow link](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example).
+
+## Expected behavior
+
+A clear and concise description of what you expected to happen.
+
+## R Session Information
+
+Please report the output of either `sessionInfo()` or
+`sessioninfo::session_info()` here.
+
+<details>
+
+```R
+options(width = 120)
+## insert session info here
+# sessioninfo::session_info() ## provides GitHub, pandoc, and other details
+# sessionInfo() ## base R function in case you don't want to install sessioninfo
+```
+
+</details>
+
+- [ ] `BiocManager::valid()` is `TRUE`
+
+**Note**. To avoid potential issues with version mixing and reproducibility, do
+not install packages from `GitHub`.
+
+## Additional Context
+
+Provide some additional context for the bug report. You may include web links
+(e.g., from GitHub) to:
+
+* raw code
+* a commit
+* code inside a commit
+* code from an R package
+
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(annotatePubchemCompound)
+export(cleanCharacterStrings)
 export(getChemblFilterTypes)
 export(getChemblMechanism)
 export(getChemblResourceFields)
@@ -17,7 +18,7 @@ export(mapCID2Properties)
 export(mapCell2Accession)
 export(mapCompound2CID)
 export(queryChemblAPI)
-export(queryUnichem)
+export(queryUnichemCompound)
 export(standardize_names)
 import(BiocParallel)
 import(data.table)

diff --git a/R/standardize_names.R b/R/standardize_names.R
@@ -34,3 +34,65 @@ standardize_names <- function(object) {
   object <- toupper(object)
   object
 }
+
+
+#' Clean character strings by removing special characters and formatting.
+#'
+#' Coerces `name` to character, replaces or drops spaces, truncates at the first
+#' colon, strips a fixed set of punctuation, hyphens (unless `space_action = "-"`)
+#' and bracketed text, then upper-cases and drops bytes not convertible to ASCII.
+#'
+#' @param name A character string to be cleaned; coerced with `as.character()`.
+#' @param space_action A single string controlling how spaces are handled: `"-"`
+#'   replaces them with hyphens, `" "` keeps them, anything else (default `""`) removes them.
+#' @return The cleaned, upper-cased character string.
+#'
+#' @examples
+#' cleanCharacterStrings("Cisplatin: 1 mg/mL (1.5 mM); 5 mM in DMSO")
+#'
+#' @export
+cleanCharacterStrings <- function(name, space_action = "") {
+
+  # coerce name to character so the regex steps below always see strings
+  name <- as.character(name)
+
+  # handle spaces per space_action: "-" -> hyphen, " " -> unchanged, otherwise removed
+  if (space_action == "-") {
+    name <- gsub(" ", "-", name)
+  } else if (space_action == " ") {
+    name <- gsub(" ", " ", name)
+  }else{
+    name <- gsub(" ", "", name)
+  }
+  # NOTE(review): the " " branch is a no-op; space_action is assumed to be one non-NA string - TODO validate
+  # if there is a colon like in "Cisplatin: 1 mg/mL (1.5 mM); 5 mM in DMSO"
+  # remove the first colon and everything after it
+  name <- gsub(":.*", "", name)
+
+  # remove ,  ;  +  *  $  %  #  ^  _  (hyphens and spaces are handled separately)
+  name <- gsub("[\\,\\;\\+\\*\\$\\%\\#\\^\\_]", "", name, perl = TRUE)
+
+  # remove hyphens, unless hyphen is the requested space replacement
+  if (!space_action == "-")  name <- gsub("-", "", name)
+
+  # remove round brackets and contents (greedy: spans the first "(" to the last ")")
+  name <- gsub("\\s*\\(.*\\)", "", name)
+
+  # remove square brackets and contents (same greedy matching)
+  name <- gsub("\\s*\\[.*\\]", "", name)
+
+  # remove curly brackets and contents (same greedy matching)
+  name <- gsub("\\s*\\{.*\\}", "", name)
+
+
+
+  # convert entire string to uppercase
+  name <- toupper(name)
+  # name is upper-case from here on, so the mixed-case marker "Unicode" used below cannot occur in it
+  # drop non-ASCII input: iconv() substitutes "Unicode" for each unconvertible byte, gsub() deletes it
+  name <- gsub("Unicode", "", iconv(name, "LATIN1", "ASCII", "Unicode"), perl=TRUE)
+
+  name
+}
+
+
diff --git a/R/unichem.R b/R/unichem.R
@@ -1,8 +1,11 @@
 
 # Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices
 
-
 #' Get the list of sources in UniChem.
+#' 
+#' @param all_columns `boolean` Whether to return all columns. Defaults to FALSE, in which case only `Name` and `SourceID` are returned.
+#' 
+
 #' 
 #' Returns a `data.table` with the following columns:
 #' - `CompoundCount` (integer): Total of compounds provided by that source
@@ -23,7 +26,7 @@
 #' @return A data.table with the list of sources in UniChem.
 #' 
 #' @export
-getUnichemSources <- function() {
+getUnichemSources <- function(all_columns = FALSE) {
     funContext <- .funContext("AnnotationGx::getUnichemSources")
 
     response <- .build_unichem_query("sources") |>
@@ -58,16 +61,21 @@ getUnichemSources <- function() {
         "UpdateComments"
     )
 
-    sources_dt[, new_order, with = FALSE]
+
+    sources_dt <- sources_dt[, new_order, with = FALSE]
+
+    if(all_columns) return(sources_dt)
+
+    sources_dt[, c("Name", "SourceID")]
 
 }
 
 #' Query UniChem for a compound.
 #' 
 #' This function queries the UniChem API for a compound based on the provided parameters.
 #' 
-#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
 #' @param compound `character` or `integer` The compound identifier to search for.
+#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
 #' @param sourceID `integer` The source ID to search for if the type is "sourceID". Defaults to NULL.
 #' @param request_only `boolean` Whether to return the request only. Defaults to FALSE.
 #' @param raw `boolean` Whether to return the raw response. Defaults to FALSE.
@@ -76,11 +84,11 @@ getUnichemSources <- function() {
 #' @return A list with the external mappings and the UniChem mappings.
 #' 
 #' @examples
-#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
+#' queryUnichemCompound(type = "sourceID", compound = "444795", sourceID = 22)
 #' 
 #' @export
-queryUnichem <- function(
-    type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
+queryUnichemCompound <- function(
+    compound, type, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
 ){
     checkmate::assert_string(type)
     checkmate::assert_atomic(compound)
@@ -105,7 +113,7 @@ queryUnichem <- function(
     mapped_sources_dt <- .asDT(response$compounds$sources)
     old_names <- c("compoundId", "shortName", "longName", "id", "url")
 
-    new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
+    new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourceURL")
     setnames(mapped_sources_dt, old = old_names, new = new_names)
 
     External_Mappings <- mapped_sources_dt[, new_names, with = FALSE]

diff --git a/R/unichem_helpers.R b/R/unichem_helpers.R
@@ -25,11 +25,12 @@
     url <- httr2::url_parse(unichem_api)
     url$path <- .buildURL(url$path, endpoint)
 
-    .debug(funContext, "URL: ", utils::capture.output(show(url)))
+    output <- httr2::url_build(url)
 
-    if (query_only) return(url)
+    .debug(funContext, "URL: ", output )
 
-    return(httr2::url_build(url))
+    if (query_only) return(url)
+    httr2::url_build(url) 
 }
 
 
@@ -60,7 +61,7 @@
 
     base_url <- .build_unichem_query("compounds")
 
-    .debug(funContext, "Base URL: ", utils::capture.output(show(base_url)))
+    .debug(funContext, "Base URL: ", base_url)
 
     body <- list(
         type = type,
@@ -82,6 +83,6 @@
         .build_request() |>
         httr2::req_body_json(body) 
 
-    .debug(funContext, "Request: ", utils::capture.output(show(request)))
+    .debug(funContext, "Request: ", request)
     return(request)
 }
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -2,3 +2,13 @@ url: https://bhklab.github.io/AnnotationGx/
 template:
   bootstrap: 5
 
+articles:
+- title: Articles
+  navbar: ~
+  contents:
+  - Introduction
+  - Cellosaurus
+  - ChEMBL
+  - OncoTree
+  - PubChemAPI
+  - Unichem
diff --git a/inst/extdata/treatment_pipeline.Rmd b/inst/extdata/treatment_pipeline.Rmd
@@ -37,7 +37,7 @@ sources <- getUnichemSources()
 
 lapply(
   names_to_cids[1:2, cids],
-  queryUnichem,
+  queryUnichemCompound,
   type = "sourceID", 
   sourceID = sources[Name == "pubchem", SourceID]
 )

diff --git a/man/cleanCharacterStrings.Rd b/man/cleanCharacterStrings.Rd
diff --git a/man/getUnichemSources.Rd b/man/getUnichemSources.Rd
diff --git a/man/queryUnichem.Rd → man/queryUnichemCompound.Rd b/man/queryUnichem.Rd → man/queryUnichemCompound.Rd