Skip to content

Commit

Permalink
Merge pull request #37 from bhklab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jjjermiah authored Mar 15, 2024
2 parents 7b85bf7 + 88d5279 commit 31b2558
Show file tree
Hide file tree
Showing 15 changed files with 512 additions and 62 deletions.
58 changes: 58 additions & 0 deletions .github/ISSUE_TEMPLATE/issue_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
name: Bug Report
about: Describe the bug in detail
title: "[BUG] A short description of the bug"
labels: ''
assignees: ''
---

**Note**. Update the issue title to concisely describe the bug.

## Describe the bug

Please provide a clear and concise description of what the bug is.

### Provide a minimal reproducible example (reprex)

Illustrate the bug with the smallest piece of code that reproduces it. Such a
minimal reproducible example can be easily (but not necessarily) created with the
[`reprex` package](https://reprex.tidyverse.org/articles/learn-reprex.html).

For tips on creating a reprex, see this
[StackOverflow link](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example).

## Expected behavior

A clear and concise description of what you expected to happen.

## R Session Information

Please report the output of either `sessionInfo()` or
`sessioninfo::session_info()` here.

<details>

```R
options(width = 120)
## insert session info here
# sessioninfo::session_info() ## provides GitHub, pandoc, and other details
# sessionInfo() ## base R function in case you don't want to install sessioninfo
```

</details>

- [ ] `BiocManager::valid()` is `TRUE`

**Note**. To avoid potential issues with version mixing and reproducibility, do
not install packages from `GitHub`.

## Additional Context

Provide some additional context for the bug report. You may include web links
(e.g., from GitHub) to:

* raw code
* a commit
* code inside a commit
* code from an R package

3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(annotatePubchemCompound)
export(cleanCharacterStrings)
export(getChemblFilterTypes)
export(getChemblMechanism)
export(getChemblResourceFields)
Expand All @@ -17,7 +18,7 @@ export(mapCID2Properties)
export(mapCell2Accession)
export(mapCompound2CID)
export(queryChemblAPI)
export(queryUnichem)
export(queryUnichemCompound)
export(standardize_names)
import(BiocParallel)
import(data.table)
Expand Down
62 changes: 62 additions & 0 deletions R/standardize_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,65 @@ standardize_names <- function(object) {
object <- toupper(object)
object
}


#' Clean character strings by removing special characters and formatting.
#'
#' This function takes character strings as input and performs several cleaning
#' operations to remove special characters, formatting, and unwanted substrings.
#' The operations are applied to every element of `name` in this order:
#'
#' - spaces are replaced, kept or removed according to `space_action`;
#' - the first colon and everything after it is removed;
#' - the characters `, ; + * $ % # ^ _` are removed;
#' - hyphens are removed, unless `space_action` is `"-"`;
#' - text enclosed in `()`, `[]` or `{}` is removed together with any
#'   whitespace directly in front of it. The match is greedy: everything from
#'   the first opening bracket to the last closing bracket of the same kind is
#'   removed;
#' - the result is converted to upper case;
#' - the result is read as Latin-1 and every byte that cannot be converted to
#'   ASCII is removed.
#'
#' @param name A character vector (or an object coercible with
#'   `as.character()`) to be cleaned.
#' @param space_action A single string specifying the action to be taken for
#'   space characters: `"-"` replaces each space with a hyphen (and keeps all
#'   hyphens), `" "` keeps spaces, and any other value, including the default
#'   `""`, removes them.
#' @return A character vector with one cleaned string per element of `name`.
#'
#' @examples
#' cleanCharacterStrings("Cisplatin: 1 mg/mL (1.5 mM); 5 mM in DMSO")
#'
#' @export
cleanCharacterStrings <- function(name, space_action = "") {
  # `space_action` is used in scalar `if` conditions below; fail early with a
  # readable message instead of an obscure condition error.
  stopifnot(length(space_action) == 1L, !is.na(space_action))

  # make sure name is a string
  name <- as.character(name)

  keep_hyphens <- space_action == "-"

  # spaces: "-" replaces them, " " keeps them, anything else removes them
  if (keep_hyphens) {
    name <- gsub(" ", "-", name, fixed = TRUE)
  } else if (space_action != " ") {
    name <- gsub(" ", "", name, fixed = TRUE)
  }

  # if there is a colon like in "Cisplatin: 1 mg/mL (1.5 mM); 5 mM in DMSO"
  # remove everything after the colon (and the colon itself)
  name <- gsub(":.*", "", name)

  # remove , ; + * $ % # ^ _
  # (inside a bracket expression none of these need escaping; `^` is kept away
  # from the first position so it is not read as a negation)
  name <- gsub("[,;+*$%#_^]", "", name)

  # remove hyphens, unless they are the requested replacement for spaces
  if (!keep_hyphens) {
    name <- gsub("-", "", name, fixed = TRUE)
  }

  # remove round, square and curly brackets together with their contents and
  # any whitespace directly in front of the opening bracket
  bracket_patterns <- c("\\s*\\(.*\\)", "\\s*\\[.*\\]", "\\s*\\{.*\\}")
  for (pattern in bracket_patterns) {
    name <- gsub(pattern, "", name)
  }

  # convert entire string to uppercase
  name <- toupper(name)

  # drop every byte without an ASCII equivalent; `sub = ""` removes them
  # directly, so no placeholder text has to be inserted and stripped again
  iconv(name, from = "LATIN1", to = "ASCII", sub = "")
}


24 changes: 16 additions & 8 deletions R/unichem.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

# Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices


#' Get the list of sources in UniChem.
#'
#' @param all_columns `boolean` Whether to return all columns. Defaults to FALSE.
#'

#'
#' Returns a `data.table` with the following columns:
#' - `CompoundCount` (integer): Total of compounds provided by that source
Expand All @@ -23,7 +26,7 @@
#' @return A data.table with the list of sources in UniChem.
#'
#' @export
getUnichemSources <- function() {
getUnichemSources <- function(all_columns = FALSE) {
funContext <- .funContext("AnnotationGx::getUnichemSources")

response <- .build_unichem_query("sources") |>
Expand Down Expand Up @@ -58,16 +61,21 @@ getUnichemSources <- function() {
"UpdateComments"
)

sources_dt[, new_order, with = FALSE]

sources_dt <- sources_dt[, new_order, with = FALSE]

if(all_columns) return(sources_dt)

sources_dt[, c("Name", "SourceID")]

}

#' Query UniChem for a compound.
#'
#' This function queries the UniChem API for a compound based on the provided parameters.
#'
#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound `character` or `integer` The compound identifier to search for.
#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param sourceID `integer` The source ID to search for if the type is "sourceID". Defaults to NULL.
#' @param request_only `boolean` Whether to return the request only. Defaults to FALSE.
#' @param raw `boolean` Whether to return the raw response. Defaults to FALSE.
Expand All @@ -76,11 +84,11 @@ getUnichemSources <- function() {
#' @return A list with the external mappings and the UniChem mappings.
#'
#' @examples
#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
#' queryUnichemCompound(type = "sourceID", compound = "444795", sourceID = 22)
#'
#' @export
queryUnichem <- function(
type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
queryUnichemCompound <- function(
compound, type, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
){
checkmate::assert_string(type)
checkmate::assert_atomic(compound)
Expand All @@ -105,7 +113,7 @@ queryUnichem <- function(
mapped_sources_dt <- .asDT(response$compounds$sources)
old_names <- c("compoundId", "shortName", "longName", "id", "url")

new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourceURL")
setnames(mapped_sources_dt, old = old_names, new = new_names)

External_Mappings <- mapped_sources_dt[, new_names, with = FALSE]
Expand Down
11 changes: 6 additions & 5 deletions R/unichem_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
url <- httr2::url_parse(unichem_api)
url$path <- .buildURL(url$path, endpoint)

.debug(funContext, "URL: ", utils::capture.output(show(url)))
output <- httr2::url_build(url)

if (query_only) return(url)
.debug(funContext, "URL: ", output )

return(httr2::url_build(url))
if (query_only) return(url)
httr2::url_build(url)
}


Expand Down Expand Up @@ -60,7 +61,7 @@

base_url <- .build_unichem_query("compounds")

.debug(funContext, "Base URL: ", utils::capture.output(show(base_url)))
.debug(funContext, "Base URL: ", base_url)

body <- list(
type = type,
Expand All @@ -82,6 +83,6 @@
.build_request() |>
httr2::req_body_json(body)

.debug(funContext, "Request: ", utils::capture.output(show(request)))
.debug(funContext, "Request: ", request)
return(request)
}
10 changes: 10 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,13 @@ url: https://bhklab.github.io/AnnotationGx/
template:
bootstrap: 5

articles:
- title: Articles
navbar: ~
contents:
- Introduction
- Cellosaurus
- ChEMBL
- OncoTree
- PubChemAPI
- Unichem
2 changes: 1 addition & 1 deletion inst/extdata/treatment_pipeline.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ sources <- getUnichemSources()
lapply(
names_to_cids[1:2, cids],
queryUnichem,
queryUnichemCompound,
type = "sourceID",
sourceID = sources[Name == "pubchem", SourceID]
)
Expand Down
26 changes: 26 additions & 0 deletions man/cleanCharacterStrings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 10 additions & 5 deletions man/getUnichemSources.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions man/queryUnichem.Rd → man/queryUnichemCompound.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 31b2558

Please sign in to comment.