Skip to content

Commit

Permalink
Add pgRNA annotations files
Browse files Browse the repository at this point in the history
  • Loading branch information
cansavvy committed Mar 27, 2024
1 parent b01d765 commit 2789de1
Show file tree
Hide file tree
Showing 3 changed files with 33,193 additions and 6 deletions.
23 changes: 18 additions & 5 deletions R/03-annotate.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,31 @@ gimap_annotate <- function(.data = NULL,
if (!file.exists(annotation_file)) stop("The annotation_file specified cannot be found. Please double check the file path")
}

# TODO: Put the code that annotates the data here!

# The essential genes file, inst/extdata/Achilles_common_essentials.csv, was downloaded from
# https://depmap.org/portal/download/all/ (DepMap Public 19Q3 All Files) and is read in from there

# We'll take a look at the gimap_dataset$pg_ids and see what kinds of gene ids are there
# If we need to do gene conversion we'd do something like:

# BiocManager::install('org.Hs.eg.db') # biocLite() has been retired in favor of BiocManager
# mapIds(org.Hs.eg.db, <column of gene IDs>, 'ENTREZID', 'SYMBOL')
# https://github.com/FredHutch/GI_mapping/blob/main/workflow/scripts/02-get_pgRNA_annotations.Rmd

# Essential gene labeling is from inst/extdata/Achilles_common_essentials.csv, which was downloaded
# from https://depmap.org/portal/download/all/ (DepMap Public 19Q3 All Files)
# ctrl vs gene labels are from inst/extdata/pgPEN_annotations.txt

# This is the core of the code we'll need but we'll need to refactor
#d.annot <- d.annot %>%
#mutate(norm_ctrl_flag = case_when(
# target_type == "gene_gene" ~ "double_targeting",
# target_type == "gene_ctrl" & gene1_essential_flag == TRUE ~ "positive_control",
# target_type == "ctrl_gene" & gene2_essential_flag == TRUE ~ "positive_control",
# target_type == "gene_ctrl" & gene1_essential_flag != TRUE ~ "single_targeting",
# target_type == "ctrl_gene" & gene2_essential_flag != TRUE ~ "single_targeting",
# target_type == "ctrl_ctrl" ~ "negative_control")) %>%
#mutate(norm_ctrl_flag = factor(norm_ctrl_flag, levels = c("negative_control",
# "positive_control",
# "single_targeting",
# "double_targeting")))

gimap_dataset$annotation <- NULL # TODO: As the final step, store the annotations here, in the same order as the pg gene data.

return(gimap_dataset)
Expand Down
5 changes: 4 additions & 1 deletion R/04-foldchange.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ calc_lfc <- function(.data = NULL,
}
}

# TODO: we need to think about what happens if there are or are not replicates
# TODO: we need to think about what happens if there are or are not timepoints
if (!is.null(timepoints)) {
if (!(timepoints %in% colnames(gimap_dataset$metadata$sample_metadata))) {
stop("The column name specified for 'timepoints' does not exist in gimap_dataset$metadata$sample_metadata")
Expand All @@ -53,6 +53,9 @@ calc_lfc <- function(.data = NULL,
# TODO: Here's where the log fold change calculations and other handling will go based on the code in:
# https://github.com/FredHutch/GI_mapping/blob/main/workflow/scripts/03-filter_and_calculate_LFC.Rmd

# lfc_plasmid_vs_late = log2_cpm - plasmid_log2_cpm,
# lfc_early_vs_late = log2_cpm - early_log2_cpm

gimap_dataset$log_fc <- NULL # TODO: the log fold changes calculated can be returned here

return(gimap_dataset)
Expand Down
Loading

0 comments on commit 2789de1

Please sign in to comment.