diff --git a/.gitignore b/.gitignore index f8c3acd..74d287f 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ inst/doc git_token.txt /doc/ /Meta/ +inst/extdata/CCLE_gene_cn.csv +inst/extdata/CCLE_expression.csv diff --git a/DESCRIPTION b/DESCRIPTION index 0e00fe7..65ae402 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,6 @@ Suggests: roxygen2, Config/testthat/edition: 3 Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 LazyData: true VignetteBuilder: knitr diff --git a/R/03-annotate.R b/R/03-annotate.R index 6c6d356..9c53e6a 100644 --- a/R/03-annotate.R +++ b/R/03-annotate.R @@ -2,8 +2,10 @@ #' @description In this function, a `gimap_dataset` is annotated as far as which genes should be used as controls. #' @param .data Data can be piped in with %>% or |> from function to function. But the data must still be a gimap_dataset #' @param gimap_dataset A special dataset structure that is setup using the `setup_data()` function. -#' @param gene_id_type Specify what kind of gene IDs are specified in the `pg_ids`. By default will assume gene symbol. -#' @param control_genes A list of genes that should be labeled as control genes. These will be used for log fold change calculations. +#' @param cell_line which cell line are you using? Default is "HELA" +#' @param cn_annotate TRUE or FALSE you'd also like to have Copy number annotation from DepMap. These data are optional +#' @param annotation_file If no file is given, will attempt to use the design file from https://media.addgene.org/cms/filer_public/a9/9a/a99a9328-324b-42ff-8ccc-30c544b899e4/pgrna_library.xlsx +#' @param control_genes A vector of gene symbols (e.g. AAMP) that should be labeled as control genes. These will be used for log fold change calculations. 
#'   If no list is given then DepMap Public 23Q4 Achilles_common_essentials.csv is used https://depmap.org/portal/download/all/
#'   A single path to an existing whitespace-delimited file is also accepted; its first column is used as the gene symbols.
#' @return The `gimap_dataset` that was passed in, with the annotation data frame stored in its `annotation` element.
#' @export
#' @examples \dontrun{
#'
#' # `gimap_dataset` is an object made with setup_data()
#' gimap_dataset <- gimap_dataset %>%
#'   gimap_annotate()
#'
#' # To see annotations
#' gimap_dataset$annotation
#' }
gimap_annotate <- function(.data = NULL,
                           gimap_dataset,
                           cell_line = "HELA",
                           control_genes = NULL,
                           cn_annotate = TRUE,
                           annotation_file = NULL) {
  if (!is.null(.data)) gimap_dataset <- .data

  if (!inherits(gimap_dataset, "gimap_dataset")) {
    stop("This function only works with gimap_dataset objects which can be made with the setup_data() function.")
  }

  # Get the annotation data based on the pg construct design
  if (!is.null(annotation_file)) {
    if (!file.exists(annotation_file)) {
      stop("The annotation_file specified cannot be found. Please double check the file path")
    }
    annotation_df <- readr::read_table(annotation_file)
  } else {
    annotation_df <- get_example_data("annotation")
  }

  ############################ CONTROL GENE ANNOTATION #########################
  if (!is.null(control_genes)) {
    if (!is.character(control_genes)) {
      stop("control_genes must be a character vector of gene symbols, or a single path to a file that lists them in its first column")
    }
    # A lone string that points at an existing file is read as a gene list.
    # `[[1]]` pulls the first column out as a plain vector so that the `%in%`
    # comparisons below actually match gene symbols.
    # Anything else is used as gene symbols, so a mistyped path ends up
    # treated as a single (unmatched) symbol rather than raising an error.
    if (length(control_genes) == 1 && file.exists(control_genes)) {
      control_genes <- readr::read_table(control_genes)[[1]]
    }
  } else {
    # If control genes aren't provided then we get some from DepMap
    essential_genes <- readr::read_tsv(
      "https://figshare.com/ndownloader/files/40448429",
      show_col_types = FALSE
    )
    # Entries are expected to look like "AAMP (14)". Only the trailing
    # " (entrez id)" part is stripped so that hyphenated symbols such as
    # "HLA-C" are kept whole.
    control_genes <- sub("\\s*\\(.*$", "", dplyr::pull(essential_genes, "Gene"))
  }

  ############################ Get TPM data ####################################
  # This is not optional because its used to flag things
  depmap_metadata <- readr::read_csv(
    "https://figshare.com/ndownloader/files/35020903",
    show_col_types = FALSE
  )

  is_cell_line <- depmap_metadata[["stripped_cell_line_name"]] %in% cell_line
  my_depmap_id <- unique(depmap_metadata[["DepMap_ID"]][is_cell_line])

  # Everything below selects and renames exactly one column by this ID, so
  # fail here with a readable message instead of deep inside a rename().
  if (length(my_depmap_id) != 1) {
    stop(
      "Expected exactly one DepMap ID for cell_line '",
      paste(cell_line, collapse = ", "),
      "' but found ", length(my_depmap_id),
      ". Please check `cell_line` against the DepMap stripped_cell_line_name values."
    )
  }

  tpm_file <- file.path(system.file("extdata", package = "gimap"), "CCLE_expression.csv")

  if (!file.exists(tpm_file)) tpm_setup()

  depmap_tpm <- readr::read_csv(tpm_file,
    show_col_types = FALSE,
    col_select = c("genes", dplyr::all_of(my_depmap_id))
  ) %>%
    dplyr::rename(log2_tpm = dplyr::all_of(my_depmap_id)) %>%
    # TRUE when log2_tpm >= 1, FALSE when below 1, NA when log2_tpm is missing
    dplyr::mutate(expressed_flag = log2_tpm >= 1)

  ############################ COPY NUMBER ANNOTATION ##########################
  if (cn_annotate) {
    cn_file <- file.path(system.file("extdata", package = "gimap"), "CCLE_gene_cn.csv")
    if (!file.exists(cn_file)) cn_setup()

    # Read in the CN data
    depmap_cn <- readr::read_csv(cn_file,
      show_col_types = FALSE,
      col_select = c("genes", dplyr::all_of(my_depmap_id))
    ) %>%
      dplyr::rename(log2_cn = dplyr::all_of(my_depmap_id))

    # Joining the same table twice yields log2_cn_gene1 and log2_cn_gene2
    annotation_df <- annotation_df %>%
      dplyr::left_join(depmap_cn, by = c("gene1_symbol" = "genes")) %>%
      dplyr::left_join(depmap_cn, by = c("gene2_symbol" = "genes"), suffix = c("_gene1", "_gene2"))
  }

  ############################ ANNOTATION COMBINING ############################
  # This set up is more or less the same as the original
  # https://github.com/FredHutch/GI_mapping/blob/41ac7d5ed7025252343e2c823fba22f8a363e25c/workflow/scripts/02-get_pgRNA_annotations.Rmd#L435
  annotation_df <- annotation_df %>%
    dplyr::mutate(
      gene1_essential_flag = gene1_symbol %in% control_genes,
      gene2_essential_flag = gene2_symbol %in% control_genes
    ) %>%
    dplyr::left_join(depmap_tpm, by = c("gene1_symbol" = "genes")) %>%
    dplyr::rename(gene1_expressed_flag = expressed_flag) %>%
    # The second join gives log2_tpm_gene1 / log2_tpm_gene2 via the suffixes
    dplyr::left_join(depmap_tpm, by = c("gene2_symbol" = "genes"), suffix = c("_gene1", "_gene2")) %>%
    dplyr::rename(gene2_expressed_flag = expressed_flag) %>%
    dplyr::mutate(norm_ctrl_flag = dplyr::case_when(
      target_type == "gene_gene" ~ "double_targeting",
      target_type == "gene_ctrl" & gene1_essential_flag ~ "positive_control",
      target_type == "ctrl_gene" & gene2_essential_flag ~ "positive_control",
      target_type == "gene_ctrl" & !gene1_essential_flag ~ "single_targeting",
      target_type == "ctrl_gene" & !gene2_essential_flag ~ "single_targeting",
      target_type == "ctrl_ctrl" ~ "negative_control"
    )) %>%
    dplyr::mutate(norm_ctrl_flag = factor(norm_ctrl_flag, levels = c(
      "negative_control",
      "positive_control",
      "single_targeting",
      "double_targeting"
    )))

  ################################ STORE IT ####################################
  gimap_dataset$annotation <- annotation_df

  return(gimap_dataset)
}


# Shared worker for `tpm_setup()` and `cn_setup()`.
# Downloads a DepMap table from `url`, transposes it so that each row is a gene
# and each column is a cell line ID, and writes the result as `file_name` in
# the package's extdata folder. Returns the path of the file that was written.
# NOTE(review): column names of the download are expected to look like
# "SYMBOL (ENTREZ)", and the first column to hold the cell line IDs -- confirm
# against the downloaded files.
depmap_matrix_setup <- function(url, file_name) {
  extdata_dir <- system.file("extdata", package = "gimap")
  if (!nzchar(extdata_dir)) {
    stop("Could not find the extdata folder of the gimap package, so there is nowhere to store ", file_name)
  }
  dest_file <- file.path(extdata_dir, file_name)

  # Download to a temporary file: `gimap_annotate()` only checks whether the
  # destination exists, so a partial download must never be left there.
  raw_file <- tempfile(fileext = ".csv")
  on.exit(unlink(raw_file), add = TRUE)

  # The built-in download methods give up after `getOption("timeout")` seconds
  # (60 by default); allow more time and restore the option afterwards.
  old_options <- options(timeout = max(3600, getOption("timeout")))
  on.exit(options(old_options), add = TRUE)

  # Keep using wget where it is installed, otherwise let R pick a method
  download_method <- if (nzchar(Sys.which("wget"))) "wget" else "auto"

  status <- download.file(url,
    destfile = raw_file,
    method = download_method,
    mode = "wb"
  )
  if (status != 0) {
    stop("Download of ", url, " failed with status ", status)
  }

  # Column names are kept exactly as downloaded so that gene symbols
  # containing "-" or other punctuation are not altered.
  data_df <- readr::read_csv(raw_file,
    show_col_types = FALSE,
    name_repair = "minimal"
  )

  cell_line_ids <- data_df[[1]]

  # Drop the trailing " (entrez id)" from each gene column name
  genes <- sub("\\s*\\(.*$", "", colnames(data_df)[-1])

  data_df <- as.data.frame(t(data_df[, -1]))
  colnames(data_df) <- cell_line_ids
  data_df$genes <- genes

  data_df %>%
    dplyr::select("genes", dplyr::everything()) %>%
    readr::write_csv(dest_file)

  return(dest_file)
}

# This function sets up the tpm data from DepMap; it is called by the
# `gimap_annotate()` function when CCLE_expression.csv is not in extdata yet.
# Returns the path to the file that was written.
tpm_setup <- function() {
  depmap_matrix_setup(
    url = "https://figshare.com/ndownloader/files/34989919",
    file_name = "CCLE_expression.csv"
  )
}

# This function sets up the copy number data from DepMap; it is called by the
# `gimap_annotate()` function if cn_annotate = TRUE and CCLE_gene_cn.csv is not
# in extdata yet. Returns the path to the file that was written.
cn_setup <- function() {
  depmap_matrix_setup(
    url = "https://figshare.com/ndownloader/files/34989937",
    file_name = "CCLE_gene_cn.csv"
  )
}

# This function
sets up the control genes file from DepMap is called by the `gimap_annotate()` +crtl_genes <- function() { + crtl_genes_file <- file.path( + system.file("extdata", package = "gimap"), + "Achilles_common_essentials.csv" + ) + + download.file("https://figshare.com/ndownloader/files/34989871", + destfile = crtl_genes_file, + method = "wget" + ) + + return(crtl_genes_file) +} + diff --git a/R/utils.R b/R/utils.R index f896eb4..b9257fb 100644 --- a/R/utils.R +++ b/R/utils.R @@ -31,8 +31,16 @@ get_example_data <- function(which_data) { full.names = TRUE ) return(readr::read_rds(file)) + } else if (which_data == "annotation") { + file <- list.files( + pattern = "pgPEN_annotations.txt", + recursive = TRUE, + system.file("extdata", package = "gimap"), + full.names = TRUE + ) + return(readr::read_tsv(file, show_col_types = FALSE)) } else { - stop("Specification for `which_data` not understood; Need to use 'gimap', count', or 'meta'") + stop("Specification for `which_data` not understood; Need to use 'gimap', count', 'meta', or 'annotation' ") } } @@ -116,6 +124,7 @@ example_data_folder <- function() { dirname(file) } +# This function sets up the example count data save_example_data <- function() { example_data <- get_example_data("count") diff --git a/inst/extdata/Achilles_common_essentials.csv b/inst/extdata/Achilles_common_essentials.csv deleted file mode 100644 index 94185ab..0000000 --- a/inst/extdata/Achilles_common_essentials.csv +++ /dev/null @@ -1,2150 +0,0 @@ -gene -AAAS (8086) -AAMP (14) -AARS (16) -AARS2 (57505) -AASDHPPT (60496) -AATF (26574) -ABCB10 (23456) -ABCB7 (22) -ABCE1 (6059) -ABCF1 (23) -ABCG1 (9619) -ABHD11 (83451) -ABHD17A (81926) -ABT1 (29777) -ACD (65057) -ACIN1 (22985) -ACOT12 (134526) -ACTB (60) -ACTL6A (86) -ACTR10 (55860) -ACTR1A (10121) -ACTR1B (10120) -ACTR2 (10097) -ACTR3 (10096) -ACTR6 (64431) -ACTR8 (93973) -ADAT2 (134637) -ADAT3 (113179) -ADRM1 (11047) -ADSL (158) -AFG3L2 (10939) -AGAP5 (729092) -AGAP6 (414189) -AHCTF1 (25909) -AHCY (191) 
-AIFM1 (9131) -AK6 (102157402) -AKIRIN2 (55122) -ALDOA (226) -ALG1 (56052) -ALG11 (440138) -ALG13 (79868) -ALG14 (199857) -ALG1L (200810) -ALG2 (85365) -ALYREF (10189) -ANAPC1 (64682) -ANAPC10 (10393) -ANAPC11 (51529) -ANAPC13 (25847) -ANAPC15 (25906) -ANAPC2 (29882) -ANAPC4 (29945) -ANAPC5 (51433) -ANKLE2 (23141) -ANKRD11 (29123) -ANKRD20A1 (84210) -ANKRD36B (57730) -ANKRD49 (54851) -ANKS6 (203286) -ANLN (54443) -AP2M1 (1173) -AP2S1 (1175) -AQP7 (364) -AQR (9716) -ARCN1 (372) -ARF4 (378) -ARFRP1 (10139) -ARGFX (503582) -ARGLU1 (55082) -ARHGAP11B (89839) -ARID3A (1820) -ARIH1 (25820) -ARL2 (402) -ARL4D (379) -ARMC6 (93436) -ARMC7 (79637) -ARPC2 (10109) -ARPC3 (10094) -ARPC4 (10093) -ASF1B (55723) -ASNA1 (439) -ATF4 (468) -ATL2 (64225) -ATP1A1 (476) -ATP2A2 (488) -ATP5F1A (498) -ATP5F1B (506) -ATP5F1C (509) -ATP5F1D (513) -ATP5F1E (514) -ATP5MC1 (516) -ATP5ME (521) -ATP5MF (9551) -ATP5MG (10632) -ATP5PB (515) -ATP5PO (539) -ATP6AP1 (537) -ATP6AP2 (10159) -ATP6V0B (533) -ATP6V0C (527) -ATP6V0D1 (9114) -ATP6V1A (523) -ATP6V1B2 (526) -ATP6V1C1 (528) -ATP6V1D (51382) -ATP6V1E1 (529) -ATP6V1F (9296) -ATP6V1G1 (9550) -ATP6V1H (51606) -ATR (545) -ATRIP (84126) -AURKA (6790) -AURKAIP1 (54998) -AURKB (9212) -BANF1 (8815) -BANP (54971) -BAP1 (8314) -BARD1 (580) -BCAR1 (9564) -BCAS2 (10286) -BCL2L1 (598) -BCLAF1 (9774) -BCS1L (617) -BDP1 (55814) -BET1 (10282) -BIRC5 (332) -BMS1 (9790) -BNIP1 (662) -BOP1 (23246) -BORA (79866) -BORCS5 (118426) -BPTF (2186) -BRCA1 (672) -BRCA2 (675) -BRD4 (23476) -BRD8 (10902) -BRF1 (2972) -BRF2 (55290) -BRIP1 (83990) -BRIX1 (55299) -BRK1 (55845) -BTF3 (689) -BTF3L4 (91408) -BUB1 (699) -BUB1B (701) -BUB3 (9184) -BUD13 (84811) -BUD23 (114049) -BUD31 (8896) -BYSL (705) -C12orf45 (121053) -C16orf86 (388284) -C17orf58 (284018) -C19orf53 (28974) -C1D (10438) -C1QBP (708) -C1QTNF4 (114900) -C1orf109 (54955) -C6orf15 (29113) -C7orf26 (79034) -C7orf50 (84310) -C9orf16 (79095) -C9orf78 (51759) -CA5A (763) -CACNB3 (784) -CACTIN (58509) -CADM4 (199731) 
-CAMLG (819) -CAP1 (10487) -CAPZB (832) -CARS (833) -CBLL1 (79872) -CBLN1 (869) -CBWD2 (150472) -CBWD3 (445571) -CBWD5 (220869) -CBX1 (10951) -CCAR1 (55749) -CCDC115 (84317) -CCDC130 (81576) -CCDC137 (339230) -CCDC144A (9720) -CCDC144NL (339184) -CCDC174 (51244) -CCDC59 (29080) -CCDC78 (124093) -CCDC84 (338657) -CCDC86 (79080) -CCNA2 (890) -CCNB1 (891) -CCND1 (595) -CCNH (902) -CCNK (8812) -CCNL1 (57018) -CCP110 (9738) -CCT2 (10576) -CCT3 (7203) -CCT4 (10575) -CCT5 (22948) -CCT6A (908) -CCT7 (10574) -CCT8 (10694) -CD2BP2 (10421) -CD3EAP (10849) -CD8B (926) -CDAN1 (146059) -CDC123 (8872) -CDC16 (8881) -CDC20 (991) -CDC23 (8697) -CDC26 (246184) -CDC27 (996) -CDC37 (11140) -CDC40 (51362) -CDC42 (998) -CDC45 (8318) -CDC5L (988) -CDC6 (990) -CDC7 (8317) -CDC73 (79577) -CDCA5 (113130) -CDCA8 (55143) -CDIPT (10423) -CDK1 (983) -CDK11A (728642) -CDK2 (1017) -CDK7 (1022) -CDK9 (1025) -CDT1 (81620) -CEBPZ (10153) -CENPA (1058) -CENPC (1060) -CENPE (1062) -CENPH (64946) -CENPI (2491) -CENPK (64105) -CENPN (55839) -CENPT (80152) -CENPW (387103) -CENPX (201254) -CEP131 (22994) -CEP152 (22995) -CEP192 (55125) -CEP68 (23177) -CEP85 (64793) -CEP97 (79598) -CFAP20 (29105) -CFAP298 (56683) -CFDP1 (10428) -CFL1 (1072) -CHAF1A (10036) -CHAF1B (8208) -CHCHD1 (118487) -CHCHD2 (51142) -CHCHD4 (131474) -CHD4 (1108) -CHEK1 (1111) -CHERP (10523) -CHMP1A (5119) -CHMP2A (27243) -CHMP3 (51652) -CHMP4B (128866) -CHMP5 (51510) -CHMP6 (79643) -CHMP7 (91782) -CHORDC1 (26973) -CHTF18 (63922) -CHTF8 (54921) -CHTOP (26097) -CIAO1 (9391) -CIAO2B (51647) -CIAO3 (64428) -CIAPIN1 (57019) -CINP (51550) -CKAP5 (9793) -CLASRP (11129) -CLCC1 (23155) -CLK2 (1196) -CLNS1A (1207) -CLP1 (10978) -CLSPN (63967) -CLTC (1213) -CMPK1 (51727) -CMTR1 (23070) -CNIH4 (29097) -CNN2 (1265) -CNOT1 (23019) -CNOT2 (4848) -CNOT3 (4849) -CNOT9 (9125) -COA5 (493753) -COA7 (65260) -COASY (80347) -COG1 (9382) -COG2 (22796) -COG3 (83548) -COG4 (25839) -COG6 (57511) -COG8 (84342) -COPA (1314) -COPB1 (1315) -COPB2 (9276) -COPE 
(11316) -COPG1 (22820) -COPS2 (9318) -COPS3 (8533) -COPS4 (51138) -COPS5 (10987) -COPS6 (10980) -COPS8 (10920) -COPZ1 (22818) -COQ2 (27235) -COQ4 (51117) -COQ5 (84274) -COX10 (1352) -COX11 (1353) -COX15 (1355) -COX17 (10063) -COX5A (9377) -COX5B (1329) -COX6C (1345) -COX7B (1349) -COX7C (1350) -CPAMD8 (27151) -CPNE7 (27132) -CPOX (1371) -CPSF1 (29894) -CPSF2 (53981) -CPSF3 (51692) -CPSF4 (10898) -CPSF6 (11052) -CRCP (27297) -CRKL (1399) -CRLS1 (54675) -CRNKL1 (51340) -CS (1431) -CSE1L (1434) -CSH2 (1443) -CSNK1A1 (1452) -CSNK2B (1460) -CSTF1 (1477) -CSTF3 (1479) -CTAGE9 (643854) -CTBP2 (1488) -CTCF (10664) -CTDP1 (9150) -CTNNBL1 (56259) -CTPS1 (1503) -CTR9 (9646) -CTU2 (348180) -CUL1 (8454) -CUL2 (8453) -CUL3 (8452) -CUL7 (9820) -CWC22 (57703) -CWC25 (54883) -CWF19L2 (143884) -CXXC1 (30827) -CYC1 (1537) -CYCS (54205) -CYFIP1 (23191) -CYP2A13 (1553) -CYP4F11 (57834) -CYS1 (192668) -DAD1 (1603) -DAP3 (7818) -DARS (1615) -DBF4 (10926) -DBR1 (51163) -DCAF6 (55827) -DCLRE1B (64858) -DCTN1 (1639) -DCTN2 (10540) -DCTN3 (11258) -DCTN4 (51164) -DCTN5 (84516) -DCTN6 (10671) -DDB1 (1642) -DDN (23109) -DDOST (1650) -DDX1 (1653) -DDX10 (1662) -DDX11 (1663) -DDX18 (8886) -DDX19A (55308) -DDX19B (11269) -DDX20 (11218) -DDX21 (9188) -DDX23 (9416) -DDX24 (57062) -DDX27 (55661) -DDX3X (1654) -DDX41 (51428) -DDX42 (11325) -DDX46 (9879) -DDX47 (51202) -DDX49 (54555) -DDX5 (1655) -DDX51 (317781) -DDX52 (11056) -DDX54 (79039) -DDX55 (57696) -DDX56 (54606) -DDX59 (83479) -DDX6 (1656) -DEFB131B (100129216) -DENR (8562) -DERL2 (51009) -DHDDS (79947) -DHFR (1719) -DHODH (1723) -DHPS (1725) -DHX15 (1665) -DHX16 (8449) -DHX33 (56919) -DHX36 (170506) -DHX37 (57647) -DHX8 (1659) -DHX9 (1660) -DICER1 (23405) -DIMT1 (27292) -DIS3 (22894) -DKC1 (1736) -DMAP1 (55929) -DMRTA2 (63950) -DNAJA1 (3301) -DNAJA3 (9093) -DNAJC11 (55735) -DNAJC17 (55192) -DNAJC8 (22826) -DNAJC9 (23234) -DNLZ (728489) -DNM1 (1759) -DNM1L (10059) -DNM2 (1785) -DNMT1 (1786) -DNTTIP2 (30836) -DOHH (83475) -DOLK (22845) -DONSON 
(29980) -DPAGT1 (1798) -DPH1 (1801) -DPH2 (1802) -DPH3 (285381) -DPH6 (89978) -DPM2 (8818) -DPPA2 (151871) -DPY19L2 (283417) -DR1 (1810) -DRAP1 (10589) -DRG1 (4733) -DSN1 (79980) -DTL (51514) -DTYMK (1841) -DUT (1854) -DUX4 (100288687) -DYNC1H1 (1778) -DYNC1I2 (1781) -DYNLL1 (8655) -DYNLRB1 (83658) -E2F6 (1876) -E4F1 (1877) -EARS2 (124454) -EBNA1BP2 (10969) -ECD (11319) -ECT2 (1894) -EDC4 (23644) -EEF1A1 (1915) -EEF1B2 (1933) -EEF1G (1937) -EEF2 (1938) -EEF2KMT (196483) -EFL1 (79631) -EFTUD2 (9343) -EGLN2 (112398) -EIF1 (10209) -EIF1AD (84285) -EIF1AX (1964) -EIF2B1 (1967) -EIF2B2 (8892) -EIF2B3 (8891) -EIF2B4 (8890) -EIF2B5 (8893) -EIF2S1 (1965) -EIF2S2 (8894) -EIF2S3 (1968) -EIF3A (8661) -EIF3B (8662) -EIF3CL (728689) -EIF3D (8664) -EIF3E (3646) -EIF3F (8665) -EIF3G (8666) -EIF3H (8667) -EIF3I (8668) -EIF3J (8669) -EIF3L (51386) -EIF3M (10480) -EIF4A1 (1973) -EIF4A3 (9775) -EIF4B (1975) -EIF4E (1977) -EIF4G1 (1981) -EIF4G2 (1982) -EIF5 (1983) -EIF5A (1984) -EIF6 (3692) -ELAC2 (60528) -ELL (8178) -ELOA3B (728929) -ELOB (6923) -ELOC (6921) -ELOVL1 (64834) -ELP1 (8518) -ELP2 (55250) -ELP3 (55140) -ELP5 (23587) -ELP6 (54859) -EMC1 (23065) -EMC3 (55831) -EMC4 (51234) -EMC7 (56851) -ENO1 (2023) -EP400 (57634) -EPOP (100170841) -EPRS (2058) -ERAL1 (26284) -ERCC2 (2068) -ERCC3 (2071) -ERH (2079) -ERVW-1 (30816) -ESF1 (51575) -ESPL1 (9700) -ESPN (83715) -ESS2 (8220) -ETF1 (2107) -EWSR1 (2130) -EXOC1 (55763) -EXOC2 (55770) -EXOC3 (11336) -EXOC4 (60412) -EXOC5 (10640) -EXOC7 (23265) -EXOC8 (149371) -EXOSC1 (51013) -EXOSC10 (5394) -EXOSC2 (23404) -EXOSC3 (51010) -EXOSC4 (54512) -EXOSC5 (56915) -EXOSC6 (118460) -EXOSC7 (23016) -EXOSC8 (11340) -EXOSC9 (5393) -F8A1 (8263) -FAF2 (23197) -FAM133B (257415) -FAM136A (84908) -FAM207A (85395) -FAM229A (100128071) -FAM25G (100133093) -FAM32A (26017) -FAM50A (9130) -FAM72D (728833) -FAM86B1 (85002) -FARS2 (10667) -FARSA (2193) -FARSB (10056) -FASTKD5 (60493) -FAU (2197) -FBL (2091) -FBLIM1 (54751) -FBXO5 (26271) -FCF1 (51077) -FDPS 
(2224) -FDXR (2232) -FEN1 (2237) -FGFR1OP (11116) -FIP1L1 (81608) -FNBP4 (23360) -FNTA (2339) -FNTB (2342) -FOLR3 (2352) -FOXD4 (2298) -FOXD4L6 (653404) -FOXL2 (668) -FRG2 (448831) -FTSJ3 (117246) -FXN (2395) -GABPA (2551) -GAGE12J (729396) -GAGE2A (729447) -GAK (2580) -GALP (85569) -GAPDH (2597) -GAR1 (54433) -GARS (2617) -GBF1 (8729) -GCN1 (10985) -GCOM1 (145781) -GDF5OS (554250) -GEMIN4 (50628) -GEMIN5 (25929) -GEMIN6 (79833) -GEMIN7 (79760) -GEMIN8 (54960) -GFER (2671) -GFM1 (85476) -GGPS1 (9453) -GGTLC2 (91227) -GINS1 (9837) -GINS2 (51659) -GINS3 (64785) -GINS4 (84296) -GJA3 (2700) -GLE1 (2733) -GLI4 (2738) -GLMN (11146) -GLRX3 (10539) -GLRX5 (51218) -GMPPB (29925) -GMPS (8833) -GNB1L (54584) -GNL2 (29889) -GNL3 (26354) -GNL3L (54552) -GNPNAT1 (64841) -GOLGA6A (342096) -GOLGA6L1 (283767) -GOLGA6L10 (647042) -GOLGA8K (653125) -GOLGA8N (643699) -GOLGA8R (101059918) -GON4L (54856) -GOSR2 (9570) -GPKOW (27238) -GPN1 (11321) -GPN2 (54707) -GPN3 (51184) -GPR61 (83873) -GPS1 (2873) -GRB2 (2885) -GRPEL1 (80273) -GRSF1 (2926) -GRWD1 (83743) -GSDMA (284110) -GSPT1 (2935) -GTF2A1 (2957) -GTF2A2 (2958) -GTF2B (2959) -GTF2E1 (2960) -GTF2E2 (2961) -GTF2F1 (2962) -GTF2F2 (2963) -GTF2H1 (2965) -GTF2H2 (2966) -GTF2H2C (728340) -GTF2H3 (2967) -GTF2H4 (2968) -GTF3A (2971) -GTF3C1 (2975) -GTF3C2 (2976) -GTF3C3 (9330) -GTF3C4 (9329) -GTF3C5 (9328) -GTF3C6 (112495) -GTPBP4 (23560) -GUK1 (2987) -H2AFX (3014) -H2AFZ (3015) -H3F3A (3020) -HAMP (57817) -HAPLN2 (60484) -HARS (3035) -HARS2 (23438) -HAUS1 (115106) -HAUS3 (79441) -HAUS4 (54930) -HAUS5 (23354) -HAUS6 (54801) -HAUS7 (55559) -HAUS8 (93323) -HCFC1 (3054) -HCRTR1 (3061) -HDAC3 (8841) -HEATR1 (55127) -HECTD1 (25831) -HGS (9146) -HINFP (25988) -HIRA (7290) -HIST1H2AB (8335) -HIST1H2AE (3012) -HIST1H2AI (8329) -HIST1H2BB (3018) -HIST1H2BC (8347) -HIST1H2BE (8344) -HIST1H2BI (8346) -HIST1H2BJ (8970) -HIST1H2BL (8340) -HIST1H2BM (8342) -HIST1H2BN (8341) -HIST2H2AA3 (8337) -HIST2H2AC (8338) -HIST2H2BE (8349) -HIST2H2BF (440689) 
-HIST2H3A (333932) -HIST2H3D (653604) -HJURP (55355) -HLA-C (3107) -HMGA1 (3159) -HMGB1 (3146) -HMGCR (3156) -HMGCS1 (3157) -HMGN2 (3151) -HNRNPC (3183) -HNRNPCL4 (101060301) -HNRNPH1 (3187) -HNRNPK (3190) -HNRNPL (3191) -HNRNPM (4670) -HNRNPR (10236) -HNRNPU (3192) -HOXC10 (3226) -HS6ST1 (9394) -HSCB (150274) -HSD17B10 (3028) -HSD17B12 (51144) -HSF1 (3297) -HSP90B1 (7184) -HSPA14 (51182) -HSPA5 (3309) -HSPA9 (3313) -HSPD1 (3329) -HSPE1 (3336) -HTATSF1 (27336) -HUS1 (3364) -HUWE1 (10075) -HYOU1 (10525) -HYPK (25764) -IARS (3376) -IARS2 (55699) -ICE1 (23379) -ICE2 (79664) -IFITM2 (10581) -IFITM3 (10410) -IGBP1 (3476) -IK (3550) -ILF2 (3608) -ILF3 (3609) -IMP3 (55272) -IMP4 (92856) -IMPDH2 (3615) -INCENP (3619) -ING3 (54556) -INO80 (54617) -INO80B (83444) -INTS1 (26173) -INTS11 (54973) -INTS13 (55726) -INTS14 (81556) -INTS2 (57508) -INTS3 (65123) -INTS4 (92105) -INTS5 (80789) -INTS6 (26512) -INTS7 (25896) -INTS8 (55656) -INTS9 (55756) -IPO11 (51194) -IPO13 (9670) -IPO7 (10527) -IPO9 (55705) -ISCA2 (122961) -ISCU (23479) -ISG20L2 (81875) -ISY1 (57461) -JAZF1 (221895) -JMJD6 (23210) -KANSL1 (284058) -KANSL2 (54934) -KANSL3 (55683) -KARS (3735) -KAT5 (10524) -KAT8 (84148) -KATNB1 (10300) -KCMF1 (56888) -KCNA10 (3744) -KDM2A (22992) -KDM8 (79831) -KIAA0391 (9692) -KIAA1143 (57456) -KIF11 (3832) -KIF14 (9928) -KIF18A (81930) -KIF18B (146909) -KIF20A (10112) -KIF23 (9493) -KIF4A (24137) -KIN (22944) -KLC2 (64837) -KLF7 (8609) -KLHL11 (55175) -KNL1 (57082) -KPNA6 (23633) -KPNB1 (3837) -KRI1 (65095) -KRR1 (11103) -KRT10 (3858) -KRT17 (3872) -KRT8 (3856) -KRTAP4-2 (85291) -KRTAP4-7 (100132476) -KRTAP5-7 (440050) -KTI12 (112970) -KXD1 (79036) -LAMTOR2 (28956) -LAMTOR3 (8649) -LAMTOR4 (389541) -LARS (51520) -LARS2 (23395) -LAS1L (81887) -LCE1C (353133) -LCE1E (353135) -LCN10 (414332) -LENG8 (114823) -LETM1 (3954) -LIMS1 (3987) -LIN52 (91750) -LIN54 (132660) -LONP1 (9361) -LPA (4018) -LRPPRC (10128) -LRR1 (122769) -LRRC37A3 (374819) -LRRC37B (114659) -LSG1 (55341) -LSM10 (84967) 
-LSM11 (134353) -LSM2 (57819) -LSM3 (27258) -LSM4 (25804) -LSM5 (23658) -LSM6 (11157) -LSM7 (51690) -LSM8 (51691) -LST1 (7940) -LTB4R2 (56413) -LTBP3 (4054) -LTO1 (220064) -LTV1 (84946) -LUC7L3 (51747) -LURAP1 (541468) -LYRM4 (57128) -MAD2L1 (4085) -MAD2L2 (10459) -MAGOH (4116) -MAK16 (84549) -MANF (7873) -MARS (4141) -MARS2 (92935) -MASTL (84930) -MAT2A (4144) -MAU2 (23383) -MAX (4149) -MBTPS1 (8720) -MBTPS2 (51360) -MCL1 (4170) -MCM2 (4171) -MCM3 (4172) -MCM3AP (8888) -MCM4 (4173) -MCM5 (4174) -MCM6 (4175) -MCM7 (4176) -MCMBP (79892) -MCRS1 (10445) -MDN1 (23195) -MED1 (5469) -MED10 (84246) -MED11 (400569) -MED12 (9968) -MED14 (9282) -MED17 (9440) -MED18 (54797) -MED20 (9477) -MED21 (9412) -MED22 (6837) -MED26 (9441) -MED27 (9442) -MED28 (80306) -MED29 (55588) -MED30 (90390) -MED31 (51003) -MED4 (29079) -MED6 (10001) -MED7 (9443) -MED8 (112950) -MED9 (55090) -MEPCE (56257) -METAP1 (23173) -METAP2 (10988) -METTL1 (4234) -METTL14 (57721) -METTL16 (79066) -METTL17 (64745) -METTL23 (124512) -METTL3 (56339) -MEX3A (92312) -MFAP1 (4236) -MFN2 (9927) -MINDY2 (54629) -MINOS1 (440574) -MIOS (54468) -MIPEP (4285) -MIS12 (79003) -MIS18A (54069) -MIS18BP1 (55320) -MKRN1 (23608) -MLST8 (64223) -MMGT1 (93380) -MMS19 (64210) -MMS22L (253714) -MNAT1 (4331) -MOB4 (25843) -MOCS3 (27304) -MOG (4340) -MOK (5891) -MPHOSPH10 (10199) -MRE11 (4361) -MRGBP (55257) -MRPL1 (65008) -MRPL10 (124995) -MRPL12 (6182) -MRPL13 (28998) -MRPL14 (64928) -MRPL15 (29088) -MRPL16 (54948) -MRPL17 (63875) -MRPL18 (29074) -MRPL19 (9801) -MRPL2 (51069) -MRPL20 (55052) -MRPL21 (219927) -MRPL22 (29093) -MRPL24 (79590) -MRPL27 (51264) -MRPL28 (10573) -MRPL3 (11222) -MRPL32 (64983) -MRPL33 (9553) -MRPL34 (64981) -MRPL35 (51318) -MRPL36 (64979) -MRPL37 (51253) -MRPL38 (64978) -MRPL39 (54148) -MRPL4 (51073) -MRPL41 (64975) -MRPL43 (84545) -MRPL44 (65080) -MRPL45 (84311) -MRPL49 (740) -MRPL50 (54534) -MRPL51 (51258) -MRPL53 (116540) -MRPL54 (116541) -MRPL55 (128308) -MRPL57 (78988) -MRPL9 (65005) -MRPS10 (55173) 
-MRPS11 (64963) -MRPS12 (6183) -MRPS14 (63931) -MRPS16 (51021) -MRPS18A (55168) -MRPS18B (28973) -MRPS2 (51116) -MRPS21 (54460) -MRPS22 (56945) -MRPS23 (51649) -MRPS24 (64951) -MRPS25 (64432) -MRPS26 (64949) -MRPS27 (23107) -MRPS28 (28957) -MRPS31 (10240) -MRPS34 (65993) -MRPS35 (60488) -MRPS5 (64969) -MRPS6 (64968) -MRPS7 (51081) -MRPS9 (64965) -MRTO4 (51154) -MSRB1 (51734) -MST1 (4485) -MSTO1 (55154) -MT2A (4502) -MTBP (27085) -MTG2 (26164) -MTOR (2475) -MTPAP (55149) -MTREX (23517) -MTRNR2L1 (100462977) -MVD (4597) -MVK (4598) -MYBBP1A (10514) -MYBL2 (4605) -MYC (4609) -MYCBP (26292) -MYO1H (283446) -MZF1 (7593) -MZT1 (440145) -N6AMT1 (29104) -NAA10 (8260) -NAA15 (80155) -NAA20 (51126) -NAA25 (80018) -NAA35 (60560) -NAA50 (80218) -NACA (4666) -NAE1 (8883) -NAF1 (92345) -NAGLU (4669) -NANOG (79923) -NAPA (8775) -NAPG (8774) -NARS (4677) -NASP (4678) -NAT10 (55226) -NBAS (51594) -NBEAL1 (65065) -NBPF10 (100132406) -NBPF12 (149013) -NBPF15 (284565) -NBPF3 (84224) -NCAPD2 (9918) -NCAPD3 (23310) -NCAPG (64151) -NCAPG2 (54892) -NCAPH (23397) -NCAPH2 (29781) -NCBP1 (4686) -NCBP2 (22916) -NCKAP1 (10787) -NCL (4691) -NCOA4 (8031) -NDC1 (55706) -NDC80 (10403) -NDOR1 (27158) -NDUFA11 (126328) -NDUFA2 (4695) -NDUFA4 (4697) -NDUFA6 (4700) -NDUFA8 (4702) -NDUFAB1 (4706) -NDUFAF3 (25915) -NDUFAF7 (55471) -NDUFAF8 (284184) -NDUFB10 (4716) -NDUFB3 (4709) -NDUFB4 (4710) -NDUFB6 (4712) -NDUFB7 (4713) -NDUFB8 (4714) -NDUFS2 (4720) -NDUFS3 (4722) -NDUFS5 (4725) -NEDD1 (121441) -NEDD8 (4738) -NELFA (7469) -NELFB (25920) -NELFCD (51497) -NELFE (7936) -NEMF (9147) -NEPRO (25871) -NFATC2IP (84901) -NFKBIE (4794) -NFRKB (4798) -NFS1 (9054) -NFYB (4801) -NFYC (4802) -NHLRC2 (374354) -NHP2 (55651) -NIFK (84365) -NIP7 (51388) -NISCH (11188) -NKAP (79576) -NKAPD1 (55216) -NLE1 (54475) -NMD3 (51068) -NMT1 (4836) -NOB1 (28987) -NOC2L (26155) -NOC3L (64318) -NOC4L (79050) -NOL10 (79954) -NOL11 (25926) -NOL12 (79159) -NOL6 (65083) -NOL7 (51406) -NOL8 (55035) -NOL9 (79707) -NOLC1 (9221) -NOM1 
(64434) -NOMO3 (408050) -NOP10 (55505) -NOP14 (8602) -NOP16 (51491) -NOP2 (4839) -NOP53 (29997) -NOP56 (10528) -NOP58 (51602) -NOP9 (161424) -NOS1AP (9722) -NOTCH2NLB (100996763) -NPAT (4863) -NPB (256933) -NPEPPS (9520) -NPIPA1 (9284) -NPIPA5 (100288332) -NPIPB6 (728741) -NPIPB8 (728734) -NPLOC4 (55666) -NPM1 (4869) -NPM3 (10360) -NR2C2AP (126382) -NRBP1 (29959) -NRDE2 (55051) -NRF1 (4899) -NSA2 (10412) -NSF (4905) -NSL1 (25936) -NSMCE1 (197370) -NSMCE2 (286053) -NSMCE3 (56160) -NSUN4 (387338) -NUBP1 (4682) -NUBP2 (10101) -NUDC (10726) -NUDCD3 (23386) -NUDT21 (11051) -NUDT4B (440672) -NUF2 (83540) -NUFIP1 (26747) -NUMA1 (4926) -NUP107 (57122) -NUP133 (55746) -NUP153 (9972) -NUP155 (9631) -NUP160 (23279) -NUP205 (23165) -NUP214 (8021) -NUP35 (129401) -NUP43 (348995) -NUP54 (53371) -NUP62 (23636) -NUP85 (79902) -NUP88 (4927) -NUP93 (9688) -NUP98 (4928) -NUS1 (116150) -NUTF2 (10204) -NVL (4931) -NXF1 (10482) -OGFOD1 (55239) -OGT (8473) -OIP5 (11339) -OLFML3 (56944) -OPA1 (4976) -OR11H1 (81061) -OR1D2 (4991) -OR1E2 (8388) -OR2A1 (346528) -OR2T29 (343563) -OR2T5 (401993) -OR4F4 (26682) -OR4K1 (79544) -OR4N2 (390429) -OR7A10 (390892) -OR7A17 (26333) -ORC1 (4998) -ORC3 (23595) -ORC4 (5000) -ORC5 (5001) -ORC6 (23594) -OSBP (5007) -OSGEP (55644) -OSTC (58505) -OTOP1 (133060) -OVCA2 (124641) -PABPC1 (26986) -PABPN1 (8106) -PAF1 (54623) -PAFAH1B1 (5048) -PAK1IP1 (55003) -PALB2 (79728) -PAM16 (51025) -PARN (5073) -PARS2 (25973) -PAXBP1 (94104) -PC (5091) -PCBP1 (5093) -PCBP2 (5094) -PCF11 (51585) -PCID2 (55795) -PCNA (5111) -PCNX3 (399909) -PDAP1 (11333) -PDCD11 (22984) -PDCD2 (5134) -PDCD5 (9141) -PDCD6 (10016) -PDCD6IP (10015) -PDCD7 (10081) -PDPK1 (5170) -PDRG1 (81572) -PEF1 (553115) -PELO (53918) -PELP1 (27043) -PES1 (23481) -PET117 (100303755) -PEX1 (5189) -PFDN1 (5201) -PFDN2 (5202) -PFDN4 (5203) -PFDN5 (5204) -PFDN6 (10471) -PFN1 (5216) -PGAM1 (5223) -PGD (5226) -PGK1 (5230) -PGPEP1 (54858) -PGS1 (9489) -PHAX (51808) -PHB (5245) -PHB2 (11331) -PHF12 (57649) -PHF5A 
(84844) -PI4KA (5297) -PIGH (5283) -PIK3C3 (5289) -PIK3R4 (30849) -PKM (5315) -PKMYT1 (9088) -PLA2G10 (8399) -PLEKHN1 (84069) -PLGLB2 (5342) -PLK1 (5347) -PLK4 (10733) -PLRG1 (5356) -PMF1 (11243) -PMPCA (23203) -PMPCB (9512) -PNISR (25957) -PNKP (11284) -PNN (5411) -PNO1 (56902) -PNPT1 (87178) -POGZ (23126) -POLA1 (5422) -POLA2 (23649) -POLD1 (5424) -POLD2 (5425) -POLD3 (10714) -POLE (5426) -POLE2 (5427) -POLG2 (11232) -POLR1A (25885) -POLR1B (84172) -POLR1C (9533) -POLR1E (64425) -POLR2B (5431) -POLR2C (5432) -POLR2D (5433) -POLR2E (5434) -POLR2F (5435) -POLR2G (5436) -POLR2H (5437) -POLR2I (5438) -POLR2J (5439) -POLR2K (5440) -POLR2L (5441) -POLR3A (11128) -POLR3B (55703) -POLR3C (10623) -POLR3D (661) -POLR3E (55718) -POLR3F (10621) -POLR3H (171568) -POLR3K (51728) -POLRMT (5442) -POP1 (10940) -POP4 (10775) -POP5 (51367) -POP7 (10248) -POT1 (25913) -POTEG (404785) -POTEI (653269) -POU5F1B (5462) -PPA1 (5464) -PPA2 (27068) -PPAN (56342) -PPARGC1B (133522) -PPIA (5478) -PPIAL4C (653598) -PPIE (10450) -PPIH (10465) -PPIL1 (51645) -PPIL2 (23759) -PPIL4 (85313) -PPME1 (51400) -PPP1CA (5499) -PPP1CB (5500) -PPP1R10 (5514) -PPP1R11 (6992) -PPP1R12A (4659) -PPP1R15B (84919) -PPP1R2 (5504) -PPP1R7 (5510) -PPP1R8 (5511) -PPP2CA (5515) -PPP2R1A (5518) -PPP2R3C (55012) -PPP4C (5531) -PPP4R2 (151987) -PPP6C (5537) -PPRC1 (23082) -PPWD1 (23398) -PPY (5539) -PRAMEF15 (653619) -PRAMEF17 (391004) -PRAMEF18 (391003) -PRAMEF9 (343070) -PRC1 (9055) -PRDM10 (56980) -PREB (10113) -PRELID1 (27166) -PRELID3B (51012) -PRIM1 (5557) -PRKRA (8575) -PRKRIP1 (79706) -PRMT1 (3276) -PRMT5 (10419) -PRODH (5625) -PRPF18 (8559) -PRPF19 (27339) -PRPF3 (9129) -PRPF31 (26121) -PRPF38A (84950) -PRPF38B (55119) -PRPF4 (9128) -PRPF40A (55660) -PRPF4B (8899) -PRPF6 (24148) -PRPF8 (10594) -PRRC2A (7916) -PRSS50 (29122) -PSAT1 (29968) -PSMA1 (5682) -PSMA2 (5683) -PSMA3 (5684) -PSMA4 (5685) -PSMA5 (5686) -PSMA6 (5687) -PSMA7 (5688) -PSMB1 (5689) -PSMB2 (5690) -PSMB3 (5691) -PSMB4 (5692) -PSMB5 (5693) -PSMB6 
(5694) -PSMB7 (5695) -PSMC1 (5700) -PSMC2 (5701) -PSMC3 (5702) -PSMC4 (5704) -PSMC5 (5705) -PSMC6 (5706) -PSMD1 (5707) -PSMD11 (5717) -PSMD12 (5718) -PSMD13 (5719) -PSMD14 (10213) -PSMD2 (5708) -PSMD3 (5709) -PSMD4 (5710) -PSMD6 (9861) -PSMD7 (5713) -PSMD8 (5714) -PSMD9 (5715) -PSME1 (5720) -PSME2 (5721) -PSMG1 (8624) -PSMG2 (56984) -PSMG3 (84262) -PSMG4 (389362) -PSTK (118672) -PTBP1 (5725) -PTCD1 (26024) -PTCD3 (55037) -PTK2 (5747) -PTMA (5757) -PTPA (5524) -PTPMT1 (114971) -PTPN23 (25930) -PUF60 (22827) -PWP1 (11137) -PWP2 (5822) -PXN (5829) -PYM1 (84305) -PYROXD1 (79912) -QARS (5859) -QRSL1 (55278) -RAB18 (22931) -RAB4A (5867) -RAB6A (5870) -RAB6D (150786) -RABGGTA (5875) -RABGGTB (5876) -RAC1 (5879) -RAC3 (5881) -RACGAP1 (29127) -RACK1 (10399) -RAD17 (5884) -RAD21 (5885) -RAD51 (5888) -RAD51C (5889) -RAD51D (5892) -RAD9A (5883) -RAE1 (8480) -RAN (5901) -RANBP2 (5903) -RANGAP1 (5905) -RARS (5917) -RARS2 (57038) -RBBP4 (5928) -RBBP5 (5929) -RBBP6 (5930) -RBBP8 (5932) -RBM10 (8241) -RBM14 (10432) -RBM17 (84991) -RBM19 (9904) -RBM22 (55696) -RBM25 (58517) -RBM28 (55131) -RBM39 (9584) -RBM4 (5936) -RBM42 (79171) -RBM48 (84060) -RBM8A (9939) -RBMX (27316) -RBMX2 (51634) -RBMXL1 (494115) -RBX1 (9978) -RCC1 (1104) -RCC1L (81554) -RCL1 (10171) -RCOR1 (23186) -REV3L (5980) -REXO2 (25996) -RFC1 (5981) -RFC2 (5982) -RFC3 (5983) -RFC4 (5984) -RFC5 (5985) -RFFL (117584) -RFT1 (91869) -RGP1 (9827) -RGPD6 (729540) -RHOQ (23433) -RHPN1 (114822) -RIC1 (57589) -RINT1 (60561) -RIOK1 (83732) -RIOK2 (55781) -RITA1 (84934) -RMI1 (80010) -RNASEH2C (84153) -RNF103 (7844) -RNF113A (7737) -RNF123 (63891) -RNF14 (9604) -RNF168 (165918) -RNF20 (56254) -RNF4 (6047) -RNF40 (9810) -RNF8 (9025) -RNGTT (8732) -RNMT (8731) -RNPC3 (55599) -RNPS1 (10921) -ROMO1 (140823) -RPA1 (6117) -RPA2 (6118) -RPA3 (6119) -RPAIN (84268) -RPAP1 (26015) -RPAP2 (79871) -RPAP3 (79657) -RPE (6120) -RPF1 (80135) -RPF2 (84154) -RPL10 (6134) -RPL10A (4736) -RPL11 (6135) -RPL12 (6136) -RPL13 (6137) -RPL13A (23521) 
-RPL14 (9045) -RPL15 (6138) -RPL17 (6139) -RPL18 (6141) -RPL18A (6142) -RPL19 (6143) -RPL23 (9349) -RPL23A (6147) -RPL24 (6152) -RPL26 (6154) -RPL26L1 (51121) -RPL27 (6155) -RPL27A (6157) -RPL28 (6158) -RPL29 (6159) -RPL3 (6122) -RPL30 (6156) -RPL31 (6160) -RPL32 (6161) -RPL34 (6164) -RPL35 (11224) -RPL36 (25873) -RPL36A (6173) -RPL36AL (6166) -RPL37 (6167) -RPL37A (6168) -RPL38 (6169) -RPL39 (6170) -RPL4 (6124) -RPL41 (6171) -RPL5 (6125) -RPL6 (6128) -RPL7 (6129) -RPL7A (6130) -RPL7L1 (285855) -RPL8 (6132) -RPL9 (6133) -RPLP0 (6175) -RPLP1 (6176) -RPLP2 (6181) -RPN1 (6184) -RPN2 (6185) -RPP14 (11102) -RPP21 (79897) -RPP30 (10556) -RPP38 (10557) -RPP40 (10799) -RPS11 (6205) -RPS12 (6206) -RPS13 (6207) -RPS14 (6208) -RPS15 (6209) -RPS15A (6210) -RPS16 (6217) -RPS17 (6218) -RPS18 (6222) -RPS19 (6223) -RPS19BP1 (91582) -RPS2 (6187) -RPS20 (6224) -RPS21 (6227) -RPS23 (6228) -RPS24 (6229) -RPS25 (6230) -RPS27 (6232) -RPS27A (6233) -RPS28 (6234) -RPS29 (6235) -RPS3 (6188) -RPS3A (6189) -RPS4X (6191) -RPS5 (6193) -RPS6 (6194) -RPS7 (6201) -RPS8 (6202) -RPS9 (6203) -RPSA (3921) -RPTOR (57521) -RPUSD3 (285367) -RPUSD4 (84881) -RRM1 (6240) -RRM2 (6241) -RRN3 (54700) -RRP1 (8568) -RRP12 (23223) -RRP15 (51018) -RRP7A (27341) -RRP9 (9136) -RRS1 (23212) -RSL1D1 (26156) -RSL24D1 (51187) -RSRC2 (65117) -RTCB (51493) -RTEL1 (51750) -RTF1 (23168) -RTF2 (51507) -RTRAF (51637) -RTTN (25914) -RUVBL1 (8607) -RUVBL2 (10856) -SACM1L (22908) -SAE1 (10055) -SAMM50 (25813) -SAP130 (79595) -SAP18 (10284) -SAP30BP (29115) -SARNP (84324) -SARS (6301) -SARS2 (54938) -SART1 (9092) -SART3 (9733) -SASS6 (163786) -SBDS (51119) -SBNO1 (55206) -SCAP (22937) -SCD (6319) -SCFD1 (23256) -SCNM1 (79005) -SDAD1 (55153) -SDC1 (6382) -SDE2 (163859) -SDHA (6389) -SDHAF2 (54949) -SDHC (6391) -SDHD (6392) -SEC13 (6396) -SEC16A (9919) -SEC61A1 (29927) -SEC61B (10952) -SEC61G (23480) -SEC62 (7095) -SEC63 (11231) -SEH1L (81929) -SEM1 (7979) -SENP6 (26054) -SEPHS2 (22928) -SEPSECS (51091) -SERBP1 (26135) -SET (6418) 
-SETD1A (9739) -SETD2 (29072) -SETDB1 (9869) -SF1 (7536) -SF3A1 (10291) -SF3A2 (8175) -SF3A3 (10946) -SF3B1 (23451) -SF3B2 (10992) -SF3B3 (23450) -SF3B4 (10262) -SF3B5 (83443) -SF3B6 (51639) -SFPQ (6421) -SFSWAP (6433) -SGF29 (112869) -SGO1 (151648) -SGPP1 (81537) -SHQ1 (55164) -SIGLEC14 (100049587) -SIN3A (25942) -SINHCAF (58516) -SKA1 (220134) -SKA3 (221150) -SKP1 (6500) -SKP2 (6502) -SLBP (7884) -SLC16A5 (9121) -SLC1A5 (6510) -SLC25A10 (1468) -SLC25A26 (115286) -SLC25A3 (5250) -SLC35B1 (10237) -SLC35G6 (643664) -SLC39A10 (57181) -SLC39A7 (7922) -SLC4A5 (57835) -SLC7A5 (8140) -SLC7A6OS (84138) -SLC9B1 (150159) -SLU7 (10569) -SMAGP (57228) -SMARCB1 (6598) -SMARCE1 (6605) -SMC1A (8243) -SMC2 (10592) -SMC3 (9126) -SMC4 (10051) -SMC5 (23137) -SMC6 (79677) -SMG1 (23049) -SMG5 (23381) -SMG6 (23293) -SMN2 (6607) -SMNDC1 (10285) -SMU1 (55234) -SMUG1 (23583) -SNAPC1 (6617) -SNAPC2 (6618) -SNAPC3 (6619) -SNAPC4 (6621) -SNAPC5 (10302) -SNF8 (11267) -SNIP1 (79753) -SNRNP200 (23020) -SNRNP25 (79622) -SNRNP27 (11017) -SNRNP35 (11066) -SNRNP40 (9410) -SNRNP48 (154007) -SNRNP70 (6625) -SNRPA1 (6627) -SNRPB (6628) -SNRPC (6631) -SNRPD1 (6632) -SNRPD2 (6633) -SNRPD3 (6634) -SNRPE (6635) -SNRPF (6636) -SNRPG (6637) -SNU13 (4809) -SNUPN (10073) -SNW1 (22938) -SOD1 (6647) -SOD2 (6648) -SON (6651) -SP2 (6668) -SPATA31A6 (389730) -SPATA5 (166378) -SPATA5L1 (79029) -SPC24 (147841) -SPC25 (57405) -SPCS2 (9789) -SPCS3 (60559) -SPDL1 (54908) -SPDYE1 (285955) -SPDYE2 (441273) -SPOUT1 (51490) -SPRTN (83932) -SPTLC1 (10558) -SPTSSA (171546) -SRBD1 (55133) -SRCAP (10847) -SRF (6722) -SRFBP1 (153443) -SRP14 (6727) -SRP19 (6728) -SRP54 (6729) -SRP68 (6730) -SRP72 (6731) -SRP9 (6726) -SRPRA (6734) -SRPRB (58477) -SRRM1 (10250) -SRRM2 (23524) -SRRT (51593) -SRSF1 (6426) -SRSF10 (10772) -SRSF11 (9295) -SRSF2 (6427) -SRSF3 (6428) -SRSF6 (6431) -SRSF7 (6432) -SS18L2 (51188) -SSBP1 (6742) -SSBP3 (23648) -SSRP1 (6749) -SSU72 (29101) -SSX4 (6759) -STARD7 (56910) -STAT5A (6776) -STIL (6491) -STN1 (79991) 
-STRAP (11171) -STRIP1 (85369) -STX18 (53407) -STX5 (6811) -SUGP1 (57794) -SUGT1 (10910) -SUMO2 (6613) -SUPT16H (11198) -SUPT4H1 (6827) -SUPT5H (6829) -SUPT6H (6830) -SUPV3L1 (6832) -SURF6 (6838) -SYF2 (25949) -SYMPK (8189) -SYS1 (90196) -TACC3 (10460) -TADA2A (6871) -TADA3 (10474) -TAF1 (6872) -TAF10 (6881) -TAF12 (6883) -TAF1A (9015) -TAF1B (9014) -TAF1C (9013) -TAF1D (79101) -TAF2 (6873) -TAF5 (6877) -TAF6 (6878) -TAF7 (6879) -TAF8 (129685) -TAMM41 (132001) -TANGO6 (79613) -TARDBP (23435) -TARS (6897) -TARS2 (80222) -TBC1D28 (254272) -TBC1D3 (729873) -TBC1D3B (414059) -TBCA (6902) -TBCB (1155) -TBCC (6903) -TBCD (6904) -TBCE (6905) -TBL3 (10607) -TBP (6908) -TCEAL9 (51186) -TCERG1 (10915) -TCOF1 (6949) -TCP1 (6950) -TDGF1 (6997) -TEAD3 (7005) -TEDC1 (283643) -TEDC2 (80178) -TEFM (79736) -TELO2 (9894) -TEN1 (100134934) -TEPSIN (146705) -TERF1 (7013) -TERF2 (7014) -TEX10 (54881) -TFAM (7019) -TFB2M (64216) -TFDP1 (7027) -TFIP11 (24144) -TFRC (7037) -TGS1 (96764) -THAP1 (55145) -THAP11 (57215) -THAP12 (5612) -THG1L (54974) -THOC1 (9984) -THOC2 (57187) -THOC3 (84321) -THOC5 (8563) -THOC6 (79228) -THOC7 (80145) -TICRR (90381) -TIGD1 (200765) -TIMELESS (8914) -TIMM10 (26519) -TIMM13 (26517) -TIMM22 (29928) -TIMM23 (100287932) -TIMM29 (90580) -TIMM44 (10469) -TIMM8A (1678) -TIMM9 (26520) -TINF2 (26277) -TIPIN (54962) -TKT (7086) -TLCD1 (116238) -TLN1 (7094) -TM7SF2 (7108) -TMA16 (55319) -TMED2 (10959) -TMEM127 (55654) -TMEM199 (147007) -TMEM240 (339453) -TMEM258 (746) -TMSB10 (9168) -TMX2 (51075) -TNNT2 (7139) -TNPO3 (23534) -TOE1 (114034) -TOMM20 (9804) -TOMM22 (56993) -TOMM40 (10452) -TONSL (4796) -TOP1 (7150) -TOP2A (7153) -TOP3A (7156) -TOPBP1 (11073) -TOR2A (27433) -TP53RK (112858) -TPI1 (7167) -TPR (7175) -TPRKB (51002) -TPT1 (7178) -TPX2 (22974) -TRA2B (6434) -TRAIP (10293) -TRAPPC1 (58485) -TRAPPC11 (60684) -TRAPPC2B (10597) -TRAPPC3 (27095) -TRAPPC4 (51399) -TRAPPC5 (126003) -TRAPPC8 (22878) -TREML2 (79865) -TRIAP1 (51499) -TRIM49C (642612) -TRMT10C (54931) 
-TRMT112 (51504) -TRMT5 (57570) -TRMT6 (51605) -TRNP1 (388610) -TRNT1 (51095) -TRPM7 (54822) -TRRAP (8295) -TSEN2 (80746) -TSEN34 (79042) -TSEN54 (283989) -TSFM (10102) -TSG101 (7251) -TSPYL5 (85453) -TSR1 (55720) -TSR2 (90121) -TSSK3 (81629) -TTC1 (7265) -TTC27 (55622) -TTC4 (7268) -TTF2 (8458) -TTI1 (9675) -TTI2 (80185) -TTK (7272) -TUBA1B (10376) -TUBA1C (84790) -TUBA3D (113457) -TUBB (203068) -TUBB8P12 (260334) -TUBD1 (51174) -TUBE1 (51175) -TUBG1 (7283) -TUBGCP2 (10844) -TUBGCP3 (10426) -TUBGCP4 (27229) -TUBGCP5 (114791) -TUBGCP6 (85378) -TULP1 (7287) -TUT1 (64852) -TVP23C (201158) -TWF1 (5756) -TWISTNB (221830) -TWNK (56652) -TXN (7295) -TXNL4A (10907) -TXNL4B (54957) -U2AF1 (7307) -U2AF2 (11338) -U2SURP (23350) -UBA1 (7317) -UBA2 (10054) -UBA3 (9039) -UBA5 (79876) -UBA52 (7311) -UBAP1 (51271) -UBC (7316) -UBE2D3 (7323) -UBE2H (7328) -UBE2I (7329) -UBE2L3 (7332) -UBE2L5 (171222) -UBE2M (9040) -UBE2N (7334) -UBE2Z (65264) -UBL5 (59286) -UBQLN4 (56893) -UBR4 (23352) -UBR5 (51366) -UBTF (7343) -UFD1 (7353) -UFL1 (23376) -UFM1 (51569) -UNC45A (55898) -UNC50 (25972) -UPF1 (5976) -UPF2 (26019) -UPF3A (65110) -UQCC2 (84300) -UQCRB (7381) -UQCRC1 (7384) -UQCRC2 (7385) -UQCRFS1 (7386) -UQCRH (7388) -UQCRQ (27089) -URB1 (9875) -URB2 (9816) -URI1 (8725) -URM1 (81605) -UROD (7389) -USF2 (7392) -USP10 (9100) -USP17L5 (728386) -USP19 (10869) -USP36 (57602) -USP37 (57695) -USP39 (10713) -USP5 (8078) -USP7 (7874) -USP8 (9101) -USP9X (8239) -USPL1 (10208) -UTP11 (51118) -UTP15 (84135) -UTP18 (51096) -UTP20 (27340) -UTP23 (84294) -UTP25 (27042) -UTP3 (57050) -UTP4 (84916) -UTP6 (55813) -UXS1 (80146) -UXT (8409) -VARS (7407) -VARS2 (57176) -VBP1 (7411) -VCP (7415) -VCX3A (51481) -VEZT (55591) -VHL (7428) -VIRMA (25962) -VMP1 (81671) -VPS13D (55187) -VPS18 (57617) -VPS25 (84313) -VPS28 (51160) -VPS29 (51699) -VPS33A (65082) -VPS35 (55737) -VPS37A (137492) -VPS37C (55048) -VPS41 (27072) -VPS51 (738) -VPS72 (6944) -WAC (51322) -WARS (7453) -WBP1 (23559) -WBP11 (51729) -WBP1L 
(54838) -WDHD1 (11169) -WDR1 (9948) -WDR12 (55759) -WDR18 (57418) -WDR24 (84219) -WDR25 (79446) -WDR26 (80232) -WDR3 (10885) -WDR33 (55339) -WDR4 (10785) -WDR43 (23160) -WDR46 (9277) -WDR5 (11091) -WDR54 (84058) -WDR55 (54853) -WDR61 (80349) -WDR7 (23335) -WDR70 (55100) -WDR74 (54663) -WDR75 (84128) -WDR77 (79084) -WDR82 (80335) -WDR83OS (51398) -WDR92 (116143) -WDTC1 (23038) -WEE1 (7465) -WNK1 (65125) -WRB (7485) -WTAP (9589) -XAB2 (56949) -XPO1 (7514) -XPO5 (57510) -XRCC1 (7515) -XRCC2 (7516) -XRCC3 (7517) -XRCC5 (7520) -XRCC6 (2547) -XRN2 (22803) -XYLT2 (64132) -YAE1 (57002) -YARS (8565) -YARS2 (51067) -YBX1 (4904) -YEATS2 (55689) -YEATS4 (8089) -YJU2 (55702) -YKT6 (10652) -YPEL1 (29799) -YPEL5 (51646) -YRDC (79693) -YTHDC1 (91746) -YY1 (7528) -ZBTB11 (27107) -ZBTB17 (7709) -ZC3H18 (124245) -ZC3H8 (84524) -ZCCHC9 (84240) -ZCRB1 (85437) -ZFC3H1 (196441) -ZFP69B (65243) -ZMAT2 (153527) -ZMAT5 (55954) -ZMYND8 (23613) -ZNF100 (163227) -ZNF131 (7690) -ZNF133 (7692) -ZNF207 (7756) -ZNF236 (7776) -ZNF253 (56242) -ZNF284 (342909) -ZNF317 (57693) -ZNF335 (63925) -ZNF407 (55628) -ZNF468 (90333) -ZNF492 (57615) -ZNF506 (440515) -ZNF525 (170958) -ZNF559 (84527) -ZNF574 (64763) -ZNF658 (26149) -ZNF674 (641339) -ZNF676 (163223) -ZNF687 (57592) -ZNF718 (255403) -ZNF720 (124411) -ZNF738 (148203) -ZNF763 (284390) -ZNF830 (91603) -ZNHIT2 (741) -ZNHIT3 (9326) -ZNHIT6 (54680) -ZNRD1 (30834) -ZPR1 (8882) -ZRANB2 (9406) -ZRSR2 (8233) -ZW10 (9183) -ZWINT (11130) diff --git a/inst/extdata/original_d.HeLa_annot b/inst/extdata/original_d.HeLa_annot new file mode 100644 index 0000000..e4df006 Binary files /dev/null and b/inst/extdata/original_d.HeLa_annot differ diff --git a/inst/rmd/scratch_gimap_annotation_review.Rmd b/inst/rmd/scratch_gimap_annotation_review.Rmd new file mode 100644 index 0000000..3db1b8f --- /dev/null +++ b/inst/rmd/scratch_gimap_annotation_review.Rmd @@ -0,0 +1,101 @@ +--- +title: "gimap_annotation_review" +output: html_document +date: "`r Sys.Date()`" +--- + +```{r} 
+library(tidyverse) +``` + +## Load in the GI Mapping annotation table + +```{r} +old_annot_df <- readRDS("../extdata/original_d.HeLa_annot") +devtools::load_all() +``` + +## Get the gimap annotation results + +### Get the gimap data setup + +```{r} +gimap_dataset <- get_example_data("gimap") +``` + +### Define the annotation function from the code + +```{r} +gimap_dataset <- gimap_dataset %>% + gimap_annotate() + +new_annot_df <- gimap_dataset$annotation +``` + +## Compare them + +```{r} +nrow(old_annot_df) +nrow(new_annot_df) +``` + +```{r} +joindf <- dplyr::full_join(old_annot_df, new_annot_df, + by = "pgRNA_id", + suffix = c("_old", "_new")) +``` + +```{r} +ggplot(joindf, aes(x = log2_cn_gene1, y = gene1_log2_cn)) + + geom_point() + + ylab("GI_Mapping Output Annotation File: gene1_log2_cn") + + xlab("gimap annotation log2_cn_gene1") + + theme_classic() + + geom_abline(intercept = 0, slope = 1) + +``` + +```{r} +ggplot(joindf, aes(x=log2_cn_gene2, y=gene2_log2_cn)) + + geom_point() + + ylab("GI_Mapping Output Annotation File: gene2_log2_cn") + + xlab("gimap annotation log2_cn_gene2") + + theme_classic() + + geom_abline(intercept = 0, slope = 1) + +``` + +```{r} +ggplot(joindf, aes(x=log2_tpm_gene2, y=gene2_log2_tpm)) + + geom_point() + + ylab("GI_Mapping Output Annotation File: gene2_log2_tpm") + + xlab("gimap annotation log2_tpm_gene2") + + theme_classic() + + geom_abline(intercept = 0, slope = 1) + +``` + +```{r} +ggplot(joindf, aes(x=log2_tpm_gene1, y=gene1_log2_tpm)) + + geom_point() + + ylab("GI_Mapping Output Annotation File: gene1_log2_tpm") + + xlab("gimap annotation log2_tpm_gene1") + + theme_classic() + + geom_abline(intercept = 0, slope = 1) + +``` + +|Which comparison | gimap # NA | GI Mapping # NA | # overlap based on pgRNA ID (using intersect) | +|:--------------:|:------------:|:---------------:|:---------:| +|Gene1 log2 tpm | `r sum(is.na(new_annot_df$log2_tpm_gene1)) +` | `r sum(is.na(old_annot_df$gene1_log2_tpm))` | `r 
length(intersect(new_annot_df$pgRNA_id[which(is.na(new_annot_df$log2_tpm_gene1))] +, old_annot_df$pgRNA_id[which(is.na(old_annot_df$gene1_log2_tpm))]))` | +|Gene 2 log2 tpm | `r sum(is.na(new_annot_df$log2_tpm_gene2)) +` | `r sum(is.na(old_annot_df$gene2_log2_tpm))` | `r length(intersect(new_annot_df$pgRNA_id[which(is.na(new_annot_df$log2_tpm_gene2))] +, old_annot_df$pgRNA_id[which(is.na(old_annot_df$gene2_log2_tpm))]))` | +|Gene 1 log2 CN | `r sum(is.na(new_annot_df$log2_cn_gene1)) +` | `r sum(is.na(old_annot_df$gene1_log2_cn))` | `r length(intersect(new_annot_df$pgRNA_id[which(is.na(new_annot_df$log2_cn_gene1))] +, old_annot_df$pgRNA_id[which(is.na(old_annot_df$gene1_log2_cn))]))` | +|Gene 2 log2 CN | `r sum(is.na(new_annot_df$log2_cn_gene2)) +` | `r sum(is.na(old_annot_df$gene2_log2_cn))` | `r length(intersect(new_annot_df$pgRNA_id[which(is.na(new_annot_df$log2_cn_gene2))] +, old_annot_df$pgRNA_id[which(is.na(old_annot_df$gene2_log2_cn))]))` | diff --git a/inst/rmd/scratch_gimap_annotation_review.html b/inst/rmd/scratch_gimap_annotation_review.html new file mode 100644 index 0000000..b31b3e1 --- /dev/null +++ b/inst/rmd/scratch_gimap_annotation_review.html @@ -0,0 +1,524 @@ + + + + + + + + + + + + + + +gimap_annotation_review + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
library(tidyverse)
+
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr     1.1.4     ✔ readr     2.1.5
+## ✔ forcats   1.0.0     ✔ stringr   1.5.1
+## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
+## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
+## ✔ purrr     1.0.2     
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag()    masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+

Load in the GI Mapping annotation table

+
old_annot_df <- readRDS("../extdata/original_d.HeLa_annot")
+devtools::load_all()
+
## ℹ Loading gimap
+
+
+

Get the gimap annotation results

+
+

Get the gimap data setup

+
gimap_dataset <- get_example_data("gimap")
+
+
+

Define the annotation function from the code

+
gimap_dataset <- gimap_dataset %>% 
+  gimap_annotate()
+
+new_annot_df <- gimap_dataset$annotation
+
+
+
+

Compare them

+
nrow(old_annot_df)
+
## [1] 33170
+
nrow(new_annot_df)
+
## [1] 33170
+
joindf <- dplyr::full_join(old_annot_df, new_annot_df, 
+                           by = "pgRNA_id", 
+                           suffix = c("_old", "_new"))
+
ggplot(joindf, aes(x = log2_cn_gene1, y = gene1_log2_cn)) + 
+  geom_point() + 
+  ylab("GI_Mapping Output Annotation File: gene1_log2_cn") + 
+  xlab("gimap annotation log2_cn_gene1") + 
+  theme_classic() + 
+  geom_abline(intercept = 0, slope = 1)
+
## Warning: Removed 9002 rows containing missing values or values outside the scale range
+## (`geom_point()`).
+

+
ggplot(joindf, aes(x=log2_cn_gene2, y=gene2_log2_cn)) + 
+  geom_point() + 
+  ylab("GI_Mapping Output Annotation File: gene2_log2_cn") + 
+  xlab("gimap annotation log2_cn_gene2") + 
+  theme_classic() + 
+  geom_abline(intercept = 0, slope = 1)
+
## Warning: Removed 938 rows containing missing values or values outside the scale range
+## (`geom_point()`).
+

+
ggplot(joindf, aes(x=log2_tpm_gene2, y=gene2_log2_tpm)) + 
+  geom_point() + 
+  ylab("GI_Mapping Output Annotation File: gene2_log2_tpm") + 
+  xlab("gimap annotation log2_tpm_gene2") + 
+  theme_classic() + 
+  geom_abline(intercept = 0, slope = 1)
+
## Warning: Removed 1092 rows containing missing values or values outside the scale range
+## (`geom_point()`).
+

+
ggplot(joindf, aes(x=log2_tpm_gene1, y=gene1_log2_tpm)) + 
+  geom_point() + 
+  ylab("GI_Mapping Output Annotation File: gene1_log2_tpm") + 
+  xlab("gimap annotation log2_tpm_gene1") + 
+  theme_classic() + 
+  geom_abline(intercept = 0, slope = 1)
+
## Warning: Removed 9050 rows containing missing values or values outside the scale range
+## (`geom_point()`).
+

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Which comparisongimap # NAGI Mapping # NA# overlap based on pgRNA ID (using intersect)
Gene1 log2 tpm9068769625
Gene 2 log2 tpm874750532
Gene 1 log2 CN9068697601
Gene 2 log2 CN874564500
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/man/annotate.Rd b/man/annotate.Rd deleted file mode 100644 index 8d3cc31..0000000 --- a/man/annotate.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/03-annotate.R -\name{annotate} -\alias{annotate} -\title{This is a title for a function} -\usage{ -annotate() -} -\arguments{ -\item{parameter}{Here's a parameter let's describe it here} -} -\description{ -This is a function here's where we describe what it does -} -\examples{ -\dontrun{ - -} -} diff --git a/man/calc_fc.Rd b/man/calc_fc.Rd deleted file mode 100644 index 4609880..0000000 --- a/man/calc_fc.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/02-foldchange.R -\name{calc_fc} -\alias{calc_fc} -\title{This is a title for a function} -\usage{ -calc_fc() -} -\arguments{ -\item{parameter}{Here's a parameter let's describe it here} -} -\description{ -This is a function here's where we describe what it does -} -\examples{ -\dontrun{ - -} -} diff --git a/man/calc_gi.Rd b/man/calc_gi.Rd new file mode 100644 index 0000000..66e5a3d --- /dev/null +++ b/man/calc_gi.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/05-calculate_gi.R +\name{calc_gi} +\alias{calc_gi} +\title{This is a title for a function} +\usage{ +calc_gi(gimap_dataset) +} +\arguments{ +\item{gimap_dataset}{A special dataset structure that is setup using the `setup_data()` function.} + +\item{.data}{Data can be piped in with %>% or |> from function to function. But the data must still be a gimap_dataset} + +\item{test}{options include 'wilcoxon' and 't-test'. 
By default, both will be run.} + +\item{overwrite}{default is FALSE; whether to overwrite the QC Report file} + +\item{output_file}{default is `GI_Results`; name of the output GI results file} +} +\description{ +Create results table that has CRISPR scores, Wilcoxon rank-sum test and t tests. +} +\examples{ +\dontrun{ + +gimap_dataset <- get_example_data("gimap") + +# Highly recommended but not required +run_qc(gimap_dataset) + +gimap_dataset <- gimap_dataset \%>\% + gimap_filter() \%>\% + gimap_annotate() \%>\% + calc_lfc() \%>\% + calc_gi() + +} +} diff --git a/man/calc_lfc.Rd b/man/calc_lfc.Rd new file mode 100644 index 0000000..7ffd726 --- /dev/null +++ b/man/calc_lfc.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/04-foldchange.R +\name{calc_lfc} +\alias{calc_lfc} +\title{Calculate log fold change for a gimap dataset} +\usage{ +calc_lfc(.data = NULL, gimap_dataset, replicates = NULL, timepoints = NULL) +} +\arguments{ +\item{.data}{Data can be piped in with %>% or |> from function to function. But the data must still be a gimap_dataset} + +\item{gimap_dataset}{A special dataset structure that is setup using the `setup_data()` function.} + +\item{replicates}{Specifies the column name of the metadata set up in `$metadata$sample_metadata` that has a factor that represents the replicates.} + +\item{timepoints}{Specifies the column name of the metadata set up in `$metadata$sample_metadata` that has a factor that represents the timepoints.} +} +\description{ +This calculates the log fold change for a gimap dataset based on the annotation and metadata provided. 
+} +\examples{ +\dontrun{ + +gimap_dataset <- get_example_data("gimap") + +# Highly recommended but not required +run_qc(gimap_dataset) + +gimap_dataset <- gimap_dataset \%>\% + gimap_filter() \%>\% + gimap_annotate() \%>\% + calc_lfc() + +# To see results +gimap_dataset$log_fc +} +} diff --git a/man/calculate_gi.Rd b/man/calculate_gi.Rd deleted file mode 100644 index a282eaa..0000000 --- a/man/calculate_gi.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/04-calculate_gi.R -\name{calculate_gi} -\alias{calculate_gi} -\title{This is a title for a function} -\usage{ -calculate_gi() -} -\arguments{ -\item{parameter}{Here's a parameter let's describe it here} -} -\description{ -This is a function here's where we describe what it does -} -\examples{ -\dontrun{ - -} -} diff --git a/man/example_data.Rd b/man/example_data.Rd deleted file mode 100644 index 2b58c30..0000000 --- a/man/example_data.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{example_data} -\alias{example_data} -\title{This is a title for a function} -\usage{ -example_data() -} -\arguments{ -\item{parameter}{Here's a parameter let's describe it here} -} -\description{ -This is a function here's where we describe what it does -} -\examples{ -\dontrun{ - -pg_data <- example_data() -} -} diff --git a/man/example_data_folder.Rd b/man/example_data_folder.Rd new file mode 100644 index 0000000..6b5750a --- /dev/null +++ b/man/example_data_folder.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{example_data_folder} +\alias{example_data_folder} +\title{Get file path to the example data folder} +\usage{ +example_data_folder() +} +\value{ +Returns the file path to the folder where the example data is stored +} +\description{ +Get file path to the example data folder +} diff --git a/man/get_example_data.Rd 
b/man/get_example_data.Rd new file mode 100644 index 0000000..83d0e89 --- /dev/null +++ b/man/get_example_data.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_example_data} +\alias{get_example_data} +\title{Returns example data for package} +\usage{ +get_example_data(which_data) +} +\arguments{ +\item{which_data}{options include "count", "meta", "gimap", or "annotation"; specifies which example dataset should be returned} +} +\description{ +This function loads and returns example data for the package. Which dataset is returned must be specified +} +\examples{ +\dontrun{ + +gimap_dataset <- get_example_data("gimap") +} +} diff --git a/man/gimap_annotate.Rd b/man/gimap_annotate.Rd new file mode 100644 index 0000000..87839ad --- /dev/null +++ b/man/gimap_annotate.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/03-annotate.R +\name{gimap_annotate} +\alias{gimap_annotate} +\title{Annotate gimap data} +\usage{ +gimap_annotate( + .data = NULL, + gimap_dataset, + cell_line = "HELA", + control_genes = NULL, + cn_annotate = TRUE, + annotation_file = NULL +) +} +\arguments{ +\item{.data}{Data can be piped in with %>% or |> from function to function. But the data must still be a gimap_dataset} + +\item{gimap_dataset}{A special dataset structure that is setup using the `setup_data()` function.} + +\item{cell_line}{which cell line are you using? Default is "HELA"} + +\item{control_genes}{A vector of gene symbols (e.g. AAMP) that should be labeled as control genes. These will be used for log fold change calculations. If no list is given then DepMap Public 23Q4 Achilles_common_essentials.csv is used https://depmap.org/portal/download/all/} + +\item{cn_annotate}{TRUE or FALSE: whether you'd also like to have Copy number annotation from DepMap. 
These data are optional} + +\item{annotation_file}{If no file is given, will attempt to use the design file from https://media.addgene.org/cms/filer_public/a9/9a/a99a9328-324b-42ff-8ccc-30c544b899e4/pgrna_library.xlsx} +} +\description{ +In this function, a `gimap_dataset` is annotated as far as which genes should be used as controls. +} +\examples{ +\dontrun{ + +gimap_dataset <- get_example_data("gimap") + +# Highly recommended but not required +run_qc(gimap_dataset) + +gimap_dataset <- gimap_dataset \%>\% + gimap_filter() \%>\% + gimap_annotate() + +# To see annotations +gimap_dataset$annotation +} +} diff --git a/man/gimap_filter.Rd b/man/gimap_filter.Rd new file mode 100644 index 0000000..be927e7 --- /dev/null +++ b/man/gimap_filter.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/02-filter.R +\name{gimap_filter} +\alias{gimap_filter} +\title{A function to run filtering} +\usage{ +gimap_filter(.data = NULL, gimap_dataset, filter_type = "both") +} +\arguments{ +\item{.data}{Data can be piped in with %>% or |> from function to function. But the data must still be a gimap_dataset} + +\item{gimap_dataset}{A special dataset structure that is setup using the `setup_data()` function.} + +\item{filter_type}{Can be one of the following: `zero_count_only`, `low_plasmid_cpm_only` or `rep_variation`, `zero_in_last_time_point` or a vector that includes multiple of these filters. 
+You should decide on the appropriate filter based on the results of your QC report.} +} +\value{ +a filtered version of the gimap_dataset returned in the $filtered_data section +} +\description{ +This function filters a `gimap_dataset` using the filter(s) chosen with `filter_type` and returns the filtered data in the `$filtered_data` section +} +\examples{ +\dontrun{ + +gimap_dataset <- get_example_data("gimap") + +# Highly recommended but not required +run_qc(gimap_dataset) + + +gimap_dataset <- gimap_filter(gimap_dataset) + +# To see filtered data +gimap_dataset$filtered_data + +} +} diff --git a/man/qc_cdf.Rd b/man/qc_cdf.Rd new file mode 100644 index 0000000..7fcc1f4 --- /dev/null +++ b/man/qc_cdf.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots-qc.R +\name{qc_cdf} +\alias{qc_cdf} +\title{Create a CDF for the pgRNA normalized counts} +\usage{ +qc_cdf(gimap_dataset, wide_ar = 0.75) +} +\arguments{ +\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data} + +\item{wide_ar}{aspect ratio, default is 0.75} + +\item{qc_obj}{The object that has the qc stuff stored} +} +\value{ +counts_cdf a ggplot +} +\description{ +This function uses pivot_longer to rearrange the data for plotting and then plots a CDF of the normalized counts +} +\examples{ +\dontrun{ + +} + +} diff --git a/man/qc_cor_heatmap.Rd b/man/qc_cor_heatmap.Rd new file mode 100644 index 0000000..1e2f5fc --- /dev/null +++ b/man/qc_cor_heatmap.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots-qc.R +\name{qc_cor_heatmap} +\alias{qc_cor_heatmap} +\title{Create a correlation heatmap for the pgRNA CPMs} +\usage{ +qc_cor_heatmap(gimap_dataset, ...) 
+} +\arguments{ +\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data} + +\item{...}{Additional arguments are passed in to the pheatmap function.} +} +\value{ +`sample_cor_heatmap` a pheatmap +} +\description{ +This function uses the `cor` function to find correlations between the sample CPM's and then plots a heatmap of these +} +\examples{ +\dontrun{ + +} + +} diff --git a/man/qc_plasmid_histogram.Rd b/man/qc_plasmid_histogram.Rd new file mode 100644 index 0000000..7fd6133 --- /dev/null +++ b/man/qc_plasmid_histogram.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots-qc.R +\name{qc_plasmid_histogram} +\alias{qc_plasmid_histogram} +\title{Create a histogram with plasmid log2 CPM values and ascertain a cutoff for low values} +\usage{ +qc_plasmid_histogram(gimap_dataset, cutoff = NULL, wide_ar = 0.75) +} +\arguments{ +\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data} + +\item{cutoff}{default is NULL, the cutoff for low log2 CPM values for the plasmid time period} + +\item{wide_ar}{aspect ratio, default is 0.75} +} +\value{ +a named list +} +\description{ +Find the distribution of plasmid (day0 data) pgRNA log2 CPM values, and ascertain a cutoff or filter for low log2 CPM values. +Assumes the first column of the dataset is the day0 data; do I need a better +method to tell, especially if there are reps? 
+} diff --git a/man/qc_sample_hist.Rd b/man/qc_sample_hist.Rd new file mode 100644 index 0000000..710e2c9 --- /dev/null +++ b/man/qc_sample_hist.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots-qc.R +\name{qc_sample_hist} +\alias{qc_sample_hist} +\title{Create a histogram for the pgRNA log2 CPMs, faceted by sample} +\usage{ +qc_sample_hist(gimap_dataset, wide_ar = 0.75) +} +\arguments{ +\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data} + +\item{wide_ar}{aspect ratio, default is 0.75} +} +\value{ +sample_cpm_histogram a ggplot +} +\description{ +This function uses pivot_longer to rearrange the data for plotting and then plots sample specific histograms of the pgRNA cpm's +} +\examples{ +\dontrun{ + +} + +} diff --git a/man/run_qc.Rd b/man/run_qc.Rd index 533be4d..36ed657 100644 --- a/man/run_qc.Rd +++ b/man/run_qc.Rd @@ -2,18 +2,38 @@ % Please edit documentation in R/01-qc.R \name{run_qc} \alias{run_qc} -\title{This is a title for a function} +\title{Run Quality Control Checks} \usage{ -run_qc() +run_qc( + gimap_dataset, + output_file = "./gimap_QC_Report.Rmd", + plots_dir = "./qc_plots", + overwrite = FALSE, + ... 
+) } \arguments{ -\item{parameter}{Here's a parameter let's describe it here} +\item{gimap_dataset}{A special dataset structure that is setup using the `setup_data()` function.} + +\item{output_file}{default is `./gimap_QC_Report.Rmd`; name of the output QC report file} + +\item{plots_dir}{default is `./qc_plots`; directory to save plots created with this function, if it doesn't exist already it will be created} + +\item{overwrite}{default is FALSE; whether to overwrite the QC Report file} + +\item{...}{additional parameters are sent to `rmarkdown::render()`} +} +\value{ +a QC report saved locally } \description{ -This is a function here's where we describe what it does +This function takes a `gimap_dataset` and creates a QC report } \examples{ \dontrun{ +gimap_dataset <- get_example_data("gimap") + +run_qc(gimap_dataset) } } diff --git a/man/setup_data.Rd b/man/setup_data.Rd index 9b2e945..b2c6319 100644 --- a/man/setup_data.Rd +++ b/man/setup_data.Rd @@ -4,24 +4,40 @@ \alias{setup_data} \title{Making a new gimap dataset} \usage{ -setup_data(counts = NULL, pg_metadata = NULL, sample_metadata = NULL) +setup_data( + counts = NULL, + pg_ids = NULL, + pg_metadata = NULL, + sample_metadata = NULL +) } \arguments{ \item{counts}{a matrix of data that contains the counts where rows are each paired_guide target and columns are each sample} -\item{pg_metadata}{metadata associated with the pgRNA constructs that correspond to the rows of the counts data} +\item{pg_ids}{the pgRNA IDs: metadata associated with the pgRNA constructs that correspond to the rows of the counts data} -\item{sample_metadata}{metadata associated with the samples of the dataset that correspond to the columns of the counts data} +\item{pg_metadata}{construct metadata} + +\item{sample_metadata}{metadata associated with the samples of the dataset that correspond to the columns of the counts data. 
+Should include a column that has replicate information as well as a column that contains timepoint information respectively (this will be used for log fold change calculations). These columns should be factors.} +} +\value{ +A special gimap_dataset to be used with the other functions in this package. } \description{ This function allows people to have their data ready to be processed by gimap } \examples{ \dontrun{ -data <- example_data() \%>\% + +example_counts <- get_example_data("count") \%>\% dplyr::select(c("Day00_RepA", "Day05_RepA", "Day22_RepA", "Day22_RepB", "Day22_RepC")) \%>\% as.matrix() -counts_data <- setup_data(data) +gimap_dataset <- setup_data(counts = example_counts) + +# You can see what an example dataset looks like by pulling the example gimap_dataset: +gimap_dataset <- get_example_data("gimap") + } }