From cf6c6e77995913e0cbca565ba7310a33bba5cd22 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 16:05:35 -0400 Subject: [PATCH 01/42] Update filter_calls.R --- R/filter_calls.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index e1a8ab0..c06e3e3 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -39,7 +39,7 @@ filter_calls = function( # for each patient produce the correct results ---------------------------- # x <- unique(master.ref$cmo_patient_id)[1] - all.fillout.dim <- lapply(unique(master.ref$cmo_patient_id),function(x){ + all.fillout.dim <- lapply(unique(master.ref[cmo_patient_id=='C-PXVUM9']$cmo_patient_id),function(x){ print(paste0('Processing patient ',x)) # Inputs and sanity checks ------------------------------------------------ fillouts.filenames <- list.files(paste0(results.dir,'/',x,'/'),'ORG-STD_genotyped.maf|ORG-SIMPLEX-DUPLEX_genotyped.maf',full.names = T) @@ -156,7 +156,8 @@ filter_calls = function( # final processing -------------------------------------------------------- # Save only the useful column - fillouts.dt <- fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] + fillouts.dt <- fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] + print(fillout.dt) # combining duplex and simplex counts lapply(plasma.samples,function(tmp.col.name){ # hotspot reads From e4038ac0995842966933a4ef4eb125c150d2f35a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 16:08:30 -0400 Subject: [PATCH 02/42] Update filter_calls.R --- R/filter_calls.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index c06e3e3..8c5e4fa 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -157,7 +157,7 @@ filter_calls = function( # final processing -------------------------------------------------------- # Save only the useful column fillouts.dt <- 
fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] - print(fillout.dt) + print(fillouts.dt) # combining duplex and simplex counts lapply(plasma.samples,function(tmp.col.name){ # hotspot reads From 62a9c15e7947ea6cd0d2dd088002a12df7ec2a83 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 16:10:04 -0400 Subject: [PATCH 03/42] Update filter_calls.R --- R/filter_calls.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/filter_calls.R b/R/filter_calls.R index 8c5e4fa..0bd44f0 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -156,6 +156,8 @@ filter_calls = function( # final processing -------------------------------------------------------- # Save only the useful column + print(fillouts.dt) + print("#######") fillouts.dt <- fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] print(fillouts.dt) # combining duplex and simplex counts From e6603dd290d69ceb368c81187247d3fa24691135 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 16:23:28 -0400 Subject: [PATCH 04/42] Update filter_calls.R --- R/filter_calls.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index 0bd44f0..7941ec7 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -137,16 +137,16 @@ filter_calls = function( if(all(!c('unfilterednormal','normal_DMP') %in% sample.sheet$Sample_Type)){ tmp.col.name <- plasma.samples[1] lapply(plasma.samples,function(tmp.col.name){ - fillouts.dt[as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)"))) >= 0.3 | ExAC_AF >= 0.0001,eval(paste0(tmp.col.name,'.called')) := 'Not Called'] + #fillouts.dt[as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)"))) >= 0.3 | ExAC_AF >= 0.0001,eval(paste0(tmp.col.name,'.called')) := 'Not Called'] fillouts.dt[get(tmp.col.name) == '0/0(NaN)',eval(paste0(tmp.col.name,'.called')) := 'Not Covered'] }) }else{ 
lapply(plasma.samples,function(tmp.col.name){ lapply(normal.samples,function(tmp.col.name.normal){ # duplex tvar/nvar > 5 - fillouts.dt[(as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 5) | + fillouts.dt[(as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 2) | # if duplex have no reads, use simplex tvar - (as.numeric(gsub("\\(|\\)",'',str_extract(get(gsub('duplex','simplex',tmp.col.name)),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 5 & + (as.numeric(gsub("\\(|\\)",'',str_extract(get(gsub('duplex','simplex',tmp.col.name)),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 2 & as.numeric(gsub("/.*.$",'',get(tmp.col.name))) == 0), eval(paste0(tmp.col.name,'.called')) := 'Not Called'] fillouts.dt[get(tmp.col.name) == '0/0(NaN)',eval(paste0(tmp.col.name,'.called')) := 'Not Covered'] From e4b4e965ba037f1c6003e393de66a827d9bc648e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 16:25:16 -0400 Subject: [PATCH 05/42] Update filter_calls.R --- R/filter_calls.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index 7941ec7..539b11a 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -39,7 +39,7 @@ filter_calls = function( # for each patient produce the correct results ---------------------------- # x <- unique(master.ref$cmo_patient_id)[1] - all.fillout.dim <- lapply(unique(master.ref[cmo_patient_id=='C-PXVUM9']$cmo_patient_id),function(x){ + all.fillout.dim <- lapply(unique(master.ref$cmo_patient_id),function(x){ print(paste0('Processing patient ',x)) # Inputs and sanity checks ------------------------------------------------ fillouts.filenames <- 
list.files(paste0(results.dir,'/',x,'/'),'ORG-STD_genotyped.maf|ORG-SIMPLEX-DUPLEX_genotyped.maf',full.names = T) @@ -156,10 +156,10 @@ filter_calls = function( # final processing -------------------------------------------------------- # Save only the useful column - print(fillouts.dt) - print("#######") + #print(fillouts.dt) + #print("#######") fillouts.dt <- fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] - print(fillouts.dt) + #print(fillouts.dt) # combining duplex and simplex counts lapply(plasma.samples,function(tmp.col.name){ # hotspot reads From 03e8bd64f54d53943738ad82239e7e844966e455 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 16:38:40 -0400 Subject: [PATCH 06/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 759fe3c..47324e8 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -192,9 +192,9 @@ plot_all_events = function( color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),group = paste0(Hugo_Symbol,'_',HGVSp_Short))) + geom_point(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + - labs(title=x,x='Time Point', y='VAF') + #scale_x_date(date_labels = "%Y %b %d") + + labs(title=x,x='Time Point', y='log10(VAF)') + #scale_x_date(date_labels = "%Y %b %d") + scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + + theme_minimal() + scale_y_log10() + scale_x_discrete() theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) print(SNV.SV.plot) From 
bebba59cccf947389e1a94858a4b5b6b4c0a64de Mon Sep 17 00:00:00 2001 From: Youyun Zheng Date: Tue, 7 Jul 2020 16:41:42 -0400 Subject: [PATCH 07/42] Update plot_all_events.R adding changes to specify breaks on x axis when dealing with date variables --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 47324e8..a2782cd 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -194,7 +194,7 @@ plot_all_events = function( color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + labs(title=x,x='Time Point', y='log10(VAF)') + #scale_x_date(date_labels = "%Y %b %d") + scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + scale_x_discrete() + theme_minimal() + scale_y_log10() + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) print(SNV.SV.plot) @@ -210,7 +210,7 @@ plot_all_events = function( getPalette = colorRampPalette(brewer.pal(8, "Set2")) CNA.plot = ggplot(tmp.cna) + geom_bar(aes(x = Tumor_Sample_Barcode,y = abs(fc),fill = paste0(Hugo_Symbol,'_',CNA)),position="dodge", stat="identity") + - labs(x='Time Point', y='Absolute fc') + #scale_x_date(date_labels = "%Y %b %d") + + labs(x='Time Point', y='Absolute fc') + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + scale_fill_manual(values = getPalette(colourCount),name = 'Alteration') + theme_minimal() + theme(panel.grid.major = element_blank(),legend.position="bottom",axis.text.x = element_text(angle=45, hjust=1,face = 'bold')) print(CNA.plot) From 
8e0086eb7ff4e4839895115cbb204916bb1e4990 Mon Sep 17 00:00:00 2001 From: Youyun Zheng Date: Tue, 7 Jul 2020 16:57:41 -0400 Subject: [PATCH 08/42] Update plot_all_events.R hopefully this works --- R/plot_all_events.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index a2782cd..262dfcd 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -178,6 +178,8 @@ plot_all_events = function( print(transform.vector) } tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] + factor.levels = sort(tmp.table$Tumor_Sample_Barcode) + tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ print('skiping to the next') @@ -204,7 +206,9 @@ plot_all_events = function( # expand table on all empty samples without any calls data.table() %>% dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode,drop = c(TRUE, FALSE),fill = 0,value.var = 'fc') %>% melt.data.table(id.vars = c('Hugo_Symbol','CNA'),variable.name = 'Tumor_Sample_Barcode',value.name = 'fc') %>% data.table() - tmp.cna$Tumor_Sample_Barcode = transform.vector[tmp.cna$Tumor_Sample_Barcode] + tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] + factor.levels = sort(tmp.table$Tumor_Sample_Barcode) + tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) colourCount = nrow(unique(tmp.cna[,.(Hugo_Symbol,CNA)])) getPalette = colorRampPalette(brewer.pal(8, "Set2")) From 22381be542495c1fde085252097c3e118331197d Mon Sep 17 00:00:00 2001 From: Youyun Zheng Date: Tue, 7 Jul 2020 17:00:45 -0400 Subject: [PATCH 09/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 262dfcd..64db1b9 100644 --- a/R/plot_all_events.R +++ 
b/R/plot_all_events.R @@ -178,7 +178,7 @@ plot_all_events = function( print(transform.vector) } tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - factor.levels = sort(tmp.table$Tumor_Sample_Barcode) + factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ @@ -207,7 +207,7 @@ plot_all_events = function( data.table() %>% dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode,drop = c(TRUE, FALSE),fill = 0,value.var = 'fc') %>% melt.data.table(id.vars = c('Hugo_Symbol','CNA'),variable.name = 'Tumor_Sample_Barcode',value.name = 'fc') %>% data.table() tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - factor.levels = sort(tmp.table$Tumor_Sample_Barcode) + factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) colourCount = nrow(unique(tmp.cna[,.(Hugo_Symbol,CNA)])) From 6a694bc1c88de801b1684841fc09ff2ec53b7256 Mon Sep 17 00:00:00 2001 From: Youyun Zheng Date: Tue, 7 Jul 2020 17:05:06 -0400 Subject: [PATCH 10/42] Update plot_all_events.R try just scale x date --- R/plot_all_events.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 64db1b9..821069c 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -178,8 +178,9 @@ plot_all_events = function( print(transform.vector) } tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) - tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + #factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) + #print(factor.levels) + 
#tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ print('skiping to the next') @@ -194,9 +195,9 @@ plot_all_events = function( color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),group = paste0(Hugo_Symbol,'_',HGVSp_Short))) + geom_point(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + - labs(title=x,x='Time Point', y='log10(VAF)') + #scale_x_date(date_labels = "%Y %b %d") + + labs(title=x,x='Time Point', y='log10(VAF)') + scale_x_date(date_labels = "%Y %b %d",breaks = sort(unique(tmp.table$Tumor_Sample_Barocde))) + scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + theme_minimal() + scale_y_log10() + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) print(SNV.SV.plot) @@ -207,14 +208,15 @@ plot_all_events = function( data.table() %>% dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode,drop = c(TRUE, FALSE),fill = 0,value.var = 'fc') %>% melt.data.table(id.vars = c('Hugo_Symbol','CNA'),variable.name = 'Tumor_Sample_Barcode',value.name = 'fc') %>% data.table() tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) - tmp.table$Tumor_Sample_Barcode = 
factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + #factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) + #tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) colourCount = nrow(unique(tmp.cna[,.(Hugo_Symbol,CNA)])) getPalette = colorRampPalette(brewer.pal(8, "Set2")) CNA.plot = ggplot(tmp.cna) + geom_bar(aes(x = Tumor_Sample_Barcode,y = abs(fc),fill = paste0(Hugo_Symbol,'_',CNA)),position="dodge", stat="identity") + - labs(x='Time Point', y='Absolute fc') + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + labs(x='Time Point', y='Absolute fc') + scale_x_date(date_labels = "%Y %b %d",breaks = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + scale_fill_manual(values = getPalette(colourCount),name = 'Alteration') + theme_minimal() + theme(panel.grid.major = element_blank(),legend.position="bottom",axis.text.x = element_text(angle=45, hjust=1,face = 'bold')) print(CNA.plot) From da64bd86cf71a240818faaf282e97485ccd9cc34 Mon Sep 17 00:00:00 2001 From: Youyun Zheng Date: Tue, 7 Jul 2020 17:05:39 -0400 Subject: [PATCH 11/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 821069c..f82c25b 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -178,8 +178,8 @@ plot_all_events = function( print(transform.vector) } tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - #factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) - #print(factor.levels) + factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) + print(factor.levels) #tmp.table$Tumor_Sample_Barcode = 
factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ From 230b421d8695915bfc81ce83ef6bdf0be269a117 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:30:24 -0400 Subject: [PATCH 12/42] Update plot_all_events.R --- R/plot_all_events.R | 370 +++++++++++++++++++++++++------------------- 1 file changed, 207 insertions(+), 163 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index f82c25b..9ac3993 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -10,222 +10,266 @@ # helper methods ---------------------------------------------------------- # collapsing read counts from the same rearrangement events into one total count -collapse_AF = function(x){ +collapse_AF <- function(x) { # x = c("32-117-190(0.7842)|53-0-959(0.0553)","NA|16-0-1035(0.0155)","63-0-954(0.066)|NA" ) # x = c('3-5-1300') # x = c('NA|NA|NA|0-56-4183','NA|NA|NA|3-4-76') - print(paste0(x,collapse = "','")) + print(paste0(x, collapse = "','")) # number of samples - samples.num = attr(gregexpr('\\|',x[1])[[1]],"match.length") + samples.num <- attr(gregexpr("\\|", x[1])[[1]], "match.length") print(samples.num) # when not found, return -1 - if(samples.num != -1){ - paste0(round(apply(separate(data.frame(AF = x),'AF',paste0('timpoint_',c(1:(length(samples.num)+1))),sep = '\\|'),2,function(one.tp.afs){ - mean(unlist(lapply(one.tp.afs,function(tmp.af){ - if(tmp.af == 'NA'){return(0)} - else{ - read.counts = as.numeric(str_split(gsub('\\(.*','',tmp.af),'-')[[1]]) - return((read.counts[1]+read.counts[2])/read.counts[3]) + if (samples.num != -1) { + paste0(round(apply(separate(data.frame(AF = x), "AF", paste0("timpoint_", c(1:(length(samples.num) + 1))), sep = "\\|"), 2, function(one.tp.afs) { + mean(unlist(lapply(one.tp.afs, function(tmp.af) { + if (tmp.af == "NA") { + return(0) + } + else { + read.counts <- as.numeric(str_split(gsub("\\(.*", "", tmp.af), "-")[[1]]) + 
return((read.counts[1] + read.counts[2]) / read.counts[3]) } }))) - }),digits = 3),collapse = '|') - }else{ + }), digits = 3), collapse = "|") + } else { print(x) - as.character(round(mean(unlist(lapply(x,function(tmp.af){ - if(tmp.af == 'NA'){return(0)} - else{ - read.counts = as.numeric(str_split(gsub('\\(.*','',tmp.af),'-')[[1]]) - return((read.counts[1]+read.counts[2])/read.counts[3]) + as.character(round(mean(unlist(lapply(x, function(tmp.af) { + if (tmp.af == "NA") { + return(0) + } + else { + read.counts <- as.numeric(str_split(gsub("\\(.*", "", tmp.af), "-")[[1]]) + return((read.counts[1] + read.counts[2]) / read.counts[3]) } - }))),digits = 3)) + }))), digits = 3)) } } # convert naming to timepoint, get rid of uncovered impact and access calls -process_maf_for_graph = function(tmp.maf){ - print('convert naming to timepoint, get rid of uncovered impact and access calls') +process_maf_for_graph <- function(tmp.maf) { + print("convert naming to timepoint, get rid of uncovered impact and access calls") # tmp.maf = ret.054.calls # tumor sample - tumor.sample = structure(gsub('-','',str_extract(unique(tmp.maf$Tumor_Sample_Barcode[grep('IM[0-9]$',tmp.maf$Tumor_Sample_Barcode)]),'-T..-')), - names = as.character(unique(tmp.maf$Tumor_Sample_Barcode[grep('IM[0-9]$',tmp.maf$Tumor_Sample_Barcode)]))) + tumor.sample <- structure(gsub("-", "", str_extract(unique(tmp.maf$Tumor_Sample_Barcode[grep("IM[0-9]$", tmp.maf$Tumor_Sample_Barcode)]), "-T..-")), + names = as.character(unique(tmp.maf$Tumor_Sample_Barcode[grep("IM[0-9]$", tmp.maf$Tumor_Sample_Barcode)])) + ) print(tumor.sample) # rest of the samples are plasma - plasma.sample = setdiff(tmp.maf$Tumor_Sample_Barcode,names(tumor.sample)) + plasma.sample <- setdiff(tmp.maf$Tumor_Sample_Barcode, names(tumor.sample)) # filter for plasma sample only - tmp.maf = tmp.maf[Tumor_Sample_Barcode %in% plasma.sample] + tmp.maf <- tmp.maf[Tumor_Sample_Barcode %in% plasma.sample] # change samples into timepoint information - 
plasma.sample = structure(case_when( + plasma.sample <- structure(case_when( # some of the DA-ret sample need to be renamed - grepl('-T0._',plasma.sample) ~ gsub('-|_','',gsub('T','L0',str_extract(plasma.sample,'-T0._'))), + grepl("-T0._", plasma.sample) ~ gsub("-|_", "", gsub("T", "L0", str_extract(plasma.sample, "-T0._"))), # otherwise extract L00 something - TRUE ~ gsub('-','',str_extract(plasma.sample,'-L...-')) - ),names = plasma.sample) + TRUE ~ gsub("-", "", str_extract(plasma.sample, "-L...-")) + ), names = plasma.sample) print(plasma.sample) - sample.name.conversion = c(tumor.sample,plasma.sample) + sample.name.conversion <- c(tumor.sample, plasma.sample) print(sample.name.conversion) # get all not covered calls - not.covered.df = unique(tmp.maf[call_confidence == 'Not Covered',.N,.(Hugo_Symbol,Chromosome,Start_Position,End_Position,Variant_Classification, - HGVSp_Short,Reference_Allele,Tumor_Seq_Allele2)])[N > length(plasma.sample)/2] - only.covered.tmp.maf = anti_join(tmp.maf,not.covered.df,by = c('Hugo_Symbol','Chromosome','Start_Position','End_Position','Variant_Classification', - 'HGVSp_Short','Reference_Allele','Tumor_Seq_Allele2')) %>% data.table() + not.covered.df <- unique(tmp.maf[call_confidence == "Not Covered", .N, .( + Hugo_Symbol, Chromosome, Start_Position, End_Position, Variant_Classification, + HGVSp_Short, Reference_Allele, Tumor_Seq_Allele2 + )])[N > length(plasma.sample) / 2] + only.covered.tmp.maf <- anti_join(tmp.maf, not.covered.df, by = c( + "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Variant_Classification", + "HGVSp_Short", "Reference_Allele", "Tumor_Seq_Allele2" + )) %>% data.table() # only the converted timepoint names # only.covered.tmp.maf$Tumor_Sample_Barcode = sample.name.conversion[as.character(only.covered.tmp.maf$Tumor_Sample_Barcode)] - if(any(grepl('__',only.covered.tmp.maf$Hugo_Symbol))){ - fusion.only.covered.tmp.maf = data.table(only.covered.tmp.maf)[grepl('__',Hugo_Symbol)] + if (any(grepl("__", 
only.covered.tmp.maf$Hugo_Symbol))) { + fusion.only.covered.tmp.maf <- data.table(only.covered.tmp.maf)[grepl("__", Hugo_Symbol)] # process original hugo_symbol column (sort two genes by name) - fusion.only.covered.tmp.maf$Hugo_Symbol = unlist(lapply(fusion.only.covered.tmp.maf$Hugo_Symbol,function(x){paste0(sort(str_split(x,'__')[[1]]),collapse = '-')})) - fusion.only.covered.tmp.maf$Chromosome = unlist(lapply(fusion.only.covered.tmp.maf$Chromosome,function(x){paste0(sort(str_split(x,'__')[[1]]),collapse = '-')})) + fusion.only.covered.tmp.maf$Hugo_Symbol <- unlist(lapply(fusion.only.covered.tmp.maf$Hugo_Symbol, function(x) { + paste0(sort(str_split(x, "__")[[1]]), collapse = "-") + })) + fusion.only.covered.tmp.maf$Chromosome <- unlist(lapply(fusion.only.covered.tmp.maf$Chromosome, function(x) { + paste0(sort(str_split(x, "__")[[1]]), collapse = "-") + })) # collapsing AF for rows of the same events (i.e. reciprocal rearrangement) while perserving the sample level seaparation in AF - fusion.only.covered.tmp.maf = fusion.only.covered.tmp.maf[,.(Start_Position = Start_Position[1],End_Position = End_Position[1],HGVSp_Short = HGVSp_Short[1], - Reference_Allele = Reference_Allele[1], Tumor_Seq_Allele2 = Tumor_Seq_Allele2[1], - ExAC_AF = ExAC_AF[1], Hotspot = Hotspot[1], DMP = DMP[1], duplex_support_num = duplex_support_num[1], - call_confidence = ifelse(any(call_confidence == 'Called'),'Called','Not Called'), - call_info = paste0(call_info,collapse = ' | '),CH = 'No', - t_alt_count = sum(t_alt_count,na.rm = T),t_total_count = sum(t_total_count,na.rm = T)), - .(Hugo_Symbol,Chromosome,Variant_Classification,Tumor_Sample_Barcode)] - only.covered.tmp.maf = only.covered.tmp.maf[-grep('__',Hugo_Symbol)] - only.covered.tmp.maf = rbind(only.covered.tmp.maf,fusion.only.covered.tmp.maf) + fusion.only.covered.tmp.maf <- fusion.only.covered.tmp.maf[ + , .( + Start_Position = Start_Position[1], End_Position = End_Position[1], HGVSp_Short = HGVSp_Short[1], + Reference_Allele = 
Reference_Allele[1], Tumor_Seq_Allele2 = Tumor_Seq_Allele2[1], + ExAC_AF = ExAC_AF[1], Hotspot = Hotspot[1], DMP = DMP[1], duplex_support_num = duplex_support_num[1], + call_confidence = ifelse(any(call_confidence == "Called"), "Called", "Not Called"), + call_info = paste0(call_info, collapse = " | "), CH = "No", + t_alt_count = sum(t_alt_count, na.rm = T), t_total_count = sum(t_total_count, na.rm = T) + ), + .(Hugo_Symbol, Chromosome, Variant_Classification, Tumor_Sample_Barcode) + ] + only.covered.tmp.maf <- only.covered.tmp.maf[-grep("__", Hugo_Symbol)] + only.covered.tmp.maf <- rbind(only.covered.tmp.maf, fusion.only.covered.tmp.maf) } - only.covered.tmp.maf$t_alt_count = ifelse(is.na(only.covered.tmp.maf$t_alt_count),0,only.covered.tmp.maf$t_alt_count) - only.covered.tmp.maf$t_total_count = ifelse(is.na(only.covered.tmp.maf$t_total_count),0,only.covered.tmp.maf$t_total_count) + only.covered.tmp.maf$t_alt_count <- ifelse(is.na(only.covered.tmp.maf$t_alt_count), 0, only.covered.tmp.maf$t_alt_count) + only.covered.tmp.maf$t_total_count <- ifelse(is.na(only.covered.tmp.maf$t_total_count), 0, only.covered.tmp.maf$t_total_count) return(only.covered.tmp.maf) } # melting genotype tables into maf-like format -table_to_maf = function(tmp.table,sample.table){ +table_to_maf <- function(tmp.table, sample.table) { # tmp.table = fillouts.dt # sample.table = sample.sheet # tmp.table = ret.006.table # sample.table = ret.006.sample.sheet # extract information for plasma and tumor - tmp.table = data.table(tmp.table) - lapply(sample.table[Sample_Type %in% c('duplex')]$Sample_Barcode,function(y){ - sample.call.status.colname = paste0(y,'___duplex.called') - sample.af.colname = paste0(y,'___total') - tmp.table[,eval(y) := paste0(get(sample.call.status.colname),' | ',get(sample.af.colname))] + tmp.table <- data.table(tmp.table) + lapply(sample.table[Sample_Type %in% c("duplex")]$Sample_Barcode, function(y) { + sample.call.status.colname <- paste0(y, "___duplex.called") + 
sample.af.colname <- paste0(y, "___total") + tmp.table[, eval(y) := paste0(get(sample.call.status.colname), " | ", get(sample.af.colname))] }) - lapply(sample.table[Sample_Type %in% c('DMP_Tumor')]$Sample_Barcode,function(y){ - tmp.table[,eval(y) := paste0(case_when( - !is.na(get('DMP')) & get(paste0(sample.table[Sample_Type %in% c('duplex')]$Sample_Barcode[1],'___duplex.called')) != 'Not Covered' ~ 'Called', - !is.na(get('DMP')) & get(paste0(sample.table[Sample_Type %in% c('duplex')]$Sample_Barcode[1],'___duplex.called')) == 'Not Covered' ~ 'Called (but not covered in ACCESS)', - is.na(get('DMP')) & as.numeric(gsub('/.*','',get(paste0(y,'___DMP_Tumor')))) > 3 ~ 'Genotyped', - TRUE ~ 'Not Called' - ),' | ',get(paste0(y,'___DMP_Tumor')))] + lapply(sample.table[Sample_Type %in% c("DMP_Tumor")]$Sample_Barcode, function(y) { + tmp.table[, eval(y) := paste0(case_when( + !is.na(get("DMP")) & get(paste0(sample.table[Sample_Type %in% c("duplex")]$Sample_Barcode[1], "___duplex.called")) != "Not Covered" ~ "Called", + !is.na(get("DMP")) & get(paste0(sample.table[Sample_Type %in% c("duplex")]$Sample_Barcode[1], "___duplex.called")) == "Not Covered" ~ "Called (but not covered in ACCESS)", + is.na(get("DMP")) & as.numeric(gsub("/.*", "", get(paste0(y, "___DMP_Tumor")))) > 3 ~ "Genotyped", + TRUE ~ "Not Called" + ), " | ", get(paste0(y, "___DMP_Tumor")))] }) - processed.tmp.table = tmp.table[,!grep('___',colnames(tmp.table)),with = F] %>% + processed.tmp.table <- tmp.table[, !grep("___", colnames(tmp.table)), with = F] %>% # melting data frame by tumor samples - melt(id.vars = c('Hugo_Symbol','Chromosome','Start_Position','End_Position','Variant_Classification','HGVSp_Short', - 'Reference_Allele','Tumor_Seq_Allele2','ExAC_AF','Hotspot','DMP','duplex_support_num','call_confidence','CH'), - variable.name = "Tumor_Sample_Barcode", value.name = "call_info") %>% - mutate(call_confidence = gsub(' \\| ','',str_extract(call_info,'.*.\\| ')),call_info = gsub('.*.\\| ','',call_info)) %>% 
rowwise() %>% - mutate(t_alt_count = ifelse(grepl('-[0-9]+-',call_info), - # SV parsing - sum(as.numeric(str_split(call_info,'-|\\(')[[1]][1:2])), - # SNV parsing - as.numeric(gsub(' |\\/.*.','',call_info))), - t_total_count = ifelse(grepl('-[0-9]+-',call_info), - # SV parsing - as.numeric(str_split(call_info,'-|\\(')[[1]][3]), - # SNV parsing - as.numeric(gsub('.*.\\/|\\(.*.','',call_info)))) %>% data.table() + melt( + id.vars = c( + "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Variant_Classification", "HGVSp_Short", + "Reference_Allele", "Tumor_Seq_Allele2", "ExAC_AF", "Hotspot", "DMP", "duplex_support_num", "call_confidence", "CH" + ), + variable.name = "Tumor_Sample_Barcode", value.name = "call_info" + ) %>% + mutate(call_confidence = gsub(" \\| ", "", str_extract(call_info, ".*.\\| ")), call_info = gsub(".*.\\| ", "", call_info)) %>% + rowwise() %>% + mutate( + t_alt_count = ifelse(grepl("-[0-9]+-", call_info), + # SV parsing + sum(as.numeric(str_split(call_info, "-|\\(")[[1]][1:2])), + # SNV parsing + as.numeric(gsub(" |\\/.*.", "", call_info)) + ), + t_total_count = ifelse(grepl("-[0-9]+-", call_info), + # SV parsing + as.numeric(str_split(call_info, "-|\\(")[[1]][3]), + # SNV parsing + as.numeric(gsub(".*.\\/|\\(.*.", "", call_info)) + ) + ) %>% + data.table() return(processed.tmp.table) } # main graphing function -------------------------------------------------- #' @export -plot_all_events = function( - master.ref,results.dir, - criteria = 'stringent' -){ +plot_all_events <- function( + master.ref, results.dir, + criteria = "stringent") { # # test input section ----------------------------------------------------------- # master.ref = fread('/juno/work/bergerm1/bergerlab/zhengy1/access_data_analysis/data/example_master_file.csv') # results.dir = paste0('/juno/work/bergerm1/MSK-ACCESS/ACCESS-Projects/test_access/access_data_analysis/output_042020/') # # criteria <- 'permissive' # criteria <- 'stringent' - # + # # graph by patient 
-------------------------------------------------------- - output.dir = paste0(results.dir,'/plots/') + output.dir <- paste0(results.dir, "/plots/") dir.create(output.dir) # for plotting consistency - status_id = c('Called' = 19, 'Not Called' = 4, 'Signed out' = 15, - 'Not Signed out' = 13, 'Not Covered' = 8, 'Genotyped' = 17) - + status_id <- c( + "Called" = 19, "Not Called" = 4, "Signed out" = 15, + "Not Signed out" = 13, "Not Covered" = 8, "Genotyped" = 17 + ) + # snv_sv_table = list.files(paste0(results.dir,'/results_',criteria,'_combined/'),full.names = T) - lapply(unique(master.ref$cmo_patient_id),function(x){ + lapply(unique(master.ref$cmo_patient_id), function(x) { # THIS PLOTS PLASMA SAMPLES ONLY # SNV - tmp.table = fread(list.files(paste0(results.dir,'/results_',criteria,'_combined/'),x,full.names = T))[ - call_confidence == 'High' | grepl('Protein Fusion: in frame',HGVSp_Short) - ] - tmp.sample.sheets <- fread(paste0(results.dir,'/',x,'/',x,'_sample_sheet.tsv'))[,.(Sample_Barcode,cmo_patient_id,Sample_Type)] - tmp.table = table_to_maf(tmp.table,tmp.sample.sheets) - tmp.table = data.table(process_maf_for_graph(tmp.table)) - + tmp.table <- fread(list.files(paste0(results.dir, "/results_", criteria, "_combined/"), x, full.names = T))[ + call_confidence == "High" | grepl("Protein Fusion: in frame", HGVSp_Short) + ] + tmp.sample.sheets <- fread(paste0(results.dir, "/", x, "/", x, "_sample_sheet.tsv"))[, .(Sample_Barcode, cmo_patient_id, Sample_Type)] + tmp.table <- table_to_maf(tmp.table, tmp.sample.sheets) + tmp.table <- data.table(process_maf_for_graph(tmp.table)) + # CNA - tmp.cna = do.call(rbind,lapply(master.ref[cmo_patient_id == x]$cmo_sample_id_plasma,function(y){ - fread(paste0(results.dir,'/CNA_final_call_set/',y,'_cna_final_call_set.txt')) + tmp.cna <- do.call(rbind, lapply(master.ref[cmo_patient_id == x]$cmo_sample_id_plasma, function(y) { + fread(paste0(results.dir, "/CNA_final_call_set/", y, "_cna_final_call_set.txt")) })) - + # transform sample 
IDs into times - if(all(!is.na(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%y')))){ - transform.vector = structure(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%y'), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) - print(transform.vector) - }else{ - transform.vector = structure(as.character(master.ref[cmo_patient_id == x]$collection_date), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) + if (all(!is.na(as.Date(master.ref[cmo_patient_id == x]$collection_date, "%m/%d/%y")))) { + transform.vector <- structure(as.Date(master.ref[cmo_patient_id == x]$collection_date, "%m/%d/%y"), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma + ) + print(transform.vector) + } else { + transform.vector <- structure(as.character(master.ref[cmo_patient_id == x]$collection_date), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma + ) print(transform.vector) } - tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) + tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] + factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) print(factor.levels) - #tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - - if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ - print('skiping to the next') - if(nrow(tmp.cna)) stop(paste0('Need to make CNA only file for: ',x)) + # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + + if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { + print("skiping to the next") + if (nrow(tmp.cna)) stop(paste0("Need to make CNA only file for: ", x)) return() } - - colourCount = nrow(unique(tmp.table[,.(Hugo_Symbol,HGVSp_Short)])) - getPalette = colorRampPalette(brewer.pal(8, "Set2")) - SNV.SV.plot = ggplot(tmp.table) + - 
geom_line(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), - color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),group = paste0(Hugo_Symbol,'_',HGVSp_Short))) + - geom_point(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), - color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + - labs(title=x,x='Time Point', y='log10(VAF)') + scale_x_date(date_labels = "%Y %b %d",breaks = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", - axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) + + colourCount <- nrow(unique(tmp.table[, .(Hugo_Symbol, HGVSp_Short)])) + getPalette <- colorRampPalette(brewer.pal(8, "Set2")) + SNV.SV.plot <- ggplot(tmp.table) + + geom_line(aes( + x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) + )) + + geom_point(aes( + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence + ), size = 1.5) + + labs(title = x, x = "Time Point", y = "log10(VAF)") + + scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + + scale_shape_manual(values = status_id, name = "Call Status") + + scale_color_manual(values = 
getPalette(colourCount), name = "Alteration") + + theme_minimal() + + scale_y_log10() + # scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + theme( + panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", + axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") + ) print(SNV.SV.plot) - - if(nrow(tmp.cna) > 0){ - tmp.cna = tmp.cna %>% mutate(Tumor_Sample_Barcode = factor(Tumor_Sample_Barcode,unique(tmp.sample.sheets[Sample_Type == 'duplex']$Sample_Barcode))) %>% + + if (nrow(tmp.cna) > 0) { + tmp.cna <- tmp.cna %>% + mutate(Tumor_Sample_Barcode = factor(Tumor_Sample_Barcode, unique(tmp.sample.sheets[Sample_Type == "duplex"]$Sample_Barcode))) %>% # expand table on all empty samples without any calls - data.table() %>% dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode,drop = c(TRUE, FALSE),fill = 0,value.var = 'fc') %>% - melt.data.table(id.vars = c('Hugo_Symbol','CNA'),variable.name = 'Tumor_Sample_Barcode',value.name = 'fc') %>% data.table() - tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - #factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) - #tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - - colourCount = nrow(unique(tmp.cna[,.(Hugo_Symbol,CNA)])) - getPalette = colorRampPalette(brewer.pal(8, "Set2")) - CNA.plot = ggplot(tmp.cna) + - geom_bar(aes(x = Tumor_Sample_Barcode,y = abs(fc),fill = paste0(Hugo_Symbol,'_',CNA)),position="dodge", stat="identity") + - labs(x='Time Point', y='Absolute fc') + scale_x_date(date_labels = "%Y %b %d",breaks = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - scale_fill_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + 
theme(panel.grid.major = element_blank(),legend.position="bottom",axis.text.x = element_text(angle=45, hjust=1,face = 'bold')) + data.table() %>% + dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode, drop = c(TRUE, FALSE), fill = 0, value.var = "fc") %>% + melt.data.table(id.vars = c("Hugo_Symbol", "CNA"), variable.name = "Tumor_Sample_Barcode", value.name = "fc") %>% + data.table() + tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] + # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) + # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + + colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) + getPalette <- colorRampPalette(brewer.pal(8, "Set2")) + CNA.plot <- ggplot(tmp.cna) + + geom_bar(aes(x = Tumor_Sample_Barcode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + labs(x = "Time Point", y = "Absolute fc") + + scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + + # scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + + theme_minimal() + + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) print(CNA.plot) - - pdf(paste0(output.dir,'/',x,'_all_events.pdf'),width = 10,height = 7) - print(ggarrange(SNV.SV.plot,CNA.plot,ncol = 1,heights = c(2,1))) + + pdf(paste0(output.dir, "/", x, "_all_events.pdf"), width = 10, height = 7) + print(ggarrange(SNV.SV.plot, CNA.plot, ncol = 1, heights = c(2, 1))) dev.off() - }else{ - pdf(paste0(output.dir,'/',x,'_all_events.pdf'),width = 10,height = 7) + } else { + pdf(paste0(output.dir, "/", x, "_all_events.pdf"), width = 10, height = 7) print(SNV.SV.plot) dev.off() } @@ -245,22 +289,22 @@ suppressPackageStartupMessages({ }) if 
(!interactive()) { - - parser=ArgumentParser() - parser$add_argument('-m', '--masterref', type='character', help='File path to master reference file') - parser$add_argument('-o', '--resultsdir', type='character', help='Output directory') - parser$add_argument('-c', '--criteria', type='character', default = 'stringent', - help='Calling criteria [default]') - args=parser$parse_args() - - master.ref = args$masterref - results.dir = args$resultsdir - criteria = args$criteria - - if(!criteria %in% c('stringent','permissive')){ - stop('Criteria argument should be either stringent or permissive') + parser <- ArgumentParser() + parser$add_argument("-m", "--masterref", type = "character", help = "File path to master reference file") + parser$add_argument("-o", "--resultsdir", type = "character", help = "Output directory") + parser$add_argument("-c", "--criteria", + type = "character", default = "stringent", + help = "Calling criteria [default]" + ) + args <- parser$parse_args() + + master.ref <- args$masterref + results.dir <- args$resultsdir + criteria <- args$criteria + + if (!criteria %in% c("stringent", "permissive")) { + stop("Criteria argument should be either stringent or permissive") } - - suppressWarnings(plot_all_events(fread(master.ref),results.dir,criteria)) - -} + + suppressWarnings(plot_all_events(fread(master.ref), results.dir, criteria)) +} \ No newline at end of file From ce268d002cb880ff5214840106da23e4f78d575f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:32:43 -0400 Subject: [PATCH 13/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 9ac3993..c313aeb 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -222,7 +222,7 @@ plot_all_events <- function( getPalette <- colorRampPalette(brewer.pal(8, "Set2")) SNV.SV.plot <- ggplot(tmp.table) + geom_line(aes( - x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count 
== 0, 0, as.numeric(t_alt_count / t_total_count)), + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( From 4a212229bfb81b4c2cf91c6eb0fa997f41815e9d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:36:20 -0400 Subject: [PATCH 14/42] Update plot_all_events.R --- R/plot_all_events.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index c313aeb..2a5b98c 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -222,19 +222,20 @@ plot_all_events <- function( getPalette <- colorRampPalette(brewer.pal(8, "Set2")) SNV.SV.plot <- ggplot(tmp.table) + geom_line(aes( - x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( - x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + - scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_shape_manual(values = status_id, name = "Call Status") + scale_color_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + - scale_y_log10() + # scale_x_discrete(breaks = 
sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_y_log10() + + scale_x_continuos(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") From fe0de012bf78e264749c1ae52a774ef688e99e42 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:37:15 -0400 Subject: [PATCH 15/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 2a5b98c..7c9d5f5 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -235,7 +235,7 @@ plot_all_events <- function( scale_color_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + scale_y_log10() + - scale_x_continuos(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_x_continuous(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") From 79b720e3f7694e9bc740f1996dc9cd00f01b2a45 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:37:53 -0400 Subject: [PATCH 16/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 7c9d5f5..a9eda02 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -235,7 +235,7 @@ plot_all_events <- function( scale_color_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + scale_y_log10() + - scale_x_continuous(breaks = 
sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") From 8acc71714c8a57e5febe6b986b501b4400e8faee Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:45:55 -0400 Subject: [PATCH 17/42] Update plot_all_events.R --- R/plot_all_events.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index a9eda02..e91bc6b 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -211,6 +211,8 @@ plot_all_events <- function( factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) print(factor.levels) # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d")) + if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { print("skiping to the next") @@ -222,11 +224,11 @@ plot_all_events <- function( getPalette <- colorRampPalette(brewer.pal(8, "Set2")) SNV.SV.plot <- ggplot(tmp.table) + geom_line(aes( - x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( - x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", 
ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + From 88765506badbfaace77473c965f4c0ebffb55c40 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:46:34 -0400 Subject: [PATCH 18/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index e91bc6b..e3eab15 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -211,7 +211,7 @@ plot_all_events <- function( factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) print(factor.levels) # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d")) + tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d") if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { From 33e16a974ae20ff05f8557da1f8090f1d7e2339b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 22:51:04 -0400 Subject: [PATCH 19/42] Update plot_all_events.R --- R/plot_all_events.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index e3eab15..7db9a83 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -232,12 +232,12 @@ plot_all_events <- function( color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_shape_manual(values = status_id, name = "Call Status") + scale_color_manual(values = getPalette(colourCount), name = 
"Alteration") + theme_minimal() + - scale_y_log10() + - scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_y_log10() + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") @@ -255,14 +255,14 @@ plot_all_events <- function( tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - + tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode, format = "%Y-%b-%d") colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + geom_bar(aes(x = Tumor_Sample_Barcode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + - scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + - # scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) From f0ae09a5fe1ff29090982440806f3b0cb6414d2d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:04:30 -0400 Subject: [PATCH 20/42] Update plot_all_events.R --- R/plot_all_events.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/R/plot_all_events.R b/R/plot_all_events.R index 7db9a83..e200148 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -210,8 +210,8 @@ plot_all_events <- function( tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) print(factor.levels) - # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d") + tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + # tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d") if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { @@ -224,11 +224,11 @@ plot_all_events <- function( getPalette <- colorRampPalette(brewer.pal(8, "Set2")) SNV.SV.plot <- ggplot(tmp.table) + geom_line(aes( - x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( - x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + @@ -254,12 +254,12 @@ plot_all_events <- function( data.table() tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) - # tmp.table$Tumor_Sample_Barcode 
= factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode, format = "%Y-%b-%d") + tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + #tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode, format = "%Y-%b-%d") colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + - geom_bar(aes(x = Tumor_Sample_Barcode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + geom_bar(aes(x = factor(Tumor_Sample_Barcode), y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + From 958c800a1cc4620be612ad5059574aaa0812e269 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:09:53 -0400 Subject: [PATCH 21/42] Update plot_all_events.R --- R/plot_all_events.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index e200148..e3143e9 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -210,8 +210,8 @@ plot_all_events <- function( tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) print(factor.levels) - tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - # tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d") + #tmp.table$Tumor_Sample_Barcode <- factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + 
tmp.table$dates <- as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d") if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { @@ -224,15 +224,16 @@ plot_all_events <- function( getPalette <- colorRampPalette(brewer.pal(8, "Set2")) SNV.SV.plot <- ggplot(tmp.table) + geom_line(aes( - x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = dates, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( - x = factor(Tumor_Sample_Barcode), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = dates, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + - scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_x_discrete(breaks = unique(tmp.table$Tumor_Sample_Barocde), labels = unique(tmp.table$Tumor_Sample_Barocde)) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_shape_manual(values = status_id, name = "Call Status") + scale_color_manual(values = getPalette(colourCount), name = "Alteration") + @@ -259,9 +260,10 @@ plot_all_events <- function( colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + - geom_bar(aes(x = factor(Tumor_Sample_Barcode), y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + geom_bar(aes(x = dates, y = abs(fc), 
fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + - scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + + scale_x_discrete(breaks = unique(tmp.table$Tumor_Sample_Barocde), labels = unique(tmp.table$Tumor_Sample_Barocde)) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + From 31f83bd2ef76e19eaa5dfa34edc78e66128ec26e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:11:16 -0400 Subject: [PATCH 22/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index e3143e9..9d9feb4 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -255,8 +255,8 @@ plot_all_events <- function( data.table() tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) - tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - #tmp.table$Tumor_Sample_Barcode = as.character(tmp.table$Tumor_Sample_Barcode, format = "%Y-%b-%d") + # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) + tmp.table$dates = as.character(tmp.table$Tumor_Sample_Barcode, format = "%Y-%b-%d") colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + From 61ef063f7a49fae51796e88e7d8b640e03ef2699 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:18:56 -0400 Subject: [PATCH 23/42] Update plot_all_events.R --- 
R/plot_all_events.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 9d9feb4..0352103 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -211,7 +211,7 @@ plot_all_events <- function( factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) print(factor.levels) #tmp.table$Tumor_Sample_Barcode <- factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.table$dates <- as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%b-%d") + tmp.table$dates <- as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%m-%d") if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { @@ -228,12 +228,12 @@ plot_all_events <- function( color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( - x = dates, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = order(dates), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - scale_x_discrete(breaks = unique(tmp.table$Tumor_Sample_Barocde), labels = unique(tmp.table$Tumor_Sample_Barocde)) + + scale_x_discrete(breaks = order(unique(tmp.table$Tumor_Sample_Barocde)), labels = order(unique(tmp.table$Tumor_Sample_Barocde))) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_shape_manual(values = status_id, name = "Call Status") + scale_color_manual(values = getPalette(colourCount), name = "Alteration") + @@ -253,17 +253,17 @@ plot_all_events <- function( dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode, drop = c(TRUE, FALSE), fill = 
0, value.var = "fc") %>% melt.data.table(id.vars = c("Hugo_Symbol", "CNA"), variable.name = "Tumor_Sample_Barcode", value.name = "fc") %>% data.table() - tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] + tmp.cna$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.table$dates = as.character(tmp.table$Tumor_Sample_Barcode, format = "%Y-%b-%d") + tmp.cna$dates <- as.character(tmp.cna$Tumor_Sample_Barcode, format = "%Y-%b-%d") colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + - geom_bar(aes(x = dates, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + geom_bar(aes(x = order(dates), y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - scale_x_discrete(breaks = unique(tmp.table$Tumor_Sample_Barocde), labels = unique(tmp.table$Tumor_Sample_Barocde)) + + scale_x_discrete(breaks = order(unique(tmp.cna$Tumor_Sample_Barocde)), labels = order(unique(tmp.cna$Tumor_Sample_Barocde))) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + From 49c9dac7de364f787a56f92457cdc333a1721053 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:39:37 -0400 Subject: [PATCH 24/42] Update plot_all_events.R --- R/plot_all_events.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 0352103..638f13d 100644 --- 
a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -208,10 +208,10 @@ plot_all_events <- function( print(transform.vector) } tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] - factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) - print(factor.levels) + #factor.levels <- sort(unique(tmp.table$Tumor_Sample_Barcode)) + #print(factor.levels) #tmp.table$Tumor_Sample_Barcode <- factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.table$dates <- as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%m-%d") + #tmp.table$dates <- as.character(tmp.table$Tumor_Sample_Barcode,format = "%Y-%m-%d") if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { @@ -224,16 +224,16 @@ plot_all_events <- function( getPalette <- colorRampPalette(brewer.pal(8, "Set2")) SNV.SV.plot <- ggplot(tmp.table) + geom_line(aes( - x = dates, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) )) + geom_point(aes( - x = order(dates), y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence ), size = 1.5) + labs(title = x, x = "Time Point", y = "log10(VAF)") + #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - scale_x_discrete(breaks = order(unique(tmp.table$Tumor_Sample_Barocde)), labels = order(unique(tmp.table$Tumor_Sample_Barocde))) + + #scale_x_discrete(breaks = order(unique(tmp.table$Tumor_Sample_Barocde)), labels = 
order(unique(tmp.table$Tumor_Sample_Barocde))) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_shape_manual(values = status_id, name = "Call Status") + scale_color_manual(values = getPalette(colourCount), name = "Alteration") + @@ -256,14 +256,14 @@ plot_all_events <- function( tmp.cna$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) - tmp.cna$dates <- as.character(tmp.cna$Tumor_Sample_Barcode, format = "%Y-%b-%d") + # tmp.cna$dates <- as.character(tmp.cna$Tumor_Sample_Barcode, format = "%Y-%b-%d") colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + - geom_bar(aes(x = order(dates), y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + geom_bar(aes(x = Tumor_Sample_Barocde, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + - #scale_x_discrete(breaks = sort(unique(tmp.table$Tumor_Sample_Barocde)),labels = sort(unique(tmp.table$Tumor_Sample_Barocde))) + - scale_x_discrete(breaks = order(unique(tmp.cna$Tumor_Sample_Barocde)), labels = order(unique(tmp.cna$Tumor_Sample_Barocde))) + + #scale_x_discrete(breaks = sort(unique(tmp.table$,labels = sort(unique(tmp.table$Tumor_Sample_BaroTumor_Sample_Barocde))cde))) + + #scale_x_discrete(breaks = order(unique(tmp.cna$Tumor_Sample_Barocde)), labels = order(unique(tmp.cna$Tumor_Sample_Barocde))) + #scale_x_date(date_labels = "%Y %b %d", breaks = "1 month") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + From 44a26a0dcff4be6397b3cf0734ac6b0f808de98a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:40:34 -0400 Subject: [PATCH 25/42] 
Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 638f13d..a7fa25b 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -260,7 +260,7 @@ plot_all_events <- function( colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + - geom_bar(aes(x = Tumor_Sample_Barocde, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + geom_bar(aes(x = Tumor_Sample_Barocode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + #scale_x_discrete(breaks = sort(unique(tmp.table$,labels = sort(unique(tmp.table$Tumor_Sample_BaroTumor_Sample_Barocde))cde))) + #scale_x_discrete(breaks = order(unique(tmp.cna$Tumor_Sample_Barocde)), labels = order(unique(tmp.cna$Tumor_Sample_Barocde))) + From 23ca85999e894d27252293bee33f7fad2d926463 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:41:40 -0400 Subject: [PATCH 26/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index a7fa25b..db76663 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -260,7 +260,7 @@ plot_all_events <- function( colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) getPalette <- colorRampPalette(brewer.pal(8, "Set2")) CNA.plot <- ggplot(tmp.cna) + - geom_bar(aes(x = Tumor_Sample_Barocode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + geom_bar(aes(x = Tumor_Sample_Barcode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + labs(x = "Time Point", y = "Absolute fc") + #scale_x_discrete(breaks = sort(unique(tmp.table$,labels = 
sort(unique(tmp.table$Tumor_Sample_BaroTumor_Sample_Barocde))cde))) + #scale_x_discrete(breaks = order(unique(tmp.cna$Tumor_Sample_Barocde)), labels = order(unique(tmp.cna$Tumor_Sample_Barocde))) + From eab5433f1fde715d5771cb0edeabbc005f969a41 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 7 Jul 2020 23:44:19 -0400 Subject: [PATCH 27/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index db76663..12597bd 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -253,7 +253,7 @@ plot_all_events <- function( dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode, drop = c(TRUE, FALSE), fill = 0, value.var = "fc") %>% melt.data.table(id.vars = c("Hugo_Symbol", "CNA"), variable.name = "Tumor_Sample_Barcode", value.name = "fc") %>% data.table() - tmp.cna$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] + tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] # factor.levels = sort(unique(tmp.table$Tumor_Sample_Barcode)) # tmp.table$Tumor_Sample_Barcode = factor(as.character(tmp.table$Tumor_Sample_Barcode),levels = factor.levels) # tmp.cna$dates <- as.character(tmp.cna$Tumor_Sample_Barcode, format = "%Y-%b-%d") From a256cf658090595150fe9264f0543d6a4e20d042 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 10 Jul 2020 10:14:10 -0400 Subject: [PATCH 28/42] Update filter_calls.R --- R/filter_calls.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index 539b11a..0991cc7 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -39,7 +39,7 @@ filter_calls = function( # for each patient produce the correct results ---------------------------- # x <- unique(master.ref$cmo_patient_id)[1] - all.fillout.dim <- lapply(unique(master.ref$cmo_patient_id),function(x){ + all.fillout.dim <- lapply(unique(master.ref[cmo_patient_id == 
'C-DFJ7RT']$cmo_patient_id),function(x){ print(paste0('Processing patient ',x)) # Inputs and sanity checks ------------------------------------------------ fillouts.filenames <- list.files(paste0(results.dir,'/',x,'/'),'ORG-STD_genotyped.maf|ORG-SIMPLEX-DUPLEX_genotyped.maf',full.names = T) @@ -74,6 +74,7 @@ filter_calls = function( HGVSp_Short,Reference_Allele,Tumor_Seq_Allele2,t_var_freq,ExAC_AF) %>% data.table() return(maf.file) })) + print(fillouts.dt) # merging and melting ----------------------------------------------------- hotspot.maf <- fread(paste0(results.dir,'/',x,'/',x,'_all_unique_calls_hotspots.maf')) %>% rowwise() %>% transmute(Hugo_Symbol,Chromosome = as.character(Chromosome),Start_Position,End_Position,Variant_Classification, From 2e769ac8e8cea25bff00937b2f4091e229a6a2e6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 10 Jul 2020 10:20:14 -0400 Subject: [PATCH 29/42] Update filter_calls.R --- R/filter_calls.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/filter_calls.R b/R/filter_calls.R index 0991cc7..2252451 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -63,6 +63,7 @@ filter_calls = function( melt.data.table(id.vars = melt.id.vars,variable.name = 'variable',value.name = 'value') %>% mutate(variable = gsub('fragment','_',variable)) %>% separate(variable,c('variable','Sample_Type'),sep = '___') %>% mutate(Tumor_Sample_Barcode = paste0(sample.name,'___',Sample_Type)) %>% select(-Sample_Type) %>% data.table() %>% + unique() %>% dcast.data.table(as.formula(paste0(paste0(melt.id.vars,collapse = ' + '),' ~ variable')),value.var = 'value') -> maf.file }else{ maf.file <- maf.file %>% mutate(Tumor_Sample_Barcode = paste0(sample.name,'___',sample.type)) %>% From 1ecd8dcaf4e0ce3dbe4033e2773e227d80390659 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 10 Jul 2020 10:24:15 -0400 Subject: [PATCH 30/42] Update filter_calls.R --- R/filter_calls.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/filter_calls.R 
b/R/filter_calls.R index 2252451..ac1e8fb 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -74,8 +74,7 @@ filter_calls = function( transmute(Hugo_Symbol,Tumor_Sample_Barcode,Chromosome = as.character(Chromosome),Start_Position,End_Position,Variant_Classification, HGVSp_Short,Reference_Allele,Tumor_Seq_Allele2,t_var_freq,ExAC_AF) %>% data.table() return(maf.file) - })) - print(fillouts.dt) + })) %>% unique() %>% data.table() # merging and melting ----------------------------------------------------- hotspot.maf <- fread(paste0(results.dir,'/',x,'/',x,'_all_unique_calls_hotspots.maf')) %>% rowwise() %>% transmute(Hugo_Symbol,Chromosome = as.character(Chromosome),Start_Position,End_Position,Variant_Classification, From 6545f171a6b59bc757d072cb7d7b1864c5106d09 Mon Sep 17 00:00:00 2001 From: Youyun Zheng Date: Mon, 20 Jul 2020 19:30:03 -0400 Subject: [PATCH 31/42] Update plot_all_events.R Expecting numeric format for collection date, which then gets sorted and discretized in ggplot --- R/plot_all_events.R | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 759fe3c..6026966 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -167,17 +167,11 @@ plot_all_events = function( fread(paste0(results.dir,'/CNA_final_call_set/',y,'_cna_final_call_set.txt')) })) - # transform sample IDs into times - if(all(!is.na(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%y')))){ - transform.vector = structure(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%y'), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) - print(transform.vector) - }else{ - transform.vector = structure(as.character(master.ref[cmo_patient_id == x]$collection_date), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) - print(transform.vector) - } - tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] + + transform.vector = 
structure(as.numeric(master.ref[cmo_patient_id == x]$collection_date), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) + print(transform.vector) + tmp.table$Tumor_Sample_Barcode = factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ print('skiping to the next') @@ -192,7 +186,7 @@ plot_all_events = function( color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),group = paste0(Hugo_Symbol,'_',HGVSp_Short))) + geom_point(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + - labs(title=x,x='Time Point', y='VAF') + #scale_x_date(date_labels = "%Y %b %d") + + labs(title=x,x='Time Point', y='VAF') + scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + theme_minimal() + scale_y_log10() + theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", @@ -210,7 +204,7 @@ plot_all_events = function( getPalette = colorRampPalette(brewer.pal(8, "Set2")) CNA.plot = ggplot(tmp.cna) + geom_bar(aes(x = Tumor_Sample_Barcode,y = abs(fc),fill = paste0(Hugo_Symbol,'_',CNA)),position="dodge", stat="identity") + - labs(x='Time Point', y='Absolute fc') + #scale_x_date(date_labels = "%Y %b %d") + + labs(x='Time Point', y='Absolute fc') + scale_fill_manual(values = getPalette(colourCount),name = 'Alteration') + theme_minimal() + theme(panel.grid.major = element_blank(),legend.position="bottom",axis.text.x = element_text(angle=45, hjust=1,face = 'bold')) print(CNA.plot) From 7ba6b1e5c7e6f152465eb6027e8cbca751151cfa Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 11:32:03 -0400 Subject: [PATCH 32/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 97a5d01..bbb9d51 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -200,7 +200,7 @@ plot_all_events <- function( names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) print(transform.vector) tmp.table$Tumor_Sample_Barcode = factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) - + print(tmp.table) if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ print('skiping to the next') if(nrow(tmp.cna)) stop(paste0('Need to make CNA only file for: ',x)) From 7a170858dfd2502f7f179ad4b44d2a69586f5085 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 11:45:50 -0400 Subject: [PATCH 33/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index bbb9d51..1e46ce8 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -196,7 +196,7 @@ plot_all_events <- function( })) - transform.vector = structure(as.numeric(master.ref[cmo_patient_id == x]$collection_date), + transform.vector = structure(as.date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%Y'), names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) print(transform.vector) tmp.table$Tumor_Sample_Barcode = factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) @@ -216,7 +216,7 @@ plot_all_events <- function( color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + labs(title=x,x='Time Point', y='VAF') + scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + + theme_minimal() + scale_y_log10() + scale_x_date(date_minor_breaks = "1 day") + theme(panel.grid.major = element_blank(),legend.position="top",legend.box = 
"vertical", axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) print(SNV.SV.plot) From 535721eb80f1d940d40368b7b1b84fec01bad62e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 11:46:59 -0400 Subject: [PATCH 34/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 1e46ce8..59d50f8 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -196,7 +196,7 @@ plot_all_events <- function( })) - transform.vector = structure(as.date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%Y'), + transform.vector = structure(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%Y'), names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) print(transform.vector) tmp.table$Tumor_Sample_Barcode = factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) From 1cd381cc450b187aca8e0d97f85dc2cfce9beec1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:00:45 -0400 Subject: [PATCH 35/42] Update plot_all_events.R --- R/plot_all_events.R | 84 ++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 59d50f8..2bf70d8 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -194,48 +194,68 @@ plot_all_events <- function( tmp.cna <- do.call(rbind, lapply(master.ref[cmo_patient_id == x]$cmo_sample_id_plasma, function(y) { fread(paste0(results.dir, "/CNA_final_call_set/", y, "_cna_final_call_set.txt")) })) - - transform.vector = structure(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%Y'), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) - print(transform.vector) - tmp.table$Tumor_Sample_Barcode = factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) + if (all(!is.na(as.Date(master.ref[cmo_patient_id == 
x]$collection_date, "%m/%d/%y")))) { + transform.vector <- structure(as.Date(master.ref[cmo_patient_id == x]$collection_date, "%m/%d/%y"), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma + ) + print(transform.vector) + } + else { + transform.vector <- structure(as.character(master.ref[cmo_patient_id == x]$collection_date), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma + ) + print(transform.vector) + } + tmp.table$Tumor_Sample_Barcode <- factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) print(tmp.table) - if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ - print('skiping to the next') - if(nrow(tmp.cna)) stop(paste0('Need to make CNA only file for: ',x)) + if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { + print("skiping to the next") + if (nrow(tmp.cna)) stop(paste0("Need to make CNA only file for: ", x)) return() } - - colourCount = nrow(unique(tmp.table[,.(Hugo_Symbol,HGVSp_Short)])) - getPalette = colorRampPalette(brewer.pal(8, "Set2")) - SNV.SV.plot = ggplot(tmp.table) + - geom_line(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), - color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),group = paste0(Hugo_Symbol,'_',HGVSp_Short))) + - geom_point(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), - color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + - labs(title=x,x='Time Point', y='VAF') + - scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + scale_x_date(date_minor_breaks = "1 day") + - theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", - axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) + + 
colourCount <- nrow(unique(tmp.table[, .(Hugo_Symbol, HGVSp_Short)])) + getPalette <- colorRampPalette(brewer.pal(8, "Set2")) + SNV.SV.plot <- ggplot(tmp.table) + + geom_line(aes( + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) + )) + + geom_point(aes( + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence + ), size = 1.5) + + labs(title = x, x = "Time Point", y = "VAF") + + scale_shape_manual(values = status_id, name = "Call Status") + + scale_color_manual(values = getPalette(colourCount), name = "Alteration") + + theme_minimal() + + scale_y_log10() + + scale_x_date(date_minor_breaks = "1 day") + + theme( + panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", + axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") + ) print(SNV.SV.plot) if (nrow(tmp.cna) > 0) { tmp.cna <- tmp.cna %>% mutate(Tumor_Sample_Barcode = factor(Tumor_Sample_Barcode, unique(tmp.sample.sheets[Sample_Type == "duplex"]$Sample_Barcode))) %>% # expand table on all empty samples without any calls - data.table() %>% dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode,drop = c(TRUE, FALSE),fill = 0,value.var = 'fc') %>% - melt.data.table(id.vars = c('Hugo_Symbol','CNA'),variable.name = 'Tumor_Sample_Barcode',value.name = 'fc') %>% data.table() - tmp.cna$Tumor_Sample_Barcode = transform.vector[tmp.cna$Tumor_Sample_Barcode] - - colourCount = nrow(unique(tmp.cna[,.(Hugo_Symbol,CNA)])) - getPalette = colorRampPalette(brewer.pal(8, "Set2")) - CNA.plot = ggplot(tmp.cna) + - geom_bar(aes(x = Tumor_Sample_Barcode,y = abs(fc),fill = paste0(Hugo_Symbol,'_',CNA)),position="dodge", stat="identity") + - 
labs(x='Time Point', y='Absolute fc') + - scale_fill_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + theme(panel.grid.major = element_blank(),legend.position="bottom",axis.text.x = element_text(angle=45, hjust=1,face = 'bold')) + data.table() %>% + dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode, drop = c(TRUE, FALSE), fill = 0, value.var = "fc") %>% + melt.data.table(id.vars = c("Hugo_Symbol", "CNA"), variable.name = "Tumor_Sample_Barcode", value.name = "fc") %>% + data.table() + tmp.cna$Tumor_Sample_Barcode <- transform.vector[tmp.cna$Tumor_Sample_Barcode] + + colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) + getPalette <- colorRampPalette(brewer.pal(8, "Set2")) + CNA.plot <- ggplot(tmp.cna) + + geom_bar(aes(x = Tumor_Sample_Barcode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + labs(x = "Time Point", y = "Absolute fc") + + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + + theme_minimal() + + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) print(CNA.plot) pdf(paste0(output.dir, "/", x, "_all_events.pdf"), width = 10, height = 7) From 0da85b84a1417f43ebe01c5575ec29d12c4c0c20 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:23:50 -0400 Subject: [PATCH 36/42] Update plot_all_events.R --- R/plot_all_events.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 2bf70d8..ee23317 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -199,6 +199,7 @@ plot_all_events <- function( transform.vector <- structure(as.Date(master.ref[cmo_patient_id == x]$collection_date, "%m/%d/%y"), names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma ) + print("###Date Presentation:####") print(transform.vector) } else { From 6382d5b72a824c242f50780e004f40120b40d90e Mon Sep 17 00:00:00 2001 From: 
Ronak Shah Date: Tue, 21 Jul 2020 12:25:58 -0400 Subject: [PATCH 37/42] Update plot_all_events.R --- R/plot_all_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index ee23317..3451ae9 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -208,7 +208,7 @@ plot_all_events <- function( ) print(transform.vector) } - tmp.table$Tumor_Sample_Barcode <- factor(transform.vector[tmp.table$Tumor_Sample_Barcode], levels = sort(transform.vector)) + tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] print(tmp.table) if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { print("skiping to the next") From b008222cfde2247a42bb3fe8f8ee6763e7422d2e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:32:53 -0400 Subject: [PATCH 38/42] Update plot_all_events.R --- R/plot_all_events.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 3451ae9..9ce497e 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -232,7 +232,7 @@ plot_all_events <- function( scale_color_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + scale_y_log10() + - scale_x_date(date_minor_breaks = "1 day") + + scale_x_date(date_minor_breaks = "1 day", date_major_breaks = "1 week", date_labels = "%d") + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") @@ -256,6 +256,7 @@ plot_all_events <- function( labs(x = "Time Point", y = "Absolute fc") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + + scale_x_date(date_minor_breaks = "1 day", date_major_breaks = "1 week", date_labels = "%d") + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) print(CNA.plot) From 
7be1f4660b9536f443f19fdaa53fce3abc597ac0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:34:43 -0400 Subject: [PATCH 39/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 9ce497e..243ac70 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -232,7 +232,7 @@ plot_all_events <- function( scale_color_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + scale_y_log10() + - scale_x_date(date_minor_breaks = "1 day", date_major_breaks = "1 week", date_labels = "%d") + + scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%d") + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") @@ -256,7 +256,7 @@ plot_all_events <- function( labs(x = "Time Point", y = "Absolute fc") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + - scale_x_date(date_minor_breaks = "1 day", date_major_breaks = "1 week", date_labels = "%d") + + scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%d") + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) print(CNA.plot) From cf5eaa7fd94c0d51bab40f8e510727559b95b28b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:38:17 -0400 Subject: [PATCH 40/42] Update plot_all_events.R --- R/plot_all_events.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 243ac70..7ecedcc 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -232,7 +232,7 @@ plot_all_events <- function( scale_color_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + scale_y_log10() + - 
scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%d") + + scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%b %d") + theme( panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") @@ -256,7 +256,7 @@ plot_all_events <- function( labs(x = "Time Point", y = "Absolute fc") + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + theme_minimal() + - scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%d") + + scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%b %d") + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) print(CNA.plot) From 43a7e9a5ea81075e2e73d50259a90347c0eb5896 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:42:58 -0400 Subject: [PATCH 41/42] Update filter_calls.R --- R/filter_calls.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index ac1e8fb..42486de 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -39,7 +39,7 @@ filter_calls = function( # for each patient produce the correct results ---------------------------- # x <- unique(master.ref$cmo_patient_id)[1] - all.fillout.dim <- lapply(unique(master.ref[cmo_patient_id == 'C-DFJ7RT']$cmo_patient_id),function(x){ + all.fillout.dim <- lapply(unique(master.ref[cmo_patient_id]$cmo_patient_id),function(x){ print(paste0('Processing patient ',x)) # Inputs and sanity checks ------------------------------------------------ fillouts.filenames <- list.files(paste0(results.dir,'/',x,'/'),'ORG-STD_genotyped.maf|ORG-SIMPLEX-DUPLEX_genotyped.maf',full.names = T) From 7241c343b9132afe43ebee3d79c1427c41c4d67b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 21 Jul 2020 12:44:42 -0400 Subject: [PATCH 42/42] Update 
filter_calls.R --- R/filter_calls.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/filter_calls.R b/R/filter_calls.R index 42486de..acd4535 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -39,7 +39,7 @@ filter_calls = function( # for each patient produce the correct results ---------------------------- # x <- unique(master.ref$cmo_patient_id)[1] - all.fillout.dim <- lapply(unique(master.ref[cmo_patient_id]$cmo_patient_id),function(x){ + all.fillout.dim <- lapply(unique(master.ref$cmo_patient_id),function(x){ print(paste0('Processing patient ',x)) # Inputs and sanity checks ------------------------------------------------ fillouts.filenames <- list.files(paste0(results.dir,'/',x,'/'),'ORG-STD_genotyped.maf|ORG-SIMPLEX-DUPLEX_genotyped.maf',full.names = T)