diff --git a/R/filter_calls.R b/R/filter_calls.R index e1a8ab0..acd4535 100644 --- a/R/filter_calls.R +++ b/R/filter_calls.R @@ -63,6 +63,7 @@ filter_calls = function( melt.data.table(id.vars = melt.id.vars,variable.name = 'variable',value.name = 'value') %>% mutate(variable = gsub('fragment','_',variable)) %>% separate(variable,c('variable','Sample_Type'),sep = '___') %>% mutate(Tumor_Sample_Barcode = paste0(sample.name,'___',Sample_Type)) %>% select(-Sample_Type) %>% data.table() %>% + unique() %>% dcast.data.table(as.formula(paste0(paste0(melt.id.vars,collapse = ' + '),' ~ variable')),value.var = 'value') -> maf.file }else{ maf.file <- maf.file %>% mutate(Tumor_Sample_Barcode = paste0(sample.name,'___',sample.type)) %>% @@ -73,7 +74,7 @@ filter_calls = function( transmute(Hugo_Symbol,Tumor_Sample_Barcode,Chromosome = as.character(Chromosome),Start_Position,End_Position,Variant_Classification, HGVSp_Short,Reference_Allele,Tumor_Seq_Allele2,t_var_freq,ExAC_AF) %>% data.table() return(maf.file) - })) + })) %>% unique() %>% data.table() # merging and melting ----------------------------------------------------- hotspot.maf <- fread(paste0(results.dir,'/',x,'/',x,'_all_unique_calls_hotspots.maf')) %>% rowwise() %>% transmute(Hugo_Symbol,Chromosome = as.character(Chromosome),Start_Position,End_Position,Variant_Classification, @@ -137,16 +138,16 @@ filter_calls = function( if(all(!c('unfilterednormal','normal_DMP') %in% sample.sheet$Sample_Type)){ tmp.col.name <- plasma.samples[1] lapply(plasma.samples,function(tmp.col.name){ - fillouts.dt[as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)"))) >= 0.3 | ExAC_AF >= 0.0001,eval(paste0(tmp.col.name,'.called')) := 'Not Called'] + #fillouts.dt[as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)"))) >= 0.3 | ExAC_AF >= 0.0001,eval(paste0(tmp.col.name,'.called')) := 'Not Called'] fillouts.dt[get(tmp.col.name) == '0/0(NaN)',eval(paste0(tmp.col.name,'.called')) := 'Not Covered'] }) }else{ lapply(plasma.samples,function(tmp.col.name){ lapply(normal.samples,function(tmp.col.name.normal){ # duplex tvar/nvar > 5 - fillouts.dt[(as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 5) | + fillouts.dt[(as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 2) | # if duplex have no reads, use simplex tvar - (as.numeric(gsub("\\(|\\)",'',str_extract(get(gsub('duplex','simplex',tmp.col.name)),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 5 & + (as.numeric(gsub("\\(|\\)",'',str_extract(get(gsub('duplex','simplex',tmp.col.name)),"\\(.*.\\)")))/as.numeric(gsub("\\(|\\)",'',str_extract(get(tmp.col.name.normal),"\\(.*.\\)"))) < 2 & as.numeric(gsub("/.*.$",'',get(tmp.col.name))) == 0), eval(paste0(tmp.col.name,'.called')) := 'Not Called'] fillouts.dt[get(tmp.col.name) == '0/0(NaN)',eval(paste0(tmp.col.name,'.called')) := 'Not Covered'] @@ -156,7 +157,10 @@ filter_calls = function( # final processing -------------------------------------------------------- # Save only the useful column - fillouts.dt <- fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] + #print(fillouts.dt) + #print("#######") + fillouts.dt <- fillouts.dt[DMP == 'Signed out' | fillouts.dt[,apply(.SD,1,function(x){any(x == 'Called')})]] + #print(fillouts.dt) # combining duplex and simplex counts lapply(plasma.samples,function(tmp.col.name){ # hotspot reads diff --git a/R/plot_all_events.R b/R/plot_all_events.R index 759fe3c..7ecedcc 100644 --- a/R/plot_all_events.R +++ b/R/plot_all_events.R @@ -10,216 +10,261 @@ # helper methods ---------------------------------------------------------- # collapsing read counts from the same rearrangement events into one total count -collapse_AF = function(x){ +collapse_AF <- function(x) { # x = c("32-117-190(0.7842)|53-0-959(0.0553)","NA|16-0-1035(0.0155)","63-0-954(0.066)|NA" ) # x = c('3-5-1300') # x = c('NA|NA|NA|0-56-4183','NA|NA|NA|3-4-76') - print(paste0(x,collapse = "','")) + print(paste0(x, collapse = "','")) # number of samples - samples.num = attr(gregexpr('\\|',x[1])[[1]],"match.length") + samples.num <- attr(gregexpr("\\|", x[1])[[1]], "match.length") print(samples.num) # when not found, return -1 - if(samples.num != -1){ - paste0(round(apply(separate(data.frame(AF = x),'AF',paste0('timpoint_',c(1:(length(samples.num)+1))),sep = '\\|'),2,function(one.tp.afs){ - mean(unlist(lapply(one.tp.afs,function(tmp.af){ - if(tmp.af == 'NA'){return(0)} - else{ - read.counts = as.numeric(str_split(gsub('\\(.*','',tmp.af),'-')[[1]]) - return((read.counts[1]+read.counts[2])/read.counts[3]) + if (samples.num != -1) { + paste0(round(apply(separate(data.frame(AF = x), "AF", paste0("timpoint_", c(1:(length(samples.num) + 1))), sep = "\\|"), 2, function(one.tp.afs) { + mean(unlist(lapply(one.tp.afs, function(tmp.af) { + if (tmp.af == "NA") { + return(0) + } + else { + read.counts <- as.numeric(str_split(gsub("\\(.*", "", tmp.af), "-")[[1]]) + return((read.counts[1] + read.counts[2]) / read.counts[3]) } }))) - }),digits = 3),collapse = '|') - }else{ + }), digits = 3), collapse = "|") + } else { print(x) - as.character(round(mean(unlist(lapply(x,function(tmp.af){ - if(tmp.af == 'NA'){return(0)} - else{ - read.counts = as.numeric(str_split(gsub('\\(.*','',tmp.af),'-')[[1]]) - return((read.counts[1]+read.counts[2])/read.counts[3]) + as.character(round(mean(unlist(lapply(x, function(tmp.af) { + if (tmp.af == "NA") { + return(0) + } + else { + read.counts <- as.numeric(str_split(gsub("\\(.*", "", tmp.af), "-")[[1]]) + return((read.counts[1] + read.counts[2]) / read.counts[3]) } - }))),digits = 3)) + }))), digits = 3)) } } # convert naming to timepoint, get rid of uncovered impact and access calls -process_maf_for_graph = function(tmp.maf){ - print('convert naming to timepoint, get rid of uncovered impact and access calls') +process_maf_for_graph <- function(tmp.maf) { + print("convert naming to timepoint, get rid of uncovered impact and access calls") # tmp.maf = ret.054.calls # tumor sample - tumor.sample = structure(gsub('-','',str_extract(unique(tmp.maf$Tumor_Sample_Barcode[grep('IM[0-9]$',tmp.maf$Tumor_Sample_Barcode)]),'-T..-')), - names = as.character(unique(tmp.maf$Tumor_Sample_Barcode[grep('IM[0-9]$',tmp.maf$Tumor_Sample_Barcode)]))) + tumor.sample <- structure(gsub("-", "", str_extract(unique(tmp.maf$Tumor_Sample_Barcode[grep("IM[0-9]$", tmp.maf$Tumor_Sample_Barcode)]), "-T..-")), + names = as.character(unique(tmp.maf$Tumor_Sample_Barcode[grep("IM[0-9]$", tmp.maf$Tumor_Sample_Barcode)])) + ) print(tumor.sample) # rest of the samples are plasma - plasma.sample = setdiff(tmp.maf$Tumor_Sample_Barcode,names(tumor.sample)) + plasma.sample <- setdiff(tmp.maf$Tumor_Sample_Barcode, names(tumor.sample)) # filter for plasma sample only - tmp.maf = tmp.maf[Tumor_Sample_Barcode %in% plasma.sample] + tmp.maf <- tmp.maf[Tumor_Sample_Barcode %in% plasma.sample] # change samples into timepoint information - plasma.sample = structure(case_when( + plasma.sample <- structure(case_when( # some of the DA-ret sample need to be renamed - grepl('-T0._',plasma.sample) ~ gsub('-|_','',gsub('T','L0',str_extract(plasma.sample,'-T0._'))), + grepl("-T0._", plasma.sample) ~ gsub("-|_", "", gsub("T", "L0", str_extract(plasma.sample, "-T0._"))), # otherwise extract L00 something - TRUE ~ gsub('-','',str_extract(plasma.sample,'-L...-')) - ),names = plasma.sample) + TRUE ~ gsub("-", "", str_extract(plasma.sample, "-L...-")) + ), names = plasma.sample) print(plasma.sample) - sample.name.conversion = c(tumor.sample,plasma.sample) + sample.name.conversion <- c(tumor.sample, plasma.sample) print(sample.name.conversion) # get all not covered calls - not.covered.df = unique(tmp.maf[call_confidence == 'Not Covered',.N,.(Hugo_Symbol,Chromosome,Start_Position,End_Position,Variant_Classification, - HGVSp_Short,Reference_Allele,Tumor_Seq_Allele2)])[N > length(plasma.sample)/2] - only.covered.tmp.maf = anti_join(tmp.maf,not.covered.df,by = c('Hugo_Symbol','Chromosome','Start_Position','End_Position','Variant_Classification', - 'HGVSp_Short','Reference_Allele','Tumor_Seq_Allele2')) %>% data.table() + not.covered.df <- unique(tmp.maf[call_confidence == "Not Covered", .N, .( + Hugo_Symbol, Chromosome, Start_Position, End_Position, Variant_Classification, + HGVSp_Short, Reference_Allele, Tumor_Seq_Allele2 + )])[N > length(plasma.sample) / 2] + only.covered.tmp.maf <- anti_join(tmp.maf, not.covered.df, by = c( + "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Variant_Classification", + "HGVSp_Short", "Reference_Allele", "Tumor_Seq_Allele2" + )) %>% data.table() # only the converted timepoint names # only.covered.tmp.maf$Tumor_Sample_Barcode = sample.name.conversion[as.character(only.covered.tmp.maf$Tumor_Sample_Barcode)] - if(any(grepl('__',only.covered.tmp.maf$Hugo_Symbol))){ - fusion.only.covered.tmp.maf = data.table(only.covered.tmp.maf)[grepl('__',Hugo_Symbol)] + if (any(grepl("__", only.covered.tmp.maf$Hugo_Symbol))) { + fusion.only.covered.tmp.maf <- data.table(only.covered.tmp.maf)[grepl("__", Hugo_Symbol)] # process original hugo_symbol column (sort two genes by name) - fusion.only.covered.tmp.maf$Hugo_Symbol = unlist(lapply(fusion.only.covered.tmp.maf$Hugo_Symbol,function(x){paste0(sort(str_split(x,'__')[[1]]),collapse = '-')})) - fusion.only.covered.tmp.maf$Chromosome = unlist(lapply(fusion.only.covered.tmp.maf$Chromosome,function(x){paste0(sort(str_split(x,'__')[[1]]),collapse = '-')})) + fusion.only.covered.tmp.maf$Hugo_Symbol <- unlist(lapply(fusion.only.covered.tmp.maf$Hugo_Symbol, function(x) { + paste0(sort(str_split(x, "__")[[1]]), collapse = "-") + })) + fusion.only.covered.tmp.maf$Chromosome <- unlist(lapply(fusion.only.covered.tmp.maf$Chromosome, function(x) { + paste0(sort(str_split(x, "__")[[1]]), collapse = "-") + })) # collapsing AF for rows of the same events (i.e. reciprocal rearrangement) while perserving the sample level seaparation in AF - fusion.only.covered.tmp.maf = fusion.only.covered.tmp.maf[,.(Start_Position = Start_Position[1],End_Position = End_Position[1],HGVSp_Short = HGVSp_Short[1], - Reference_Allele = Reference_Allele[1], Tumor_Seq_Allele2 = Tumor_Seq_Allele2[1], - ExAC_AF = ExAC_AF[1], Hotspot = Hotspot[1], DMP = DMP[1], duplex_support_num = duplex_support_num[1], - call_confidence = ifelse(any(call_confidence == 'Called'),'Called','Not Called'), - call_info = paste0(call_info,collapse = ' | '),CH = 'No', - t_alt_count = sum(t_alt_count,na.rm = T),t_total_count = sum(t_total_count,na.rm = T)), - .(Hugo_Symbol,Chromosome,Variant_Classification,Tumor_Sample_Barcode)] - only.covered.tmp.maf = only.covered.tmp.maf[-grep('__',Hugo_Symbol)] - only.covered.tmp.maf = rbind(only.covered.tmp.maf,fusion.only.covered.tmp.maf) + fusion.only.covered.tmp.maf <- fusion.only.covered.tmp.maf[ + , .( + Start_Position = Start_Position[1], End_Position = End_Position[1], HGVSp_Short = HGVSp_Short[1], + Reference_Allele = Reference_Allele[1], Tumor_Seq_Allele2 = Tumor_Seq_Allele2[1], + ExAC_AF = ExAC_AF[1], Hotspot = Hotspot[1], DMP = DMP[1], duplex_support_num = duplex_support_num[1], + call_confidence = ifelse(any(call_confidence == "Called"), "Called", "Not Called"), + call_info = paste0(call_info, collapse = " | "), CH = "No", + t_alt_count = sum(t_alt_count, na.rm = T), t_total_count = sum(t_total_count, na.rm = T) + ), + .(Hugo_Symbol, Chromosome, Variant_Classification, Tumor_Sample_Barcode) + ] + only.covered.tmp.maf <- only.covered.tmp.maf[-grep("__", Hugo_Symbol)] + only.covered.tmp.maf <- rbind(only.covered.tmp.maf, fusion.only.covered.tmp.maf) } - only.covered.tmp.maf$t_alt_count = ifelse(is.na(only.covered.tmp.maf$t_alt_count),0,only.covered.tmp.maf$t_alt_count) - only.covered.tmp.maf$t_total_count = ifelse(is.na(only.covered.tmp.maf$t_total_count),0,only.covered.tmp.maf$t_total_count) + only.covered.tmp.maf$t_alt_count <- ifelse(is.na(only.covered.tmp.maf$t_alt_count), 0, only.covered.tmp.maf$t_alt_count) + only.covered.tmp.maf$t_total_count <- ifelse(is.na(only.covered.tmp.maf$t_total_count), 0, only.covered.tmp.maf$t_total_count) return(only.covered.tmp.maf) } # melting genotype tables into maf-like format -table_to_maf = function(tmp.table,sample.table){ +table_to_maf <- function(tmp.table, sample.table) { # tmp.table = fillouts.dt # sample.table = sample.sheet # tmp.table = ret.006.table # sample.table = ret.006.sample.sheet # extract information for plasma and tumor - tmp.table = data.table(tmp.table) - lapply(sample.table[Sample_Type %in% c('duplex')]$Sample_Barcode,function(y){ - sample.call.status.colname = paste0(y,'___duplex.called') - sample.af.colname = paste0(y,'___total') - tmp.table[,eval(y) := paste0(get(sample.call.status.colname),' | ',get(sample.af.colname))] + tmp.table <- data.table(tmp.table) + lapply(sample.table[Sample_Type %in% c("duplex")]$Sample_Barcode, function(y) { + sample.call.status.colname <- paste0(y, "___duplex.called") + sample.af.colname <- paste0(y, "___total") + tmp.table[, eval(y) := paste0(get(sample.call.status.colname), " | ", get(sample.af.colname))] }) - lapply(sample.table[Sample_Type %in% c('DMP_Tumor')]$Sample_Barcode,function(y){ - tmp.table[,eval(y) := paste0(case_when( - !is.na(get('DMP')) & get(paste0(sample.table[Sample_Type %in% c('duplex')]$Sample_Barcode[1],'___duplex.called')) != 'Not Covered' ~ 'Called', - !is.na(get('DMP')) & get(paste0(sample.table[Sample_Type %in% c('duplex')]$Sample_Barcode[1],'___duplex.called')) == 'Not Covered' ~ 'Called (but not covered in ACCESS)', - is.na(get('DMP')) & as.numeric(gsub('/.*','',get(paste0(y,'___DMP_Tumor')))) > 3 ~ 'Genotyped', - TRUE ~ 'Not Called' - ),' | ',get(paste0(y,'___DMP_Tumor')))] + lapply(sample.table[Sample_Type %in% c("DMP_Tumor")]$Sample_Barcode, function(y) { + tmp.table[, eval(y) := paste0(case_when( + !is.na(get("DMP")) & get(paste0(sample.table[Sample_Type %in% c("duplex")]$Sample_Barcode[1], "___duplex.called")) != "Not Covered" ~ "Called", + !is.na(get("DMP")) & get(paste0(sample.table[Sample_Type %in% c("duplex")]$Sample_Barcode[1], "___duplex.called")) == "Not Covered" ~ "Called (but not covered in ACCESS)", + is.na(get("DMP")) & as.numeric(gsub("/.*", "", get(paste0(y, "___DMP_Tumor")))) > 3 ~ "Genotyped", + TRUE ~ "Not Called" + ), " | ", get(paste0(y, "___DMP_Tumor")))] }) - processed.tmp.table = tmp.table[,!grep('___',colnames(tmp.table)),with = F] %>% + processed.tmp.table <- tmp.table[, !grep("___", colnames(tmp.table)), with = F] %>% # melting data frame by tumor samples - melt(id.vars = c('Hugo_Symbol','Chromosome','Start_Position','End_Position','Variant_Classification','HGVSp_Short', - 'Reference_Allele','Tumor_Seq_Allele2','ExAC_AF','Hotspot','DMP','duplex_support_num','call_confidence','CH'), - variable.name = "Tumor_Sample_Barcode", value.name = "call_info") %>% - mutate(call_confidence = gsub(' \\| ','',str_extract(call_info,'.*.\\| ')),call_info = gsub('.*.\\| ','',call_info)) %>% rowwise() %>% - mutate(t_alt_count = ifelse(grepl('-[0-9]+-',call_info), - # SV parsing - sum(as.numeric(str_split(call_info,'-|\\(')[[1]][1:2])), - # SNV parsing - as.numeric(gsub(' |\\/.*.','',call_info))), - t_total_count = ifelse(grepl('-[0-9]+-',call_info), - # SV parsing - as.numeric(str_split(call_info,'-|\\(')[[1]][3]), - # SNV parsing - as.numeric(gsub('.*.\\/|\\(.*.','',call_info)))) %>% data.table() + melt( + id.vars = c( + "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Variant_Classification", "HGVSp_Short", + "Reference_Allele", "Tumor_Seq_Allele2", "ExAC_AF", "Hotspot", "DMP", "duplex_support_num", "call_confidence", "CH" + ), + variable.name = "Tumor_Sample_Barcode", value.name = "call_info" + ) %>% + mutate(call_confidence = gsub(" \\| ", "", str_extract(call_info, ".*.\\| ")), call_info = gsub(".*.\\| ", "", call_info)) %>% + rowwise() %>% + mutate( + t_alt_count = ifelse(grepl("-[0-9]+-", call_info), + # SV parsing + sum(as.numeric(str_split(call_info, "-|\\(")[[1]][1:2])), + # SNV parsing + as.numeric(gsub(" |\\/.*.", "", call_info)) + ), + t_total_count = ifelse(grepl("-[0-9]+-", call_info), + # SV parsing + as.numeric(str_split(call_info, "-|\\(")[[1]][3]), + # SNV parsing + as.numeric(gsub(".*.\\/|\\(.*.", "", call_info)) + ) + ) %>% + data.table() return(processed.tmp.table) } # main graphing function -------------------------------------------------- #' @export -plot_all_events = function( - master.ref,results.dir, - criteria = 'stringent' -){ +plot_all_events <- function( + master.ref, results.dir, + criteria = "stringent") { # # test input section ----------------------------------------------------------- # master.ref = fread('/juno/work/bergerm1/bergerlab/zhengy1/access_data_analysis/data/example_master_file.csv') # results.dir = paste0('/juno/work/bergerm1/MSK-ACCESS/ACCESS-Projects/test_access/access_data_analysis/output_042020/') # # criteria <- 'permissive' # criteria <- 'stringent' - # + # # graph by patient -------------------------------------------------------- - output.dir = paste0(results.dir,'/plots/') + output.dir <- paste0(results.dir, "/plots/") dir.create(output.dir) # for plotting consistency - status_id = c('Called' = 19, 'Not Called' = 4, 'Signed out' = 15, - 'Not Signed out' = 13, 'Not Covered' = 8, 'Genotyped' = 17) - + status_id <- c( + "Called" = 19, "Not Called" = 4, "Signed out" = 15, + "Not Signed out" = 13, "Not Covered" = 8, "Genotyped" = 17 + ) + # snv_sv_table = list.files(paste0(results.dir,'/results_',criteria,'_combined/'),full.names = T) - lapply(unique(master.ref$cmo_patient_id),function(x){ + lapply(unique(master.ref$cmo_patient_id), function(x) { # THIS PLOTS PLASMA SAMPLES ONLY # SNV - tmp.table = fread(list.files(paste0(results.dir,'/results_',criteria,'_combined/'),x,full.names = T))[ - call_confidence == 'High' | grepl('Protein Fusion: in frame',HGVSp_Short) - ] - tmp.sample.sheets <- fread(paste0(results.dir,'/',x,'/',x,'_sample_sheet.tsv'))[,.(Sample_Barcode,cmo_patient_id,Sample_Type)] - tmp.table = table_to_maf(tmp.table,tmp.sample.sheets) - tmp.table = data.table(process_maf_for_graph(tmp.table)) - + tmp.table <- fread(list.files(paste0(results.dir, "/results_", criteria, "_combined/"), x, full.names = T))[ + call_confidence == "High" | grepl("Protein Fusion: in frame", HGVSp_Short) + ] + tmp.sample.sheets <- fread(paste0(results.dir, "/", x, "/", x, "_sample_sheet.tsv"))[, .(Sample_Barcode, cmo_patient_id, Sample_Type)] + tmp.table <- table_to_maf(tmp.table, tmp.sample.sheets) + tmp.table <- data.table(process_maf_for_graph(tmp.table)) + # CNA - tmp.cna = do.call(rbind,lapply(master.ref[cmo_patient_id == x]$cmo_sample_id_plasma,function(y){ - fread(paste0(results.dir,'/CNA_final_call_set/',y,'_cna_final_call_set.txt')) + tmp.cna <- do.call(rbind, lapply(master.ref[cmo_patient_id == x]$cmo_sample_id_plasma, function(y) { + fread(paste0(results.dir, "/CNA_final_call_set/", y, "_cna_final_call_set.txt")) })) - - # transform sample IDs into times - if(all(!is.na(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%y')))){ - transform.vector = structure(as.Date(master.ref[cmo_patient_id == x]$collection_date,'%m/%d/%y'), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) - print(transform.vector) - }else{ - transform.vector = structure(as.character(master.ref[cmo_patient_id == x]$collection_date), - names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma) + + if (all(!is.na(as.Date(master.ref[cmo_patient_id == x]$collection_date, "%m/%d/%y")))) { + transform.vector <- structure(as.Date(master.ref[cmo_patient_id == x]$collection_date, "%m/%d/%y"), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma + ) + print("###Date Presentation:####") + print(transform.vector) + } + else { + transform.vector <- structure(as.character(master.ref[cmo_patient_id == x]$collection_date), + names = master.ref[cmo_patient_id == x]$cmo_sample_id_plasma + ) print(transform.vector) } - tmp.table$Tumor_Sample_Barcode = transform.vector[tmp.table$Tumor_Sample_Barcode] - - if(nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)){ - print('skiping to the next') - if(nrow(tmp.cna)) stop(paste0('Need to make CNA only file for: ',x)) + tmp.table$Tumor_Sample_Barcode <- transform.vector[tmp.table$Tumor_Sample_Barcode] + print(tmp.table) + if (nrow(tmp.table) == 0 | all(tmp.table$t_alt_count == 0)) { + print("skiping to the next") + if (nrow(tmp.cna)) stop(paste0("Need to make CNA only file for: ", x)) return() } - - colourCount = nrow(unique(tmp.table[,.(Hugo_Symbol,HGVSp_Short)])) - getPalette = colorRampPalette(brewer.pal(8, "Set2")) - SNV.SV.plot = ggplot(tmp.table) + - geom_line(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), - color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),group = paste0(Hugo_Symbol,'_',HGVSp_Short))) + - geom_point(aes(x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count/t_total_count)), - color = paste0(Hugo_Symbol,' ',ifelse(grepl('^p\\.',HGVSp_Short),HGVSp_Short,'')),shape = call_confidence),size = 1.5) + - labs(title=x,x='Time Point', y='VAF') + #scale_x_date(date_labels = "%Y %b %d") + - scale_shape_manual(values=status_id,name = 'Call Status') + scale_color_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + scale_y_log10() + - theme(panel.grid.major = element_blank(),legend.position="top",legend.box = "vertical", - axis.text.x = element_text(angle=45, hjust=1, face = 'bold')) + + colourCount <- nrow(unique(tmp.table[, .(Hugo_Symbol, HGVSp_Short)])) + getPalette <- colorRampPalette(brewer.pal(8, "Set2")) + SNV.SV.plot <- ggplot(tmp.table) + + geom_line(aes( + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), group = paste0(Hugo_Symbol, "_", HGVSp_Short) + )) + + geom_point(aes( + x = Tumor_Sample_Barcode, y = ifelse(t_total_count == 0, 0, as.numeric(t_alt_count / t_total_count)), + color = paste0(Hugo_Symbol, " ", ifelse(grepl("^p\\.", HGVSp_Short), HGVSp_Short, "")), shape = call_confidence + ), size = 1.5) + + labs(title = x, x = "Time Point", y = "VAF") + + scale_shape_manual(values = status_id, name = "Call Status") + + scale_color_manual(values = getPalette(colourCount), name = "Alteration") + + theme_minimal() + + scale_y_log10() + + scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%b %d") + + theme( + panel.grid.major = element_blank(), legend.position = "top", legend.box = "vertical", + axis.text.x = element_text(angle = 45, hjust = 1, face = "bold") + ) print(SNV.SV.plot) - - if(nrow(tmp.cna) > 0){ - tmp.cna = tmp.cna %>% mutate(Tumor_Sample_Barcode = factor(Tumor_Sample_Barcode,unique(tmp.sample.sheets[Sample_Type == 'duplex']$Sample_Barcode))) %>% + + if (nrow(tmp.cna) > 0) { + tmp.cna <- tmp.cna %>% + mutate(Tumor_Sample_Barcode = factor(Tumor_Sample_Barcode, unique(tmp.sample.sheets[Sample_Type == "duplex"]$Sample_Barcode))) %>% # expand table on all empty samples without any calls - data.table() %>% dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode,drop = c(TRUE, FALSE),fill = 0,value.var = 'fc') %>% - melt.data.table(id.vars = c('Hugo_Symbol','CNA'),variable.name = 'Tumor_Sample_Barcode',value.name = 'fc') %>% data.table() - tmp.cna$Tumor_Sample_Barcode = transform.vector[tmp.cna$Tumor_Sample_Barcode] - - colourCount = nrow(unique(tmp.cna[,.(Hugo_Symbol,CNA)])) - getPalette = colorRampPalette(brewer.pal(8, "Set2")) - CNA.plot = ggplot(tmp.cna) + - geom_bar(aes(x = Tumor_Sample_Barcode,y = abs(fc),fill = paste0(Hugo_Symbol,'_',CNA)),position="dodge", stat="identity") + - labs(x='Time Point', y='Absolute fc') + #scale_x_date(date_labels = "%Y %b %d") + - scale_fill_manual(values = getPalette(colourCount),name = 'Alteration') + - theme_minimal() + theme(panel.grid.major = element_blank(),legend.position="bottom",axis.text.x = element_text(angle=45, hjust=1,face = 'bold')) + data.table() %>% + dcast.data.table(Hugo_Symbol + CNA ~ Tumor_Sample_Barcode, drop = c(TRUE, FALSE), fill = 0, value.var = "fc") %>% + melt.data.table(id.vars = c("Hugo_Symbol", "CNA"), variable.name = "Tumor_Sample_Barcode", value.name = "fc") %>% + data.table() + tmp.cna$Tumor_Sample_Barcode <- transform.vector[tmp.cna$Tumor_Sample_Barcode] + + colourCount <- nrow(unique(tmp.cna[, .(Hugo_Symbol, CNA)])) + getPalette <- colorRampPalette(brewer.pal(8, "Set2")) + CNA.plot <- ggplot(tmp.cna) + + geom_bar(aes(x = Tumor_Sample_Barcode, y = abs(fc), fill = paste0(Hugo_Symbol, "_", CNA)), position = "dodge", stat = "identity") + + labs(x = "Time Point", y = "Absolute fc") + + scale_fill_manual(values = getPalette(colourCount), name = "Alteration") + + theme_minimal() + + scale_x_date(date_minor_breaks = "1 day", date_breaks = "1 week", date_labels = "%b %d") + + theme(panel.grid.major = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1, face = "bold")) print(CNA.plot) - - pdf(paste0(output.dir,'/',x,'_all_events.pdf'),width = 10,height = 7) - print(ggarrange(SNV.SV.plot,CNA.plot,ncol = 1,heights = c(2,1))) + + pdf(paste0(output.dir, "/", x, "_all_events.pdf"), width = 10, height = 7) + print(ggarrange(SNV.SV.plot, CNA.plot, ncol = 1, heights = c(2, 1))) dev.off() - }else{ - pdf(paste0(output.dir,'/',x,'_all_events.pdf'),width = 10,height = 7) + } else { + pdf(paste0(output.dir, "/", x, "_all_events.pdf"), width = 10, height = 7) print(SNV.SV.plot) dev.off() } @@ -239,22 +284,22 @@ suppressPackageStartupMessages({ }) if (!interactive()) { - - parser=ArgumentParser() - parser$add_argument('-m', '--masterref', type='character', help='File path to master reference file') - parser$add_argument('-o', '--resultsdir', type='character', help='Output directory') - parser$add_argument('-c', '--criteria', type='character', default = 'stringent', - help='Calling criteria [default]') - args=parser$parse_args() - - master.ref = args$masterref - results.dir = args$resultsdir - criteria = args$criteria - - if(!criteria %in% c('stringent','permissive')){ - stop('Criteria argument should be either stringent or permissive') + parser <- ArgumentParser() + parser$add_argument("-m", "--masterref", type = "character", help = "File path to master reference file") + parser$add_argument("-o", "--resultsdir", type = "character", help = "Output directory") + parser$add_argument("-c", "--criteria", + type = "character", default = "stringent", + help = "Calling criteria [default]" + ) + args <- parser$parse_args() + + master.ref <- args$masterref + results.dir <- args$resultsdir + criteria <- args$criteria + + if (!criteria %in% c("stringent", "permissive")) { + stop("Criteria argument should be either stringent or permissive") } - - suppressWarnings(plot_all_events(fread(master.ref),results.dir,criteria)) - -} + + suppressWarnings(plot_all_events(fread(master.ref), results.dir, criteria)) +} \ No newline at end of file