diff --git a/R/utilities.R b/R/utilities.R
index 95183de..ab2c36d 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -3713,6 +3713,69 @@ collate_pga <- function(
 
 }
 
+#' @title Collate PGA results per chromosome for samples with CN data.
+#'
+#' @description Expand a metadata table horizontally with PGA_chr metrics.
+#'
+#' @details Helper function called by `collate_results`, not meant for out-of-package usage.
+#'
+#' @param these_samples_metadata The metadata to be expanded with sample_id column.
+#' @param this_seq_type Seq type for returned CN segments. One of "genome" (default) or "capture".
+#'
+#' @noRd
+#'
+#' @return data frame
+#' @import dplyr
+#'
+#' @examples
+#' # For genomes
+#' meta <- get_gambl_metadata()
+#' pga_metrics <- collate_pga_chr(these_samples_metadata = meta)
+#' # For exomes
+#' meta_capture <- get_gambl_metadata(seq_type_filter = "capture")
+#' pga_metrics_capture <- collate_pga_chr(these_samples_metadata = meta_capture, this_seq_type = "capture")
+#'
+collate_pga_chr <- function(
+  these_samples_metadata,
+  this_seq_type = "genome"
+) {
+
+  message(
+    "Collating the PGA results per chromosome..."
+  )
+  # Both genome and capture seq types are accepted; anything else is an error
+  if (!this_seq_type %in% c("genome", "capture")) {
+    stop("Please provide a valid seq_type (\"genome\" or \"capture\").")
+  }
+
+  # Default to all samples if sample table is missing
+  if (missing(these_samples_metadata)) {
+    message("No sample table was provided. Defaulting to all metadata ...")
+    these_samples_metadata <- get_gambl_metadata(
+      seq_type_filter = this_seq_type
+    )
+  }
+
+  # Get the CN segments; calculate_pga_chr expects the id column named "sample"
+  multi_sample_seg <- get_sample_cn_segments(
+    sample_list = these_samples_metadata$sample_id,
+    multiple_samples = TRUE,
+    this_seq_type = this_seq_type
+  ) %>%
+    dplyr::rename("sample" = "ID")
+
+  # One row per sample with a sample_id column plus per-chromosome PGA columns
+  these_samples_pga <- calculate_pga_chr(
+    this_seg = multi_sample_seg
+  )
+
+  # Join explicitly on sample_id so no other shared column becomes a join key
+  dplyr::left_join(
+    these_samples_metadata,
+    these_samples_pga,
+    by = "sample_id"
+  )
+}
 
 #' @title Standardize Chromosome Prefix.
 #'
@@ -3902,6 +3965,173 @@ calculate_pga = function(this_seg,
 }
 
 
+
+#' @title Calculate proportion of each chromosome altered by CNV.
+#'
+#' @description `calculate_pga_chr` returns a data.frame with estimated proportion of each chromosome altered for each sample.
+#'
+#' @details This function calculates the proportion of each chromosome altered by CNV (per-chromosome PGA). It sums the length of a
+#' sample's CNV segments on a chromosome and relates it to the length of that chromosome. The input is expected to be a seg file.
+#' The path to a local SEG file can be provided instead. If a custom seg file is provided, the minimum required columns are
+#' sample, chrom, start, end, and log.ratio. The function can work with either individual or multi-sample seg files. The telomeres are always
+#' excluded from calculation, and centromeres/sex chromosomes can be optionally included or excluded.
+#'
+#' @param this_seg Input data frame of seg file.
+#' @param seg_path Optionally, specify the path to a local seg file.
+#' @param projection Argument specifying the projection of seg file, which will determine chr prefix, chromosome coordinates, and genome size. Default is grch37, but hg38 is also accepted.
+#' @param cutoff The minimum log.ratio for the segment to be considered as CNV. Default is 0.56, which is 1 copy. This value is expected to be a positive float of log.ratio for both deletions and amplifications.
+#' @param exclude_sex Boolean argument specifying whether to exclude sex chromosomes from calculation. Default is TRUE.
+#' @param exclude_centromeres Boolean argument specifying whether to exclude centromeres from calculation. Default is TRUE.
+#'
+#' @return data frame
+#'
+#' @import dplyr readr tidyr
+#' @export
+#'
+#' @examples
+#' sample_seg = get_sample_cn_segments(this_sample_id = "14-36022T")
+#' sample_seg = dplyr::rename(sample_seg, "sample" = "ID")
+#'
+#' calculate_pga_chr(this_seg = sample_seg)
+#'
+#' calculate_pga_chr(this_seg = sample_seg,
+#'               exclude_sex = FALSE)
+#'
+#' one_sample = get_sample_cn_segments(this_sample_id = "14-36022T")
+#' one_sample = dplyr::rename(one_sample, "sample" = "ID")
+#'
+#' another_sample = get_sample_cn_segments(this_sample_id = "BLGSP-71-21-00243-01A-11E")
+#' another_sample = dplyr::rename(another_sample, "sample" = "ID")
+#'
+#' multi_sample_seg = rbind(one_sample, another_sample)
+#'
+#' calculate_pga_chr(this_seg = multi_sample_seg)
+#'
+calculate_pga_chr = function(this_seg,
+                         seg_path,
+                         projection = "grch37",
+                         cutoff = 0.56,
+                         exclude_sex = TRUE,
+                         exclude_centromeres = TRUE) {
+  # Returns a data frame with one row per sample (column `sample_id`) and one
+  # `chr<N>_pga` column per chromosome: the proportion of that chromosome
+  # covered by segments with abs(log.ratio) >= cutoff, rounded to 3 decimals,
+  # or 0 when the sample has no such segment there.
+
+  # check for required argument (scalar test, so use the short-circuit &&)
+  if (missing(this_seg) && missing(seg_path)) {
+    stop("Please provide the data frame of seg file or path to the local seg.")
+  }
+
+  # ensure the specified projection is correct and define chromosome coordinates
+  if (projection == "grch37") {
+    chr_coordinates = chromosome_arms_grch37
+  } else if (projection == "hg38") {
+    chr_coordinates = chromosome_arms_hg38
+  } else {
+    stop(
+      "You specified projection that is currently not supported. Please provide seg files in either hg38 or grch37."
+    )
+  }
+
+  # chromosomes reported in the output, in display order and without prefix
+  chr_labels = c(1:22, "X", "Y")
+
+  # exclude sex chromosomes
+  if (exclude_sex) {
+    chr_coordinates = chr_coordinates %>%
+      dplyr::filter(!grepl("X|Y", chromosome))
+    chr_labels = setdiff(chr_labels, c("X", "Y"))
+  }
+
+  # collapse the arms of each chromosome into a single first-start/last-end span
+  # NOTE(review): this span covers the centromere although the flag is named
+  # exclude_centromeres; the condition is kept as written - please confirm.
+  if (exclude_centromeres) {
+    chr_coordinates = chr_coordinates %>%
+      group_by(chromosome) %>%
+      mutate(start = min(start),
+             end = max(end)) %>%
+      ungroup() %>%
+      distinct(chromosome, start, end)
+  }
+
+  # prepare for the overlaps
+  chr_coordinates = chr_coordinates %>%
+    rename("arm_start" = "start",
+           "arm_end" = "end",
+           "chrom" = "chromosome")
+
+  # length of each chromosome = summed width of its regions (end - start)
+  chr_lengths = chr_coordinates %>%
+    group_by(chrom) %>%
+    summarise(chr_len = sum(arm_end - arm_start), .groups = "drop") %>%
+    mutate(chrom = sub("^chr", "", chrom))
+
+  # work out the seg file
+  if (!missing(seg_path)) {
+    message(paste0("Reading the seg file from ", seg_path))
+    this_seg = suppressMessages(read_tsv(seg_path))
+  }
+
+  # fail early with a clear message when a required column is absent
+  required_cols = c("sample", "chrom", "start", "end", "log.ratio")
+  absent_cols = setdiff(required_cols, colnames(this_seg))
+  if (length(absent_cols) > 0) {
+    stop("The seg file is missing required column(s): ", paste(absent_cols, collapse = ", "))
+  }
+
+  # preserve the sample ids to account later for those with 0 PGA
+  sample_set = this_seg %>% distinct(sample)
+
+  this_seg = dplyr::filter(this_seg, abs(log.ratio) >= cutoff)
+
+  # ensure consistent chromosome prefixing
+  this_seg$chrom = gsub("chr", "", this_seg$chrom) # if there is a mish-mash of prefixes, strip them all
+  if (projection != "grch37") {
+    this_seg$chrom = paste0("chr", this_seg$chrom)
+  }
+
+  # exclude sex chromosomes
+  if (exclude_sex) {
+    this_seg = this_seg %>%
+      dplyr::filter(!grepl("X|Y", chrom))
+  }
+
+  # prepare for the overlaps
+  this_seg = inner_join(
+    this_seg,
+    chr_coordinates,
+    by = "chrom",
+    relationship = "many-to-many"
+  )
+
+  # keep the segments that overlap a region and count only the overlapping
+  # part, so bases outside the region (telomeres) never add to the total
+  pga_long = this_seg %>%
+    dplyr::filter(start <= arm_end & arm_start <= end) %>%
+    dplyr::mutate(size = pmin(end, arm_end) - pmax(start, arm_start),
+                  chrom = sub("^chr", "", chrom)) %>%
+    group_by(sample, chrom) %>%
+    summarise(total = sum(size), .groups = "drop") %>%
+    inner_join(chr_lengths, by = "chrom") %>%
+    dplyr::mutate(PGA.chr = round(total / chr_len, 3)) %>%
+    dplyr::select(sample, chrom, PGA.chr)
+
+  # every sample x chromosome pair gets a value; pairs without CNV become 0
+  affected_regions = tidyr::expand_grid(sample = sample_set$sample, chrom = chr_labels) %>%
+    left_join(pga_long, by = c("sample", "chrom")) %>%
+    dplyr::mutate(PGA.chr = dplyr::coalesce(PGA.chr, 0),
+                  chrom = paste0("chr", chrom, "_pga")) %>%
+    pivot_wider(names_from = chrom, values_from = PGA.chr) %>%
+    arrange(sample) %>%
+    rename("sample_id" = "sample") %>%
+    as.data.frame()
+
+  return(affected_regions)
+}
+
+
 #' @title Adjust ploidy for samples with CNV data.
 #'
 #' @description `adjust_ploidy` returns a seg file with log.ratios adjusted to the overall sample ploidy.
diff --git a/R/viz.R b/R/viz.R
index 7775fe6..deda9e1 100644
--- a/R/viz.R
+++ b/R/viz.R
@@ -22,7 +22,8 @@ colour_aliases = list("COO_consensus" = "coo", "COO" = "coo", "DHITsig_consensus
 #' @param this_maf Specify custom MAF data frame of mutations.
 #' @param maf_path Specify path to MAF file if it is not already loaded into data frame.
 #' @param zoom_in_region Provide a specific region in the format "chromosome:start-end" to zoom in to a specific region.
-#' @param label_sv Boolean argument to specify whether label SVs or not. Only supported if a specific chromosome or zoom in region are specified.
+#' @param label_sv Boolean argument to specify whether to mark SV breakpoints with a green line on the rainfall plot.
+#' @param annotate_sv Boolean argument to specify whether to restrict SVs to those annotated by the annotate_sv function (i.e. those involving relevant oncogenes).
 #' @param seq_type Specify one of "genome" or "capture" when relying on the function to obtain mutations from a region (i.e. if you haven't provided a MAF or single sample_id)
 #'
 #' @return a ggplot2 plot. Print it using print() or save it using ggsave()
@@ -42,26 +43,27 @@ colour_aliases = list("COO_consensus" = "coo", "COO" = "coo", "DHITsig_consensus
 #'                    seq_type = "genome")
 #'
 prettyRainfallPlot = function(this_sample_id,
-                              label_ashm_genes = TRUE,
-                              projection = "grch37",
-                              chromosome,
-                              this_maf,
-                              maf_path,
-                              zoom_in_region,
-                              seq_type,
-                              label_sv = FALSE) {
+                               label_ashm_genes = TRUE,
+                               projection = "grch37",
+                               chromosome,
+                               this_maf,
+                               maf_path,
+                               zoom_in_region,
+                               seq_type,
+                               label_sv = FALSE,
+                               annotate_sv = TRUE) {
   if (missing(this_sample_id)) {
     warning("No sample_id was provided. Using all mutations in the MAF within your region!")
     if(missing(zoom_in_region)){
       stop("Must provide a zoom_in_region to plot when showing data from more than one patient")
     }
   }
-
+  
   # allow user to specify chromosome prefix inconsistent with chromosome names
   if (!missing(chromosome)) {
     chromosome = standardize_chr_prefix(incoming_vector = chromosome, projection = projection)
   }
-
+  
   # allow to zoom in to a specific region
   if (!missing(zoom_in_region)) {
     region = zoom_in_region
@@ -71,7 +73,7 @@ prettyRainfallPlot = function(this_sample_id,
     zoom_in_region$start = as.numeric(zoom_in_region$start)
     zoom_in_region$end = as.numeric(zoom_in_region$end)
   }
-
+  
   if (label_ashm_genes) {
     if (projection == "grch37") {
       ashm_regions = GAMBLR.data::grch37_ashm_regions %>%
@@ -104,7 +106,7 @@ prettyRainfallPlot = function(this_sample_id,
       group_by(gene) %>%
       slice_head() %>%
       ungroup()
-
+    
     # this will be needed for consistent labeling with rainfall plots
     ashm_regions = ashm_regions %>%
       arrange(match(
@@ -113,8 +115,8 @@ prettyRainfallPlot = function(this_sample_id,
       ))
     ashm_regions = ashm_regions %>%
       mutate(Chromosome_f = factor(Chromosome, levels = unique(ashm_regions$Chromosome)))
-  }
-
+  
+  
   # if user is subsetting by chromosome or zooming in to a specific region, it is possible there are no aSHM features to show
   # handle this case separately
   if (nrow(ashm_regions) == 0) {
@@ -123,6 +125,7 @@ prettyRainfallPlot = function(this_sample_id,
     )
     label_ashm_genes = FALSE
   }
+  }
 
   # get ssm for the requested sample
   if (!missing(this_maf)) {
@@ -136,7 +139,7 @@ prettyRainfallPlot = function(this_sample_id,
     }
   } else if (!missing (maf_path)) {
     message ("Path to custom MAF file was provided, reading SSM using the custom path ...")
-
+    
     this_maf = suppressMessages(read_tsv(maf_path))
     if(!missing(this_sample_id)){
       this_maf = this_maf %>% dplyr::filter(Tumor_Sample_Barcode %in% this_sample_id)
@@ -154,7 +157,7 @@ prettyRainfallPlot = function(this_sample_id,
     message(paste("Will use all mutations for",seq_type, "in this region:",zoom_in_region))
     these_ssm = get_ssm_by_region(region = region,seq_type = seq_type,projection=projection)
   }
-
+  
   # do rainfall calculation using lag
   rainfall_points = dplyr::select(
     these_ssm,
@@ -179,13 +182,13 @@ prettyRainfallPlot = function(this_sample_id,
       "InDel"
       )
     ) %>%
-    dplyr::mutate(IMD = log(IMD)) %>%
+    dplyr::mutate(IMD = log10(IMD)) %>%
     ungroup() %>%
     drop_na(IMD) # for the first point of each chromosome, NAs are produced generating a warning message
-
+  
   # collapse substitutions into classes
   rainfall_points$Substitution = rainfall_conv[as.character(rainfall_points$Substitution)]
-
+  
   # ensure order of grids in the plot is sorted
   rainfall_points = rainfall_points %>%
     arrange(match(
@@ -207,23 +210,21 @@ prettyRainfallPlot = function(this_sample_id,
       )
     )
   }
-
+  
   # if user is subsetting by chromosome or zooming in to a specific region, are there any SSM left to plot?
   if (nrow(rainfall_points) == 0) {
     stop("After subsetting to a regions you requested to plot, there are no SSM to display.")
   }
-
+  
   # label SVs if user wants to overlap this data
   if (!missing(chromosome) & label_sv) {
     sv_chromosome = chromosome
   } else if (!missing(zoom_in_region) & label_sv) {
     sv_chromosome = zoom_in_region$chromosome
   } else if (label_sv) {
-    stop(
-      "Labeling SV is only supported when a particular chromosome or zoomed region is plotted."
-    )
+    sv_chromosome = 1:22
   }
-
+  
   if (label_sv) {
     message("Getting combined manta + GRIDSS SVs using GAMBLR ...")
     these_sv = get_combined_sv(these_sample_ids  = this_sample_id)
@@ -232,11 +233,23 @@ prettyRainfallPlot = function(this_sample_id,
         rename("SOMATIC_SCORE" = "SCORE")
     }
     # annotate SV
-    these_sv = annotate_sv(these_sv)
-
+    if (annotate_sv){
+      these_sv = annotate_sv(these_sv, genome_build = projection)
+    } else {
+      these_sv = these_sv %>%
+        dplyr::rename(chrom1 = "CHROM_A",
+                      start1 = "START_A",
+                      end1 = "END_A",
+                      chrom2 = "CHROM_B",
+                      start2 = "START_B",
+                      end2 = "END_B") %>%
+        mutate(gene = 1:nrow(these_sv),
+               partner = letters[gene],
+               fusion = paste(gene, partner, sep="-"))
+    }
     # make SVs a long df with 1 record per SV corresponding to the strand
     sv_to_label =
-      melt(
+      reshape2::melt(
         these_sv %>% select(
           chrom1,
           start1,
@@ -263,12 +276,12 @@ prettyRainfallPlot = function(this_sample_id,
         value.name = "Chromosome"
       ) %>%
       dplyr::filter(Chromosome %in% sv_chromosome)
-
+    
     # are there any SVs on this chromosome/region?
     if (nrow(sv_to_label) > 0) {
       sv_to_label =
         sv_to_label %>%
-        melt(
+        reshape2::melt(
           .,
           id.vars = c(
             "tumour_sample_id",
@@ -296,7 +309,7 @@ prettyRainfallPlot = function(this_sample_id,
       )
       label_sv = FALSE
     }
-
+    
     # when we are plotting region and not whole chromosome, ensure SV is within that region
     if (!missing(zoom_in_region) & label_sv) {
       sv_to_label = dplyr::filter(
@@ -314,20 +327,25 @@ prettyRainfallPlot = function(this_sample_id,
         label_sv = FALSE
       }
     }
-
+    
     sv_to_label = sv_to_label %>%
       mutate(Chromosome_f = factor(Chromosome))
   }
-
-  p = ggplot(rainfall_points) +
-    geom_point(aes(x = Start_Position, y = IMD, color = Substitution)) +
+  
+   p = ggplot(rainfall_points, aes(x = Start_Position, y = IMD)) +
     scale_color_manual(values = get_gambl_colours("rainfall")) +
-    ylab("log(IMD)") +
+    ylab(expression(log[10](IMD))) +
     theme_Morons() +
-    facet_wrap( ~ Chromosome_f, scales = "free_x") +
+    facet_grid(. ~ Chromosome_f, scales = "free_x",  space = "free_x", switch="x") +
     ggtitle(this_sample_id) +
-    theme(plot.title = element_text(hjust = 0)) # left-align title plot
-
+    theme(plot.title = element_text(hjust = 0),  # left-align title plot
+          axis.title.x = element_blank(), axis.text.x = element_blank(), axis.text.y = element_text(size = 16, colour = "black"),
+          axis.ticks.x = element_blank(), axis.ticks.y = element_line(colour = "black"),
+          panel.spacing.x = unit(0.1, "lines"), panel.border = element_blank(), text = element_text(size = 16, colour = "black", family="sans"),
+          strip.background = element_blank(), 
+          strip.placement = "outside",
+          panel.grid = element_blank())
+  
   if (label_ashm_genes) {
     p = p +
       ggrepel::geom_text_repel(
@@ -344,7 +362,7 @@ prettyRainfallPlot = function(this_sample_id,
         segment.angle = 25
       )
   }
-
+  
   if (label_sv) {
     p = p +
       geom_vline(
@@ -352,20 +370,22 @@ prettyRainfallPlot = function(this_sample_id,
         aes(xintercept = Start_Position),
         color = "lightgreen",
         alpha = .7
-      ) +
-      geom_text(data = sv_to_label,
-                aes(End_Position, 15, label = fusion, color = "lightgreen"))
-  }
-
-  # show x-axis coordinates if zooming in to a specific region, but not if looking chromosome/genome-wide
-  if (missing(zoom_in_region)) {
-    p = p + guides(x = "none")
+      ) 
   }
+  
+  if(annotate_sv) {
+    max_val = max(rainfall_points$IMD)
+    p = p +
+      geom_text(data = sv_to_label,
+                aes(End_Position, max_val+1, label = fusion, color = "lightgreen"),
+                show.legend = FALSE)
+  }  
 
+  p = p + geom_point(inherit.aes=TRUE, aes(color = Substitution))
+  
   return(p)
 }
 
-
 gene_mutation_tally = function(maf_df,these_samples_metadata,these_genes,grouping_variable="cohort"){
   meta = dplyr::select(these_samples_metadata,sample_id,{{grouping_variable}})
   maf_filt = dplyr::filter(maf_df,Hugo_Symbol %in% these_genes, Variant_Classification %in% coding_class) %>%