small changes
kubranarci committed Dec 21, 2023
1 parent 84b8baa commit 4a81ce4
Showing 23 changed files with 215 additions and 55 deletions.
3 changes: 1 addition & 2 deletions bin/correctGCBias.R
@@ -574,5 +574,4 @@ png(file=outputfile_rep, width=1000, height=1500, type='cairo')

dev.off()

}

}
3 changes: 2 additions & 1 deletion bin/correctGCBias_functions.R
@@ -160,7 +160,8 @@ checkControl <- function(coverage, covIndex){
#check for second Peak
maxPeak <- which(dens$y==max(dens$y[zeroCrossings]))
secondPeak <- maximaInCross[ which( dens$y[maximaInCross] >= 0.1*dens$y[maxPeak] & dens$y[maximaInCross] != dens$y[maxPeak] )[1] ]


cat(paste0("maxPeak: ", maxPeak))
if ( 0.5*( round(2*dens$x[maxPeak])) != 1 | ( ! is.na( secondPeak ) ) ) {
cat( paste(chr, "warning indicator for contaminated sample or sample swap!\n") )
if( is.na(secondPeak) ){
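For context, the check in this hunk works on a kernel-density estimate of control coverage: local maxima are located via sign changes in the slope of dens$y, the highest one is taken as the main peak, and any other maximum reaching at least 10% of its height is reported as a second peak, the indicator for contamination or a sample swap. A minimal self-contained sketch of that logic in base R, with simulated input (variable names here are illustrative, not the pipeline's):

    # Simulated per-window coverage ratios: a main peak at 1 plus a spurious peak at 0.5
    ratios <- c(rnorm(900, mean = 1.0, sd = 0.05),
                rnorm(100, mean = 0.5, sd = 0.05))
    dens <- density(ratios)

    # Local maxima: positions where the slope of the density changes from + to -
    slope  <- diff(dens$y)
    maxima <- which(diff(sign(slope)) == -2) + 1

    mainPeak <- maxima[which.max(dens$y[maxima])]
    # Any other maximum at >= 10% of the main peak's height counts as a second peak
    others     <- maxima[maxima != mainPeak]
    secondPeak <- others[dens$y[others] >= 0.1 * dens$y[mainPeak]][1]

    if (!is.na(secondPeak)) {
        cat("warning indicator for contaminated sample or sample swap!\n")
    }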
17 changes: 4 additions & 13 deletions bin/estimateHRDScore.sh
@@ -126,10 +126,6 @@ do
exit 2
fi

HRDFile=${pid}_HRDscore_${ploidyFactor}_${tcc}.txt
HRD_DETAILS_FILE=${pid}_HRDscore_contributingSegments_${ploidyFactor}_${tcc}.txt
LST_DETAILS_FILE=${pid}_LSTscore_contributingSegments_${ploidyFactor}_${tcc}.CentromerReduced.txt
MERGED_REDUCED_FILE=${pid}_comb_pro_extra${ploidyFactor}_${tcc}.smoothed.CentromerReduced.txt
echo "before hdr estimation"
HRD_estimation.R \
$combProFileNoArtifacts \
@@ -138,10 +134,10 @@ do
$ploidy \
$tcc \
$pid \
${HRDFile}.tmp \
${HRD_DETAILS_FILE}.tmp \
${LST_DETAILS_FILE}.tmp \
${MERGED_REDUCED_FILE}.tmp \
${pid}_HRDscore_${ploidyFactor}_${tcc}.txt \
${pid}_HRDscore_contributingSegments_${ploidyFactor}_${tcc}.txt \
${pid}_LSTscore_contributingSegments_${ploidyFactor}_${tcc}.CentromerReduced.txt \
${pid}_comb_pro_extra${ploidyFactor}_${tcc}.smoothed.CentromerReduced.txt \
${centromers} \
${cytobandsFile} \
.
@@ -162,11 +158,6 @@ do
exit 2
fi

mv ${HRDFile}.tmp ${HRDFile}
mv ${HRD_DETAILS_FILE}.tmp ${HRD_DETAILS_FILE}
mv ${LST_DETAILS_FILE}.tmp ${LST_DETAILS_FILE}
mv ${MERGED_REDUCED_FILE}.tmp ${MERGED_REDUCED_FILE}
rm ${combProFile}.tmp
done
if [[ "$?" != 0 ]]
then
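For reference, the .tmp indirection removed in this file is the usual write-then-rename guard: output is written under a temporary name and only moved to its final name after the producing command exits cleanly, so an interrupted run cannot leave behind a truncated result that looks complete. A generic sketch of the pattern (hypothetical command and file name):

    #!/usr/bin/env bash
    set -euo pipefail

    out="sample_HRDscore.txt"        # hypothetical final output name

    # Write to a temporary name first ...
    some_tool --output "${out}.tmp"  # hypothetical producing command

    # ... then rename only after the tool succeeded; mv within one
    # filesystem is atomic, so readers see either no file or a complete one.
    mv "${out}.tmp" "${out}"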
2 changes: 2 additions & 0 deletions bin/manual_pruning.R
@@ -759,6 +759,7 @@ test_new$maxStop =NA
swapAlleles <- function(segments, data, chr, blockPre, blockPost){

blockFile <- paste0( blockPre, chr, ".", blockPost)
cat(paste0("blockFile: ",blockFile, "\n\n"))
blocks <- read.table( blockFile, header=F)
colnames(blocks) <- c('chr', 'start', 'end', 'length')

@@ -896,6 +897,7 @@ for (chr in seq_len(maxChr) ) {
if ( chr <= max(chromosomes) ) {
#adjust allele frequencies
dataAll[[chr]]$adjusted <- NA
cat(paste0("chr: ",chr, "\n\n"))
dataAll[[chr]] <- swapAlleles( test_new[selSeg,], dataAll[[chr]], chr, blockPre, blockSuf)

selRem <- which( dataAll[[chr]]$betaN > 0.3 & dataAll[[chr]]$betaN < 0.7 & is.na(dataAll[[chr]]$adjusted) )
Binary file modified bin/python_modules/Options.pyc
Binary file modified bin/python_modules/Tabfile.pyc
Binary file modified bin/python_modules/__init__.pyc
109 changes: 98 additions & 11 deletions conf/test.config
@@ -5,25 +5,112 @@
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/aceseq -profile test,<docker/singularity> --outdir <OUTDIR>
nextflow run main.nf -profile test,singularity --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'
config_profile_contact = 'Kübra Narcı kuebra.narci@dkfz-heidelberg.de'
config_profile_name = 'Test profiler'
config_profile_description = 'Test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
max_cpus = 16
max_memory = '100.GB'
max_time = '8.h'

// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
input = 'testdata/samplesheet.csv'

// workflow parameters
outdir = "test_results"
estimatesex = false
createbafplots = false

minHT = 0

// correctGC options
lowess_f = 5
scale_factor = 0
covplot_ylims = 4
gc_bias_json_key = "gc-bias"
minLim = 0.47
maxLim = 0.53
min_length_purity = 1000000
min_hetSNPs_purity = 0 // default 500 !!
dh_stop = "max"
min_length_dh_stop = 1000000
dh_zero = "no"
purity_min = 0.3
purity_max = 1.0
ploidy_min = 1.0
ploidy_max = 6.5
local_minium_upper_boundary_shift = 0.1

// clusterAndPruneSegments
min_seg_length_prune = 100
min_num_SNPs = 1
clustering = "no"
min_cluster_number = 1
min_membership = 0.001
min_distance = 0.0005

legacyMode = false

// Reference //
data_path = "/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes"
fasta = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa'
fasta_fai = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa.fai'
chrom_sizes = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/stats/hs37d5.fa.chrLenOnlyACGT_realChromosomes.tab'
chr_prefix = ""

// Beagle reference
beagle_reference = "${params.data_path}/tools_data/Beagle"
beagle_genetic_map = "${params.data_path}/tools_data/genetic_maps"
beagle_ref_ext = "bref3" // vcf | bref | bref3
beagle_map_ext = "map"

// Annotation files
dbsnp_snv = "${params.data_path}/databases/dbSNP/dbSNP_135/00-All.SNV.vcf.gz"
mapability_file = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz"
replication_time_file = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda"
gc_content_file = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt"
gene_annotation_file = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv"

// get breakpoints/ PSCBS gaps
centromer_file = "${params.data_path}/stats/hg19_gaps.txt"

// HDR estimation
blacklist_file = "assets/artifact.homoDels.potentialArtifacts.txt"
cytobands_file = "assets/hg19_cytoBand.txt"
}

// Perform work directory cleanup when the run has successfully completed
cleanup = true

// Reduce the job submission rate so that the server is not bombarded with jobs

singularity {
enabled = true
cacheDir = "/omics/groups/OE0608/internal/kubran/singularity"
autoMounts = true
runOptions = "-B /omics/groups -B /omics/odcf/analysis -B /omics/odcf/project -B /omics/odcf/reference_data"
}
env {
SINGULARITY_CACHEDIR="/omics/groups/OE0608/internal/kubran/singularity"
SINGULARITY_LIBRARYDIR="/omics/groups/OE0608/internal/kubran/singularity/library"
}

process {
executor = 'lsf'
scratch = '$SCRATCHDIR/$LSB_JOBID'

// Genome references
genome = 'R64-1-1'
}
executor {
name = 'lsf'
perTaskReserve = false
perJobMemLimit = true
submitRateLimit = '30 sec'
queueSize=50
}
96 changes: 85 additions & 11 deletions conf/test_full.config
@@ -1,24 +1,98 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running full-size tests
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a full size pipeline test.
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/aceseq -profile test_full,<docker/singularity> --outdir <OUTDIR>
nextflow run main.nf -profile test_full,singularity --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Full test profile'
config_profile_description = 'Full test dataset to check pipeline function'
config_profile_contact = 'Kübra Narcı kuebra.narci@dkfz-heidelberg.de'
config_profile_name = 'Test profiler'
config_profile_description = 'Test dataset to check pipeline function'

// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
// Limit resources so that this can run on GitHub Actions
max_cpus = 16
max_memory = '100.GB'
max_time = '8.h'

// Input data
input = 'assets/samplesheet_37_full.csv'

// workflow parameters
outdir = "test_full"

// correctGC options
minLim = 0.47
maxLim = 0.53
min_length_purity = 1000000
min_hetSNPs_purity = 0 // default 500 !!
dh_stop = "max"
min_length_dh_stop = 1000000
dh_zero = "no"
purity_min = 0.3
purity_max = 1.0
ploidy_min = 1.0
ploidy_max = 6.5
local_minium_upper_boundary_shift = 0.1

// Reference //
data_path = "/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes"
fasta = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa'
fasta_fai = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa.fai'
chrom_sizes = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/stats/hs37d5.fa.chrLenOnlyACGT_realChromosomes.tab'
chr_prefix = ""

// Beagle reference
beagle_reference = "${params.data_path}/tools_data/Beagle"
beagle_genetic_map = "${params.data_path}/tools_data/genetic_maps"
beagle_ref_ext = "bref3" // vcf | bref | bref3
beagle_map_ext = "map"

// Annotation files
dbsnp_snv = "${params.data_path}/databases/dbSNP/dbSNP_135/00-All.SNV.vcf.gz"
mapability_file = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz"
replication_time_file = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda"
gc_content_file = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt"
gene_annotation_file = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv"

// get breakpoints/ PSCBS gaps
centromer_file = "${params.data_path}/stats/hg19_gaps.txt"

// HDR estimation
blacklist_file = "assets/artifact.homoDels.potentialArtifacts.txt"
cytobands_file = "assets/hg19_cytoBand.txt"
}

// Perform work directory cleanup when the run has successfully completed
cleanup = true

// Reduce the job submission rate so that the server is not bombarded with jobs

singularity {
enabled = true
cacheDir = "/omics/groups/OE0608/internal/kubran/singularity"
autoMounts = true
runOptions = "-B /omics/groups -B /omics/odcf/analysis -B /omics/odcf/project -B /omics/odcf/reference_data"
}
env {
SINGULARITY_CACHEDIR="/omics/groups/OE0608/internal/kubran/singularity"
SINGULARITY_LIBRARYDIR="/omics/groups/OE0608/internal/kubran/singularity/library"
}

process {
executor = 'lsf'
scratch = '$SCRATCHDIR/$LSB_JOBID'

// Genome references
genome = 'R64-1-1'
}
executor {
name = 'lsf'
perTaskReserve = false
perJobMemLimit = true
submitRateLimit = '30 sec'
queueSize=50
}
2 changes: 1 addition & 1 deletion modules/local/annotate_cnv.nf
@@ -1,6 +1,6 @@
// This needs to run per cnv.tab.gz !
process ANNOTATE_CNV {
tag "$meta.id"
tag "$meta.id chr$intervals"
label 'process_high_cpu_low_memory'

conda (params.enable_conda ? "" : null)
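The retagging here and in the modules below follows one pattern: a tag that interpolates the interval, so per-chromosome tasks are distinguishable in the run log. For the interpolation to resolve, the process must take the interval as an input; a minimal sketch of the shape (hypothetical process and script name, Nextflow DSL2):

    process EXAMPLE_PER_INTERVAL {
        tag "$meta.id chr$intervals"   // shows e.g. "sample1 chr7" in the log

        input:
        tuple val(meta), path(infile)
        each intervals                 // one task per interval, e.g. 1..22

        output:
        tuple val(meta), path("*.out")

        script:
        """
        process_interval.sh ${infile} ${intervals} > ${meta.id}_chr${intervals}.out
        """
    }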
8 changes: 4 additions & 4 deletions modules/local/cluster_segments.nf
@@ -12,10 +12,10 @@ process CLUSTER_SEGMENTS {
val(chr_prefix)

output:
tuple val(meta), path('*normal.txt') , emit: clustered_segments
tuple val(meta), path('*all_seg_2.txt.gz'), path('*all_seg_2.txt.gz.tbi') , emit: snp_update2
tuple val(meta), path('*.pdf')
path "versions.yml" , emit: versions
tuple val(meta), path('*normal.txt') , emit: clustered_segments
tuple val(meta), path('*seg_2.txt.gz'), path('*seg_2.txt.gz.tbi') , emit: snp_update2
tuple val(meta), path('*.pdf') , optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
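The "optional: true" added to the PDF output is the standard Nextflow way to declare a file a task may legitimately not produce; without it, a run in which the plotting step emits nothing would fail on output binding. The idea in isolation (hypothetical process and script name):

    process PLOT_SEGMENTS {
        input:
        tuple val(meta), path(segments)

        output:
        // Declared but allowed to be absent: if the script decides there is
        // nothing to plot, the task still completes successfully.
        tuple val(meta), path('*.pdf'), optional: true

        script:
        """
        plot_segments.R ${segments}
        """
    }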
2 changes: 1 addition & 1 deletion modules/local/create_fake_samples.nf
@@ -1,5 +1,5 @@
process CREATE_FAKE_SAMPLES {
tag "$meta.id"
tag "$meta.id chr$intervals"
label 'process_low'

conda (params.enable_conda ? "" : null)
2 changes: 1 addition & 1 deletion modules/local/embed_haplotypes.nf
@@ -1,5 +1,5 @@
process EMBED_HAPLOTYPES {
tag "$meta.id"
tag "$meta.id chr$intervals"
label 'process_high_cpu_low_memory'

conda (params.enable_conda ? "" : null)
2 changes: 1 addition & 1 deletion modules/local/fake_control.nf
@@ -1,5 +1,5 @@
process FAKE_CONTROL {
tag "$meta.id"
tag "$meta.id chr$intervals"
label 'process_single'

conda (params.enable_conda ? "" : null)
3 changes: 2 additions & 1 deletion modules/local/gc_bias.nf
@@ -1,10 +1,11 @@
//only works with v0 !
process GC_BIAS {
tag "$meta.id"
label 'process_single'

conda (params.enable_conda ? "" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://kubran/odcf_aceseqcalling:v5':'kubran/odcf_aceseqcalling:v5' }"
'docker://kubran/odcf_aceseqcalling:v0':'kubran/odcf_aceseqcalling:v0' }"

input:
tuple val(meta), path(cnv_pos)
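The container line changed here uses the nf-core ternary convention: a single expression that yields a docker:// URI when the engine is Singularity (and no Docker-image pull is forced), and a bare image name otherwise; this commit pins GC_BIAS back to the v0 image flagged in the comment. The shape of the directive, spelled out:

    // Singularity consumes the docker:// URI natively; Docker takes the bare name.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'docker://kubran/odcf_aceseqcalling:v0' : 'kubran/odcf_aceseqcalling:v0' }"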
4 changes: 2 additions & 2 deletions modules/local/group_haplotypes.nf
@@ -1,6 +1,6 @@
process GROUP_HAPLOTYPES {
tag "$meta.id"
label 'process_single'
tag "$meta.id chr$intervals"
label 'process_low'

conda (params.enable_conda ? "" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
2 changes: 1 addition & 1 deletion modules/local/merge_cnv.nf
@@ -7,7 +7,7 @@ process MERGE_CNV {
'docker://kubran/odcf_aceseqcalling:v5':'kubran/odcf_aceseqcalling:v5' }"

input:
tuple val(meta) , path(cnv)
tuple val(meta) , path(cnv)
val(chr_prefix)

output:
2 changes: 1 addition & 1 deletion modules/local/win_generator.nf
@@ -1,5 +1,5 @@
process WIN_GENERATOR {
tag "$meta.id"
tag "$meta.id chr$intervals"
label 'process_low'

conda (params.enable_conda ? "" : null)
2 changes: 1 addition & 1 deletion modules/nf-core/bcftools/mpileup/main.nf
@@ -1,5 +1,5 @@
process BCFTOOLS_MPILEUP {
tag "$meta.id"
tag "$meta.id chr$intervals"
label 'process_medium'

conda (params.enable_conda ? "bioconda::bcftools=1.9" : null)