From 4a81ce46d6fae60669d03cf8c185885fbe6c44c9 Mon Sep 17 00:00:00 2001
From: kubranarci <kbrnrc@gmail.com>
Date: Thu, 21 Dec 2023 14:06:00 +0100
Subject: [PATCH] small changes

---
 bin/correctGCBias.R                      |   3 +-
 bin/correctGCBias_functions.R            |   3 +-
 bin/estimateHRDScore.sh                  |  17 +---
 bin/manual_pruning.R                     |   2 +
 bin/python_modules/Options.pyc           | Bin 908 -> 890 bytes
 bin/python_modules/Tabfile.pyc           | Bin 2802 -> 2712 bytes
 bin/python_modules/__init__.pyc          | Bin 173 -> 164 bytes
 conf/test.config                         | 109 ++++++++++++++++++++---
 conf/test_full.config                    |  96 +++++++++++++++++---
 modules/local/annotate_cnv.nf            |   2 +-
 modules/local/cluster_segments.nf        |   8 +-
 modules/local/create_fake_samples.nf     |   2 +-
 modules/local/embed_haplotypes.nf        |   2 +-
 modules/local/fake_control.nf            |   2 +-
 modules/local/gc_bias.nf                 |   3 +-
 modules/local/group_haplotypes.nf        |   4 +-
 modules/local/merge_cnv.nf               |   2 +-
 modules/local/win_generator.nf           |   2 +-
 modules/nf-core/bcftools/mpileup/main.nf |   2 +-
 modules/nf-core/beagle/main.nf           |   2 +-
 modules/nf-core/samtools/mpileup/main.nf |   2 +-
 run_aceseq.sh                            |   2 +-
 smaldata.sh                              |   5 ++
 23 files changed, 215 insertions(+), 55 deletions(-)
 create mode 100644 smaldata.sh

diff --git a/bin/correctGCBias.R b/bin/correctGCBias.R
index 8a1789b..615d324 100755
--- a/bin/correctGCBias.R
+++ b/bin/correctGCBias.R
@@ -574,5 +574,4 @@ png(file=outputfile_rep, width=1000, height=1500, type='cairo')
 
 dev.off()
 
-}
-
+}
\ No newline at end of file
diff --git a/bin/correctGCBias_functions.R b/bin/correctGCBias_functions.R
index fc0e8b3..d9d3548 100755
--- a/bin/correctGCBias_functions.R
+++ b/bin/correctGCBias_functions.R
@@ -160,7 +160,8 @@ checkControl <- function(coverage, covIndex){
     #check for second Peak
     maxPeak <- which(dens$y==max(dens$y[zeroCrossings])) 
     secondPeak <- maximaInCross[ which( dens$y[maximaInCross] >= 0.1*dens$y[maxPeak] & dens$y[maximaInCross] != dens$y[maxPeak] )[1] ]
-    
+   
+    cat(paste0("maxPeak: ", maxPeak)) 
     if (  0.5*( round(2*dens$x[maxPeak])) != 1  | ( ! is.na( secondPeak ) ) ) {
       cat( paste(chr, "warning indicator for contaminated sample or sample swap!\n") )
       if( is.na(secondPeak) ){
diff --git a/bin/estimateHRDScore.sh b/bin/estimateHRDScore.sh
index 9d78817..12c5328 100755
--- a/bin/estimateHRDScore.sh
+++ b/bin/estimateHRDScore.sh
@@ -126,10 +126,6 @@ do
 		exit 2
 	fi
 
-	HRDFile=${pid}_HRDscore_${ploidyFactor}_${tcc}.txt
-	HRD_DETAILS_FILE=${pid}_HRDscore_contributingSegments_${ploidyFactor}_${tcc}.txt
-	LST_DETAILS_FILE=${pid}_LSTscore_contributingSegments_${ploidyFactor}_${tcc}.CentromerReduced.txt
-	MERGED_REDUCED_FILE=${pid}_comb_pro_extra${ploidyFactor}_${tcc}.smoothed.CentromerReduced.txt
 	echo "before hdr estimation"
 	HRD_estimation.R \
 		$combProFileNoArtifacts \
@@ -138,10 +134,10 @@ do
 		$ploidy \
 		$tcc \
 		$pid \
-		${HRDFile}.tmp \
-		${HRD_DETAILS_FILE}.tmp \
-		${LST_DETAILS_FILE}.tmp \
-		${MERGED_REDUCED_FILE}.tmp \
+		${pid}_HRDscore_${ploidyFactor}_${tcc}.txt \
+		${pid}_HRDscore_contributingSegments_${ploidyFactor}_${tcc}.txt \
+		${pid}_LSTscore_contributingSegments_${ploidyFactor}_${tcc}.CentromerReduced.txt \
+		${pid}_comb_pro_extra${ploidyFactor}_${tcc}.smoothed.CentromerReduced.txt \
 		${centromers} \
 		${cytobandsFile} \
 		.
@@ -162,11 +158,6 @@ do
 		exit 2
 	fi
 
-	mv ${HRDFile}.tmp ${HRDFile}
-	mv ${HRD_DETAILS_FILE}.tmp ${HRD_DETAILS_FILE}
-	mv ${LST_DETAILS_FILE}.tmp ${LST_DETAILS_FILE}
-	mv ${MERGED_REDUCED_FILE}.tmp ${MERGED_REDUCED_FILE}
-	rm ${combProFile}.tmp
 done
 if [[ "$?" != 0 ]]
 then
diff --git a/bin/manual_pruning.R b/bin/manual_pruning.R
index 93285fa..26c251a 100755
--- a/bin/manual_pruning.R
+++ b/bin/manual_pruning.R
@@ -759,6 +759,7 @@ test_new$maxStop =NA
 swapAlleles <- function(segments, data, chr, blockPre, blockPost){
 
   blockFile <- paste0( blockPre, chr, ".", blockPost)
+  cat(paste0("blockFile: ",blockFile, "\n\n"))
   blocks <- read.table( blockFile, header=F)
   colnames(blocks) <- c('chr', 'start', 'end', 'length')
   
@@ -896,6 +897,7 @@ for (chr in  seq_len(maxChr) ) {
 		if ( chr <= max(chromosomes) ) {
 		#adjust allele frequencies
 			dataAll[[chr]]$adjusted <- NA
+      cat(paste0("chr: ",chr, "\n\n"))
 			dataAll[[chr]] <- swapAlleles( test_new[selSeg,], dataAll[[chr]], chr, blockPre, blockSuf)
    
 			selRem <- which( dataAll[[chr]]$betaN > 0.3 & dataAll[[chr]]$betaN < 0.7 & is.na(dataAll[[chr]]$adjusted) )
diff --git a/bin/python_modules/Options.pyc b/bin/python_modules/Options.pyc
index a9ce6e3a358a8ce21b2136bb4cdd672c1e1e8163..8f4eb3f62301538edcd160d47463985aaa123616 100644
GIT binary patch
delta 60
zcmeBS|Ha16{F#@_>VDNm_Ch8`pUE=J8k~7)x{1lD#i@mp!<gkKUt;paB5no%Ibafr

delta 78
zcmeyx*2B)u{F#^Q?wOd4?1fB>p_40_G=z){<MZ;%^z+hm6O&VmQwwzsC$D14z$Ym`
IIhk1;0Q_YcQvd(}

diff --git a/bin/python_modules/Tabfile.pyc b/bin/python_modules/Tabfile.pyc
index b6fe525a76dc97f08da080c7357b4e789111115b..f04c1505652678287e090c05021ef994d0bdec87 100644
GIT binary patch
delta 258
zcmew)IzyD5`7<w<)%~iC>@iG?K9dDGG&u9pbQ6<Pi&G0HE3(N=-osRZO?-1NGdCj^
z3HiyNSiYc(Z&qZ}#im$p@^bb}bj4s>GdV)Ah|5nF=QPJ64ss>O<n^4cSmfj<`*E3K
L5to~Mhf53qgP}`M

delta 344
zcmbOs`bm_X`7<xq-7_&8*<+X(LnjMxY6uw_#^>di>F1^CCMKsArxxlOPIlx}o4lK;
z0G|T+%{9zijQCW@O}@tR6|aKL(ro;AjZmH3$ex7H2>Hn-9KrY$$W6Y^VS!IUHa<zY
Q$$vP_@F`H9+|DHk0I%<6&Hw-a

diff --git a/bin/python_modules/__init__.pyc b/bin/python_modules/__init__.pyc
index 01b37e20f5d853f5b274243f36540085f68cc31c..0e960e7192b9205ea29b2c53acf4ea6570d78934 100644
GIT binary patch
delta 33
pcmZ3>xP+0N`7<w<_5G@e>}HI<6YVrO^U`z^lT(XR3nx~X0syXY3dsNf

delta 42
ycmZ3&xR#Nf`7<xq-7_&0+07WkCfaET85zds<(KK_rRgRnrxvFc>KabWHw6GDh7IKa

diff --git a/conf/test.config b/conf/test.config
index 80c47a7..ce4e7c3 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -5,25 +5,112 @@
     Defines input files and everything required to run a fast and simple pipeline test.
 
     Use as follows:
-        nextflow run nf-core/aceseq -profile test,<docker/singularity> --outdir <OUTDIR>
+        nextflow run main.nf -profile test,singularity --outdir <OUTDIR>
 
 ----------------------------------------------------------------------------------------
 */
 
 params {
-    config_profile_name        = 'Test profile'
-    config_profile_description = 'Minimal test dataset to check pipeline function'
+    config_profile_contact     = 'Kübra Narcı kuebra.narci@dkfz-heidelberg.de'
+    config_profile_name        = 'Test profiler'
+    config_profile_description = 'Test dataset for to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
+    max_cpus   = 16
+    max_memory = '100.GB'
+    max_time   = '8.h'
 
     // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input  = 'testdata/samplesheet.csv'
+
+    // workflow parameters
+    outdir                     = "test_results"
+    estimatesex                = false
+    createbafplots             = false
+
+    minHT                      = 0
+
+    // correctGC options
+    lowess_f                   = 5
+    scale_factor               = 0
+    covplot_ylims              = 4
+    gc_bias_json_key           = "gc-bias"
+    minLim                     = 0.47
+    maxLim                     = 0.53
+    min_length_purity          = 1000000
+    min_hetSNPs_purity         = 0       // default 500 !!
+    dh_stop                    = "max"
+    min_length_dh_stop         = 1000000
+    dh_zero                    = "no"
+    purity_min                 = 0.3
+    purity_max                 = 1.0
+    ploidy_min                 = 1.0
+    ploidy_max                 = 6.5
+    local_minium_upper_boundary_shift = 0.1
+
+    // clusterAndPruneSegments
+    min_seg_length_prune       = 100
+    min_num_SNPs               = 1
+    clustering                 = "no"
+    min_cluster_number         = 1
+    min_membership             = 0.001
+    min_distance               = 0.0005
+
+    legacyMode                 = false
+
+     // Reference //
+    data_path                  = "/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes"
+    fasta                      = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa'
+    fasta_fai                  = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa.fai'
+    chrom_sizes                = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/stats/hs37d5.fa.chrLenOnlyACGT_realChromosomes.tab'
+    chr_prefix                 = ""
+
+    // Beagle reference
+    beagle_reference           = "${params.data_path}/tools_data/Beagle"
+    beagle_genetic_map         = "${params.data_path}/tools_data/genetic_maps"
+    beagle_ref_ext             = "bref3" // vcf | bref | bref3
+    beagle_map_ext             = "map"
+
+    // Annotation files
+    dbsnp_snv                  = "${params.data_path}/databases/dbSNP/dbSNP_135/00-All.SNV.vcf.gz"
+    mapability_file            = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz"
+    replication_time_file      = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda"
+    gc_content_file            = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt"
+    gene_annotation_file       = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv"
+
+    // get breakpoints/ PSCBS gaps
+    centromer_file             = "${params.data_path}/stats/hg19_gaps.txt"
+
+    // HRD estimation
+    blacklist_file             = "assets/artifact.homoDels.potentialArtifacts.txt"
+    cytobands_file             = "assets/hg19_cytoBand.txt"
+ }
+
+// Perform work directory cleanup when the run has successfully completed
+ cleanup = true
+
+// The executor block at the end of this file throttles job submission (submitRateLimit, queueSize) so the scheduler is not flooded with jobs
+
+singularity {
+  enabled = true
+  cacheDir = "/omics/groups/OE0608/internal/kubran/singularity"
+  autoMounts = true
+  runOptions = "-B /omics/groups -B /omics/odcf/analysis -B /omics/odcf/project -B /omics/odcf/reference_data"
+}
+env {
+    SINGULARITY_CACHEDIR="/omics/groups/OE0608/internal/kubran/singularity"
+    SINGULARITY_LIBRARYDIR="/omics/groups/OE0608/internal/kubran/singularity/library"
+}
+
+process {
+  executor = 'lsf'
+  scratch = '$SCRATCHDIR/$LSB_JOBID'
 
-    // Genome references
-    genome = 'R64-1-1'
 }
+executor {
+  name = 'lsf'
+  perTaskReserve = false
+  perJobMemLimit = true
+  submitRateLimit = '30 sec'
+  queueSize=50
+  }
diff --git a/conf/test_full.config b/conf/test_full.config
index 8450d95..fe5265f 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -1,24 +1,98 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Nextflow config file for running full-size tests
+    Nextflow config file for running full-size tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Defines input files and everything required to run a full size pipeline test.
+    Defines input files and everything required to run a full size pipeline test.
 
     Use as follows:
-        nextflow run nf-core/aceseq -profile test_full,<docker/singularity> --outdir <OUTDIR>
+        nextflow run main.nf -profile test_full,singularity --outdir <OUTDIR>
 
 ----------------------------------------------------------------------------------------
 */
 
 params {
-    config_profile_name        = 'Full test profile'
-    config_profile_description = 'Full test dataset to check pipeline function'
+    config_profile_contact     = 'Kübra Narcı kuebra.narci@dkfz-heidelberg.de'
+    config_profile_name        = 'Test profiler'
+    config_profile_description = 'Test dataset for to check pipeline function'
 
-    // Input data for full size test
-    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+    // Upper resource limits for this test run (sized for the LSF cluster, not GitHub Actions)
+    max_cpus   = 16
+    max_memory = '100.GB'
+    max_time   = '8.h'
+
+    // Input data
+    input  = 'assets/samplesheet_37_full.csv'
+
+    // workflow parameters
+    outdir                     = "test_full"
+
+    // correctGC options
+    minLim                     = 0.47
+    maxLim                     = 0.53
+    min_length_purity          = 1000000
+    min_hetSNPs_purity         = 0       // default 500 !!
+    dh_stop                    = "max"
+    min_length_dh_stop         = 1000000
+    dh_zero                    = "no"
+    purity_min                 = 0.3
+    purity_max                 = 1.0
+    ploidy_min                 = 1.0
+    ploidy_max                 = 6.5
+    local_minium_upper_boundary_shift = 0.1
+
+     // Reference //
+    data_path                  = "/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes"
+    fasta                      = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa'
+    fasta_fai                  = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/sequence/1KGRef/hs37d5.fa.fai'
+    chrom_sizes                = '/omics/odcf/reference_data/legacy/ngs_share/assemblies/hg19_GRCh37_1000genomes/stats/hs37d5.fa.chrLenOnlyACGT_realChromosomes.tab'
+    chr_prefix                 = ""
+
+    // Beagle reference
+    beagle_reference           = "${params.data_path}/tools_data/Beagle"
+    beagle_genetic_map         = "${params.data_path}/tools_data/genetic_maps"
+    beagle_ref_ext             = "bref3" // vcf | bref | bref3
+    beagle_map_ext             = "map"
+
+    // Annotation files
+    dbsnp_snv                  = "${params.data_path}/databases/dbSNP/dbSNP_135/00-All.SNV.vcf.gz"
+    mapability_file            = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz"
+    replication_time_file      = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda"
+    gc_content_file            = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt"
+    gene_annotation_file       = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv"
+
+    // get breakpoints/ PSCBS gaps
+    centromer_file             = "${params.data_path}/stats/hg19_gaps.txt"
+
+    // HRD estimation
+    blacklist_file             = "assets/artifact.homoDels.potentialArtifacts.txt"
+    cytobands_file             = "assets/hg19_cytoBand.txt"
+ }
+
+// Perform work directory cleanup when the run has successfully completed
+ cleanup = true
+
+// The executor block at the end of this file throttles job submission (submitRateLimit, queueSize) so the scheduler is not flooded with jobs
+
+singularity {
+  enabled = true
+  cacheDir = "/omics/groups/OE0608/internal/kubran/singularity"
+  autoMounts = true
+  runOptions = "-B /omics/groups -B /omics/odcf/analysis -B /omics/odcf/project -B /omics/odcf/reference_data"
+}
+env {
+    SINGULARITY_CACHEDIR="/omics/groups/OE0608/internal/kubran/singularity"
+    SINGULARITY_LIBRARYDIR="/omics/groups/OE0608/internal/kubran/singularity/library"
+}
+
+process {
+  executor = 'lsf'
+  scratch = '$SCRATCHDIR/$LSB_JOBID'
 
-    // Genome references
-    genome = 'R64-1-1'
 }
+executor {
+  name = 'lsf'
+  perTaskReserve = false
+  perJobMemLimit = true
+  submitRateLimit = '30 sec'
+  queueSize=50
+  }
diff --git a/modules/local/annotate_cnv.nf b/modules/local/annotate_cnv.nf
index 9f919ba..c74b08c 100644
--- a/modules/local/annotate_cnv.nf
+++ b/modules/local/annotate_cnv.nf
@@ -1,6 +1,6 @@
 // This needs to run per cnv.tab.gz !
 process ANNOTATE_CNV {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_high_cpu_low_memory'
 
     conda     (params.enable_conda ? "" : null)
diff --git a/modules/local/cluster_segments.nf b/modules/local/cluster_segments.nf
index d93a864..6f26256 100644
--- a/modules/local/cluster_segments.nf
+++ b/modules/local/cluster_segments.nf
@@ -12,10 +12,10 @@ process CLUSTER_SEGMENTS {
     val(chr_prefix)
 
     output:
-    tuple val(meta), path('*normal.txt')                                      , emit: clustered_segments   
-    tuple val(meta), path('*all_seg_2.txt.gz'), path('*all_seg_2.txt.gz.tbi') , emit: snp_update2
-    tuple val(meta), path('*.pdf')
-    path  "versions.yml"                                                       , emit: versions
+    tuple val(meta), path('*normal.txt')                              , emit: clustered_segments   
+    tuple val(meta), path('*seg_2.txt.gz'), path('*seg_2.txt.gz.tbi') , emit: snp_update2
+    tuple val(meta), path('*.pdf')                                    , optional: true 
+    path  "versions.yml"                                              , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/create_fake_samples.nf b/modules/local/create_fake_samples.nf
index c2f9620..3f52146 100644
--- a/modules/local/create_fake_samples.nf
+++ b/modules/local/create_fake_samples.nf
@@ -1,5 +1,5 @@
 process CREATE_FAKE_SAMPLES {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_low'
 
     conda (params.enable_conda ? "" : null)
diff --git a/modules/local/embed_haplotypes.nf b/modules/local/embed_haplotypes.nf
index 0cb49b9..9b80f9b 100644
--- a/modules/local/embed_haplotypes.nf
+++ b/modules/local/embed_haplotypes.nf
@@ -1,5 +1,5 @@
 process EMBED_HAPLOTYPES {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_high_cpu_low_memory'
 
     conda (params.enable_conda ? "" : null)
diff --git a/modules/local/fake_control.nf b/modules/local/fake_control.nf
index dd7a48d..13fc406 100644
--- a/modules/local/fake_control.nf
+++ b/modules/local/fake_control.nf
@@ -1,5 +1,5 @@
 process FAKE_CONTROL {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_single'
 
     conda     (params.enable_conda ? "" : null)
diff --git a/modules/local/gc_bias.nf b/modules/local/gc_bias.nf
index d59e9e9..4ba2b47 100644
--- a/modules/local/gc_bias.nf
+++ b/modules/local/gc_bias.nf
@@ -1,10 +1,11 @@
+// NOTE: this process only works with the v0 tag of the kubran/odcf_aceseqcalling container image
 process GC_BIAS {
     tag "$meta.id"
     label 'process_single'
 
     conda     (params.enable_conda ? "" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://kubran/odcf_aceseqcalling:v5':'kubran/odcf_aceseqcalling:v5' }"
+        'docker://kubran/odcf_aceseqcalling:v0':'kubran/odcf_aceseqcalling:v0' }"
     
     input:
     tuple val(meta), path(cnv_pos)
diff --git a/modules/local/group_haplotypes.nf b/modules/local/group_haplotypes.nf
index 062ea11..30eaab7 100644
--- a/modules/local/group_haplotypes.nf
+++ b/modules/local/group_haplotypes.nf
@@ -1,6 +1,6 @@
 process GROUP_HAPLOTYPES {
-    tag "$meta.id"
-    label 'process_single'
+    tag "$meta.id chr$intervals"
+    label 'process_low'
 
     conda (params.enable_conda ? "" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/merge_cnv.nf b/modules/local/merge_cnv.nf
index e8d35e9..4ddc851 100644
--- a/modules/local/merge_cnv.nf
+++ b/modules/local/merge_cnv.nf
@@ -7,7 +7,7 @@ process MERGE_CNV {
         'docker://kubran/odcf_aceseqcalling:v5':'kubran/odcf_aceseqcalling:v5' }"
 
     input:
-    tuple val(meta)        , path(cnv)
+    tuple val(meta) , path(cnv)
     val(chr_prefix)
 
     output:
diff --git a/modules/local/win_generator.nf b/modules/local/win_generator.nf
index a36db69..595748f 100644
--- a/modules/local/win_generator.nf
+++ b/modules/local/win_generator.nf
@@ -1,5 +1,5 @@
 process WIN_GENERATOR {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals" 
     label 'process_low'
 
     conda (params.enable_conda ? "" : null)
diff --git a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf
index 07e9fa9..ec3dc12 100644
--- a/modules/nf-core/bcftools/mpileup/main.nf
+++ b/modules/nf-core/bcftools/mpileup/main.nf
@@ -1,5 +1,5 @@
 process BCFTOOLS_MPILEUP {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_medium'
 
     conda (params.enable_conda ? "bioconda::bcftools=1.9" : null)
diff --git a/modules/nf-core/beagle/main.nf b/modules/nf-core/beagle/main.nf
index f32d72a..d0e357a 100644
--- a/modules/nf-core/beagle/main.nf
+++ b/modules/nf-core/beagle/main.nf
@@ -1,5 +1,5 @@
 process BEAGLE5_BEAGLE {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_high'
 
     conda "bioconda::beagle=5.2_21Apr21.304"
diff --git a/modules/nf-core/samtools/mpileup/main.nf b/modules/nf-core/samtools/mpileup/main.nf
index 1f185a0..d10ef55 100644
--- a/modules/nf-core/samtools/mpileup/main.nf
+++ b/modules/nf-core/samtools/mpileup/main.nf
@@ -1,5 +1,5 @@
 process SAMTOOLS_MPILEUP {
-    tag "$meta.id"
+    tag "$meta.id chr$intervals"
     label 'process_medium'
 
     conda "bioconda::samtools=1.9"
diff --git a/run_aceseq.sh b/run_aceseq.sh
index 9cd9a30..9ce4fa3 100644
--- a/run_aceseq.sh
+++ b/run_aceseq.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 module load  nextflow/22.10.7
-nextflow run main.nf -profile dkfz_cluster_hg38,singularity --input assets/samplesheet_37_full.csv
\ No newline at end of file
+nextflow run main.nf -profile test,singularity
\ No newline at end of file
diff --git a/smaldata.sh b/smaldata.sh
new file mode 100644
index 0000000..e149b01
--- /dev/null
+++ b/smaldata.sh
@@ -0,0 +1,5 @@
+module load samtools
+samtools view --threads 4 -b /omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/exon_sequencing/results_per_pid/SEQC2_LL1/alignment_hg37/tumor01_SEQC2_LL1_merged.mdup.bam -L seqc2_testdata_aceseq/seq2_testdata.bed > seqc2_testdata_aceseq/SEQC2_LL1_T_small.bam
+samtools view --threads 4 -b /omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/exon_sequencing/results_per_pid/SEQC2_LL1/alignment_hg37/control01_SEQC2_LL1_merged.mdup.bam -L seqc2_testdata_aceseq/seq2_testdata.bed > seqc2_testdata_aceseq/SEQC2_LL1_C_small.bam
+samtools index -b seqc2_testdata_aceseq/SEQC2_LL1_T_small.bam
+samtools index -b seqc2_testdata_aceseq/SEQC2_LL1_C_small.bam
\ No newline at end of file