# test/data.yaml
---

# Directory for all output results and analysis reports.
workdir: ./workspace_mouse_demo
# Directory for temporary files (here a hidden folder inside the workdir).
tempdir: ./workspace_mouse_demo/.tmp
# Maximum number of cores used by all jobs running at the same time.
cores: 120

# Barcode setting (single-quoted, so it is always read as a string).
# NOTE(review): the meaning of `-` and `N` in the pattern is not documented
# in this file; confirm against the pipeline documentation.
barcode: '-NNNNN'

# # BUILT-IN SETTINGS:
# # (Most of the time, you do not need to change them.)
#
# # Keep the mapping BAM file of each run and the reference index files.
# # These files are cached, so you do not need to re-run some analyses when you add more sequencing reads.
# keep_internal: false
# # Keep discarded reads (untrimmed, too short, unmapped...) for debugging purposes.
# # Believe me, you do not need these files. Set it to `false` to save storage.
# keep_discarded: false
#
# # cutoff for pre-filtering.
# cutoff:
#   ## processing reads
#   # STAR `outFilterMatchNminOverLread`
#   min_match_prop: 0.8
#   ## calling sites
#   # prefilter for sites that show >= x gaps in total among all samples in each group
#   min_group_gap: 5
#   # prefilter for sites that show >= x sequencing coverage in total among all samples in each group
#   min_group_depth: 10
#   # prefilter for sites that show >= x deletion ratio among all samples in each group
#   min_group_ratio: 0.01
#   # only analyze putative sites that pass the prefilter in >= x groups
#   min_group_num: 1

# NOTE: relative paths of reference and sample data should be relative to
# this config file, rather than to the workdir.
reference:
  # Optional.
  # Contamination removal is skipped if this section is not provided.
  contamination:
    fa: ./ref/contamination.fa
    # Optional. If a bowtie2 index is not provided, the pipeline will
    # automatically generate an internal one.
    # bt2: ./ref/contamination
  # Required.
  genes:
    fa: ./ref/genes.fa
    # Optional. If a bowtie2 index is not provided, the pipeline will
    # automatically generate an internal one.
    # bt2: ./ref/genes
  # Required.
  # NOTE(review): both genome paths are absolute, unlike the relative paths
  # above; `star` presumably points to a prebuilt STAR index. Confirm that
  # they exist on the machine running the pipeline.
  genome:
    fa: /data/reference/genome/Mus_musculus/GRCm39.fa
    star: /data/reference/genome/Mus_musculus/star/GRCm39.release108

samples:
  # A sample name can be any string, but it is good practice to use only
  # letters and hyphens: no spaces, underscores or other special symbols.
  #
  # Each sample below has three fields:
  #   data:    a list of entries, each giving an `R1` read file path
  #   group:   the group name shared by related samples
  #   treated: boolean flag; in this demo the `-input` samples are `false`
  #            and the `-treated` samples are `true`
  mESCWT-rep1-input:
    data:
      - R1: ./data/IP16_run1.fastq.gz
    group: mESCWT
    treated: false
  mESCWT-rep2-input:
    data:
      - R1: ./data/IP17_run1.fastq.gz
    group: mESCWT
    treated: false
  mESCWT-rep3-input:
    data:
      - R1: ./data/IP18_run1.fastq.gz
    group: mESCWT
    treated: false
  mESCWT-rep1-treated:
    data:
      - R1: ./data/IP4_run1.fastq.gz
    group: mESCWT
    treated: true
  mESCWT-rep2-treated:
    data:
      - R1: ./data/IP5_run1.fastq.gz
    group: mESCWT
    treated: true
  mESCWT-rep3-treated:
    data:
      - R1: ./data/IP6_run1.fastq.gz
    group: mESCWT
    treated: true