-
Notifications
You must be signed in to change notification settings - Fork 4
/
data.yaml
85 lines (81 loc) · 2.68 KB
/
data.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Directory for all output results and analysis reports.
workdir: ./workspace_mouse_demo
# Directory for temporary files (here, a hidden subdirectory of workdir).
tempdir: ./workspace_mouse_demo/.tmp
# Maximum number of cores used by all jobs running at the same time.
cores: 120
# NOTE(review): read barcode pattern. The meaning of `-` and `N` is not
# documented in this file; confirm against the pipeline docs before changing.
barcode: '-NNNNN'
# # BUILT-IN SETTINGS:
# # (Most of the time, you do not need to change them.)
#
# # Keep the mapping BAM files and reference index files of each run.
# # These files are cached so that you do not need to re-run some analyses when you add more sequencing reads.
# keep_internal: false
# # Keep discarded reads (untrimmed, too short, unmapped...) for debugging purposes.
# # Believe me, you do not need these files. Set it to `false` to save storage.
# keep_discarded: false
#
# # Cutoffs for pre-filtering.
# cutoff:
#   ## processing reads
#   # STAR `outFilterMatchNminOverLread`
#   min_match_prop: 0.8
#   ## calling sites
#   # prefilter: sites must show >= x gaps in total among all samples in each group
#   min_group_gap: 5
#   # prefilter: sites must show >= x sequencing coverage in total among all samples in each group
#   min_group_depth: 10
#   # prefilter: sites must show >= x deletion ratio among all samples in each group
#   min_group_ratio: 0.01
#   # only analyze putative sites that pass the prefilter in >= x groups
#   min_group_num: 1
# NOTE: relative paths for reference and sample data should be relative to this config file, rather than to workdir.
# Reference files, grouped by purpose. Each section takes a FASTA file (`fa`)
# and, where applicable, a prebuilt aligner index.
reference:
  # Optional.
  # Contamination removal is skipped if this section is not provided.
  contamination:
    fa: ./ref/contamination.fa
    # Optional. If a bowtie2 index is not provided, the pipeline automatically generates an internal one.
    # bt2: ./ref/contamination
  # Required.
  genes:
    fa: ./ref/genes.fa
    # Optional. If a bowtie2 index is not provided, the pipeline automatically generates an internal one.
    # bt2: ./ref/genes
  # Required.
  genome:
    fa: /data/reference/genome/Mus_musculus/GRCm39.fa
    # NOTE(review): presumably the STAR index for the genome above; confirm.
    star: /data/reference/genome/Mus_musculus/star/GRCm39.release108
# Input samples, keyed by sample name. Each sample lists its read files under
# `data`, the `group` it belongs to, and whether it is `treated`.
samples:
  # A sample name can be any string, but it is good practice to use only
  # letters, digits, and hyphens: no spaces, underscores, or other special symbols.
  mESCWT-rep1-input:
    data:
      - R1: ./data/IP16_run1.fastq.gz
    group: mESCWT
    treated: false
  mESCWT-rep2-input:
    data:
      - R1: ./data/IP17_run1.fastq.gz
    group: mESCWT
    treated: false
  mESCWT-rep3-input:
    data:
      - R1: ./data/IP18_run1.fastq.gz
    group: mESCWT
    treated: false
  mESCWT-rep1-treated:
    data:
      - R1: ./data/IP4_run1.fastq.gz
    group: mESCWT
    treated: true
  mESCWT-rep2-treated:
    data:
      - R1: ./data/IP5_run1.fastq.gz
    group: mESCWT
    treated: true
  mESCWT-rep3-treated:
    data:
      - R1: ./data/IP6_run1.fastq.gz
    group: mESCWT
    treated: true