forked from gladshire/Semblans
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CONFIG.ini
171 lines (124 loc) · 5.63 KB
/
CONFIG.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Configuration file for Semblans run
# User should fill these fields with desired options
[General]
# Specifies name of corresponding project and its directory
project_name = project
# Specifies path to parent directory of Semblans project
# e.g. With the values in this file, output files will be stored in ./project/
output_directory = ./
# Defines an NCBI API key, necessary for faster SRA downloads if not using local runs
#
# More information on NCBI API keys can be found here:
# https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/
ncbi_api_key =
# Specifies path to a reference proteome required for post-assembly processing
# More information on this can be found in Semblans' documentation
reference_proteome_path = ../ensembl_plant.pep.all.fa
# Specifies the name of the log file for this configuration file's associated Semblans run
# Semblans will create this log file and write verbose output to it for later reference
log_file = log.txt
# Tells Semblans to perform the entire assembly process on one short-read run before
# moving on to the next. By default, this is false, meaning Semblans will perform the same
# assembly step on every run before moving on to the next step
# (this file sets it to true, i.e. the non-default, run-by-run behavior)
serial_processing = true
; =======================================================================================
# THIS SECTION CONTAINS OPTIONS FOR INCLUDING / EXCLUDING CERTAIN STEPS IN PIPELINE
# FIELDS SHOULD BE EITHER 'true' OR 'false', INFORMING SEMBLANS OF USER'S PREFERENCES
[Pipeline]
# Informs Semblans whether to perform pre-assembly error correction of base pairs
# (granular options: see [Rcorrector settings])
error_correction = false
# Informs Semblans whether to trim adapter sequences from read data
# (granular options: see [Trimmomatic settings])
trim_adapter_seqs = true
# Informs Semblans whether to remove reads with foreign sequences, as identified by the
# database names listed in the [Kraken2 filter order] section of this config file
# (granular options: see [Kraken2 settings])
filter_foreign_reads = true
# Informs Semblans whether to remove overrepresented reads prior to assembly
remove_overrepresented = true
# Informs Semblans whether to remove sequences identified as chimeras
remove_chimera_reads = true
# Informs Semblans whether to perform cluster-based filtering of reads to minimize redundancy
cluster_filtering = true
# Informs Semblans whether to perform transcript annotation using HMMER with the PANTHER
# protein database
annotate_transcripts = false
; =======================================================================================
# THIS SECTION IS WHERE USER CAN SPECIFY THE PATHS OF LOCAL SEQUENCE DATA FOR ASSEMBLY
#
# Runs should be defined line-wise.
# For SINGLE-END, simply write the corresponding file path.
# For PAIRED-END, write the forward and reverse file paths, separated by a single space
#
# User must enter FULL file paths
[Local files]
# Example entries (commented out). Replace with real paths and remove the leading '#':
#/path/to/local_single_end.fastq
#/path/to/local_paired_end_1.fastq /path/to/local_paired_end_2.fastq
; =======================================================================================
# THIS SECTION IS WHERE USER CAN SPECIFY NCBI SRA ACCESSIONS FOR NON-LOCAL SEQUENCE DATA
# OBTAINED THROUGH NCBI. SEMBLANS WILL AUTOMATICALLY DOWNLOAD THIS DATA INTO THE PROJECT
# DIRECTORY
#
# NCBI SRA data can be found here: https://www.ncbi.nlm.nih.gov/sra
[SRA accessions]
# One accession per line. Entries prefixed with '#' are commented out;
# SRR5062042 is the only accession currently active.
# NOTE(review): some commented-out accessions below appear twice
# (SRR2088030, SRR16526565, SRR12783070) - deduplicate before re-enabling them.
#SRR5944352
#SRR2088030
SRR5062042
#SRR16526565
#SRR12783070
#SRR5279415
#SRR5738849
#SRR16526565
#SRR2088030
#SRR12783070
; =======================================================================================
# THIS SECTION IS WHERE USER CAN DEFINE GROUPS OF SRAs / FILES FOR MULTI-ASSEMBLIES.
# IF USER WISHES TO GENERATE DE-NOVO ASSEMBLY USING MULTIPLE FILES OR RUNS, THEY SHOULD
# DEFINE THE GROUP ACCORDING TO THE PROVIDED EXAMPLE.
#
#[Assembly groups]
# AssemblyFileName = SRR21719547_7227_Drosophila_melanogaster_1.fastq, SRR21719547_7227_Drosophila_melanogaster_2.fastq, SRR21719548_7227_Drosophila_melanogaster_1.fastq, SRR21719548_7227_Drosophila_melanogaster_2.fastq
; =======================================================================================
# GRANULAR SETTINGS FOR ERROR-CORRECTION USING RCORRECTOR
[Rcorrector settings]
# Only relevant when error_correction = true in [Pipeline].
# NOTE(review): the key names match Rcorrector's -k, -maxcorK and -wk options
# respectively - confirm the mapping against Semblans' documentation.
# Length of the k-mers used for error correction
kmer_length = 23
# Maximum number of corrections allowed within a single k-mer window
max_corrections_per_kmer_window = 4
# Proportion threshold used when classifying weak k-mers
weak_kmer_proportion_threshold = 0.95
; =======================================================================================
# GRANULAR SETTINGS FOR ADAPTER-TRIMMING USING TRIMMOMATIC
[Trimmomatic settings]
# Only relevant when trim_adapter_seqs = true in [Pipeline].
# NOTE(review): the keys below appear to correspond to Trimmomatic's ILLUMINACLIP,
# SLIDINGWINDOW, LEADING, TRAILING, MINLEN and HEADCROP steps - confirm the exact
# mapping against Semblans' documentation.
# Name of the adapter sequence set to trim
adapter_seqs = Truseq_all
# Maximum number of mismatches allowed in an adapter seed match
max_allowed_seed_mismatch = 2
# Minimum adapter match score for paired reads
# (presumably ILLUMINACLIP's palindrome clip threshold - confirm)
min_score_paired = 30
# Minimum adapter match score for single reads
# (presumably ILLUMINACLIP's simple clip threshold - confirm)
min_score_single = 10
# Sliding-window quality trimming: window size in bases ...
sliding_window_size = 4
# ... and the minimum quality required within that window
sliding_window_min_quality = 5
# Minimum quality required to keep bases at the start of a read
min_quality_leading = 5
# Minimum quality required to keep bases at the end of a read
min_quality_trailing = 5
# Minimum read length; presumably shorter reads are dropped after trimming - confirm
min_read_length = 25
# Number of base pairs to cut from the front of every read (0 = none)
cut_number_bp_from_front = 0
; =======================================================================================
# THIS SECTION CONTAINS OPTIONS FOR FOREIGN SEQUENCE REMOVAL USING KRAKEN2
# EXPLANATIONS FOR THE FIRST TWO VARIABLES CAN BE FOUND IN SEMBLANS'S DOCUMENTATION
#
# 'db_directory' should contain the path to the Kraken2 databases the user wishes
# Semblans to utilize for its removal of foreign reads
[Kraken2 settings]
# Only relevant when filter_foreign_reads = true in [Pipeline].
# NOTE(review): the first three keys look like Kraken2's --confidence,
# --minimum-base-quality and --minimum-hit-groups options - confirm against
# Semblans' documentation.
confidence_threshold = 0.2
min_base_quality = 0
min_hit_groups = 2
# Whether reads identified as foreign are saved
# (presumably written to the project directory rather than discarded - confirm)
save_foreign_reads = true
# Path to the directory containing the Kraken2 databases Semblans should use
# NOTE(review): left empty here although filter_foreign_reads = true in [Pipeline];
# fill in before running, or confirm how Semblans handles an empty value.
db_directory =
; =======================================================================================
# THIS SECTION CONTAINS THE NAMES OF DATABASES THAT SEMBLANS WILL USE FOR FOREIGN
# READ REMOVAL
#
# The order in which foreign reads are removed should be irrelevant, but it is
# defined by the order in which the databases are listed here.
[Kraken2 filter order]
# NOTE(review): no databases are listed here although filter_foreign_reads = true
# in [Pipeline]. Presumably one database name per line is expected - confirm the
# entry format against Semblans' documentation and add the databases to use.
; =======================================================================================
# GRANULAR SETTINGS FOR SEQUENCE ALIGNMENT USING DIAMOND OR BLAST
[Alignment settings]
# Whether to use BLAST instead of DIAMOND for alignment
use_blast_instead_of_diamond = false
# NOTE(review): the keys below look like the e-value cutoff and maximum number of
# target sequences for the blastx and blastp searches respectively - confirm which
# pipeline steps run each search against Semblans' documentation.
blastx_max_evalue = 0.001
blastx_max_target_seqs = 25
blastp_max_evalue = 10
blastp_max_target_seqs = 1
; =======================================================================================
# GRANULAR SETTINGS FOR CODING REGION PREDICTION USING TRANSDECODER
[TransDecoder settings]
# Whether to retrieve multiple ORFs rather than a single one
# NOTE(review): presumably per transcript, and the inverse of TransDecoder.Predict's
# --single_best_only flag - confirm against Semblans' documentation.
get_multiple_orfs = false