# config.yaml
# Forked from snakemake-workflows/single-cell-rna-seq.
# Path to the sheet describing meta-information for each cell.
# The first column must contain the cell id; all other columns are optional.
# The optional columns are added as metadata to the SingleCellExperiment
# object created in the workflow.
cells: cells.tsv
counts:
  # Count table (rows: genes/transcripts/spikes, cols: cells).
  path: counts.tsv
  # Kind of feature identifier used in each row. Must be a term understood by
  # Ensembl BioMart, e.g. ensembl_gene_id or hgnc_symbol.
  feature_ids: ensembl_gene_id
  # BioMart host to use (e.g. www.ensembl.org, useast.ensembl.org, ...).
  # NOTE(review): nesting of `biomart` under `counts` is inferred from its
  # position next to `feature_ids`; confirm against the workflow's config
  # lookups.
  biomart: useast.ensembl.org
# Variables to use for batch effect removal and as factors to be ignored in
# variance analysis. This should always include the two cell cycle phases
# (G1, G2M), which are determined automatically during analysis. In addition,
# you may add any column name of the cell sheet (see `cells`).
model:
  # R formula representing known batch effects.
  design: "~ G1 + G2M + detection_rate"
  # Rely on spike-ins for variance model estimation.
  # This is usually not desired because spike-ins are few and noisy.
  # If this is set to false, endogenous genes are used instead, under the
  # assumption that most of them are not differentially expressed.
  use-spikes: false
  # Minimum value of average difference in true (biological) log2 expression
  # between any two cells.
  min-bio-comp: 0.5
  # FDR threshold for selecting highly variable genes/transcripts (HVGs).
  fdr: 0.05
  # Show the expression distribution of the top n HVGs.
  show-n: 20
  # Compute correlation for the top n HVGs.
  top-n: 200
# Comment out to not assign cell types via signatures.
celltype:
  # Table describing markers for assignment of cell types.
  # Columns: name (cell type name), parent (parent cell type name),
  # genes (comma-separated list of gene names/ids, as listed in the
  # count matrix).
  # The parent column is usually empty. If it is set, assignment for that
  # type happens recursively, only on those cells that have been assigned to
  # the parent type.
  markers: resources/markers.tsv
# Comment out to not do differential expression analysis.
diffexp:
  # Add one entry per comparison here. The key below can be an arbitrary name.
  a-vs-b:
    # EdgeR design formula.
    # Refer to any colData from SingleCellExperiment here.
    # In addition, you can use celltype and detection_rate
    # (number of expressed genes in cell divided by total
    # number of genes in experiment).
    design: "~ test.condition"
    # Which coefficients of the model should be tested equal to zero.
    # E.g., 2 to test the first coefficient after the implicit intercept
    # (i.e., test.condition in the example above).
    coef: 2
    # False discovery rate to control for.
    fdr: 0.05
    # Optional: constrain to cell types (comment out to use all cell types).
    celltypes:
      - Endothelial-cell
# Species the samples originate from.
# NOTE(review): the set of accepted values is not visible in this file;
# confirm against the workflow before changing it.
species: mouse
spike-ins:
  # Regular expression pattern to detect spike-ins.
  pattern: "^ERCC"
filtering:
  # Remove all genes with a mean count less than the given threshold.
  min-avg-count: 1