# config.yaml

# Path to the sheet describing meta-information for each cell.
# The first column must contain the cell id; all other columns are optional.
# Every column is added as metadata to the SingleCellExperiment object created
# in the workflow.
cells: cells.tsv

counts:
  # Path to the count table (rows: genes/transcripts/spikes, columns: cells).
  path: counts.tsv
  # Kind of feature identifier used to label each row. Must be a term
  # understood by Ensembl BioMart (e.g. ensembl_gene_id, hgnc_symbol).
  feature_ids: ensembl_gene_id
  # BioMart host to use (e.g. www.ensembl.org, useast.ensembl.org, ...).
  biomart: useast.ensembl.org

# Variables to use for batch effect removal and as factors to be ignored in the
# variance analysis. This should always include the two cell cycle phases
# (G1, G2M), which are determined automatically during analysis. In addition,
# you may add any column name of the cell sheet (see above).
model:
  # R formula representing known batch effects.
  design: "~ G1 + G2M + detection_rate"
  # Rely on spike-ins for variance model estimation.
  # This is usually not desired because spike-ins tend to be few and noisy.
  # If this is set to false, endogenous genes are used instead, under the
  # assumption that most of them are not differentially expressed.
  use-spikes: false
  # Minimum value of the average difference in true (biological) log2
  # expression between any two cells.
  min-bio-comp: 0.5
  # FDR threshold for selecting highly variable genes/transcripts (HVGs).
  fdr: 0.05
  # Show the expression distribution of the top n HVGs.
  show-n: 20
  # Compute correlations for the top n HVGs.
  top-n: 200

# Comment out this section to skip cell type assignment via signatures.
celltype:
  # Table describing markers for assignment of cell types.
  # Columns: name (cell type name), parent (parent cell type name),
  #          genes (comma-separated list of gene names/ids, as listed in the
  #          count matrix)
  # The parent column is usually empty. If it is set, assignment for that
  # type happens recursively, only on those cells that have been assigned to
  # the parent type.
  markers: resources/markers.tsv

# Comment out this section to skip differential expression analysis.
diffexp:
  # Add one entry per comparison here. The key below can be an arbitrary name.
  a-vs-b:
    # EdgeR design formula.
    # Refer to any colData from the SingleCellExperiment here.
    # In addition, you can use celltype and detection_rate
    # (number of expressed genes in a cell divided by the total
    # number of genes in the experiment).
    design: "~ test.condition"
    # Which coefficients of the model should be tested equal to zero.
    # E.g., 2 to test the first coefficient after the implicit intercept
    # (i.e., test.condition in the design above).
    coef: 2
    # False discovery rate to control for.
    fdr: 0.05
    # Optional: constrain to cell types (comment out to use all cell types).
    celltypes:
      - Endothelial-cell

# Species the data originates from.
# NOTE(review): the set of accepted values is not visible here; confirm
# against the workflow before changing it.
species: mouse

spike-ins:
  # Regular expression pattern to detect spike-ins.
  # The default matches identifiers that start with ERCC.
  pattern: "^ERCC"

filtering:
  # Remove all genes with a mean count less than the given threshold.
  min-avg-count: 1