Merge pull request friendsofstrandseq#46 from friendsofstrandseq/dev

2.1.3
grimbough · Jul 6, 2023 · 4463ca3 · 4463ca3
2 parents 11fd707 + 7239154
commit 4463ca3
Show file tree

Hide file tree

Showing 15 changed files with 384 additions and 117 deletions.
diff --git a/.github/workflows/assemblies.yaml b/.github/workflows/assemblies.yaml
@@ -3,14 +3,13 @@ name: MosaiCatcher assemblies checks
 on:
   schedule:
     # Run every Sunday at 00:00 UTC on the master branch
-    - cron:  '0 0 * * 0'
+    - cron: "0 0 * * 0"
       # branches:
       #   - master
   push:
     branches:
-      - '*'
-      - '!master'
-
+      - "*"
+      - "!master"
 
 jobs:
   # Downloading_example_data:

diff --git a/.github/workflows/conda.yaml b/.github/workflows/conda.yaml
@@ -3,15 +3,14 @@ name: MosaiCatcher conda checks
 on:
   schedule:
     # Run every Sunday at 00:00 UTC on the master branch
-    - cron:  '0 0 * * 0'
+    - cron: "0 0 * * 0"
       # branches:
       #   - master
   push:
     branches:
       - '*'
       - '!master'
 
-
 jobs:
   # WORK
 

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
@@ -8,9 +8,9 @@ on:
   # #       - master
 
   # # on:
-  push:
-    branches:
-      - dev
+  # push:
+  #   branches:
+  #     - dev
 
 env:
   IMAGE_NAME: mosaicatcher-pipeline

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -3,14 +3,13 @@ name: MosaiCatcher basic checks
 on:
   schedule:
     # Run every Sunday at 00:00 UTC on the master branch
-    - cron:  '0 0 * * 0'
+    - cron: "0 0 * * 0"
       # branches:
       #   - master
   push:
     branches:
-      - '*'
-      - '!master'
-
+      - "*"
+      - "!master"
 
 jobs:
   # WORK

diff --git a/.github/workflows/two_steps.dev b/.github/workflows/two_steps.dev
@@ -0,0 +1,68 @@
+name: Ashleys + MosaiCatcher two-steps conda checks
+
+on:
+  schedule:
+    # Run every Sunday at 00:00 UTC on the master branch
+    - cron: "0 0 * * 0"
+  push:
+    branches:
+      - "*"
+      - "!master"
+
+jobs:
+  ashleys:  # Step 1: run ashleys-qc-pipeline on the CHR17 test data and publish the result as an artifact
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout pipeline repository
+        uses: actions/checkout@v2
+        with:
+          repository: "friendsofstrandseq/ashleys-qc-pipeline"
+          path: "ashleys-qc-pipeline"  # every later path in this job must go through this sub-directory
+
+      - name: Run snakemake
+        uses: snakemake/snakemake-github-action@v1.24.0
+        with:
+          directory: "./ashleys-qc-pipeline"
+          snakefile: "./ashleys-qc-pipeline/workflow/Snakefile"
+          stagein: "conda config --set channel_priority flexible"
+          args: "--cores 1 --use-conda --latency-wait 60 --configfile ./ashleys-qc-pipeline/.tests/config/simple_config.yaml --config ashleys_pipeline=True data_location=${{ GITHUB.WORKSPACE }}/ashleys-qc-pipeline/.tests/data_CHR17 --conda-frontend mamba -p --rerun-triggers mtime --verbose --debug"
+
+      - name: Display file  # labels.tsv as found under data_location, before editing
+        run: cat ./ashleys-qc-pipeline/.tests/data_CHR17/RPE-BM510/cell_selection/labels.tsv
+      - name: Run awk command  # overwrites lines 5, 7 and 10 of labels.tsv with "0"
+        run: |
+          awk 'NR==5 || NR==7 || NR==10 {$0 = "0"} 1' ./ashleys-qc-pipeline/.tests/data_CHR17/RPE-BM510/cell_selection/labels.tsv > tmp.tsv && mv tmp.tsv ./ashleys-qc-pipeline/.tests/data_CHR17/RPE-BM510/cell_selection/labels.tsv
+      - name: Display file  # labels.tsv after editing
+        run: cat ./ashleys-qc-pipeline/.tests/data_CHR17/RPE-BM510/cell_selection/labels.tsv
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: ashleys-data
+          path: ./ashleys-qc-pipeline/.tests/data_CHR17  # same directory snakemake used as data_location
+
+  mosaicatcher:  # Step 2: run this repository's pipeline on the artifact produced by the ashleys job
+    runs-on: ubuntu-latest
+    needs:
+      - ashleys
+
+    steps:
+      - name: Checkout current repository
+        uses: actions/checkout@v2
+        with:
+          path: "."  # workspace root, so "./workflow/Snakefile" and ".tests/..." below resolve
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v2
+        with:
+          name: ashleys-data
+          path: ./.tests/data_CHR17
+
+      - name: Run snakemake
+        uses: snakemake/snakemake-github-action@v1.24.0
+        with:
+          directory: "."
+          snakefile: "./workflow/Snakefile"
+          stagein: "conda config --set channel_priority flexible"
+          args: "--cores 1 --use-conda --latency-wait 60 --configfile .tests/config/simple_config.yaml --config ashleys_pipeline=True data_location=./.tests/data_CHR17 --conda-frontend mamba -p --rerun-triggers mtime --verbose --debug"
diff --git a/.gitignore b/.gitignore
@@ -211,3 +211,6 @@ workflow/data/arbigent/scTRIP_segmentation.bed
 !workflow/data/GC/*.txt.gz
 .tests/data_CHR17/RPE-BM510/multiqc/
 .tests/data_CHR17/RPE-BM510/bam_ashleys/
+.tests/data_CHR17/RPE-BM510/bam/*.bam.raw
+.tests/data_CHR17/RPE-BM510/bam/*.bam.sort
+.tests/external_data/chr17.fa.log
diff --git a/config/config.yaml b/config/config.yaml
@@ -1,5 +1,5 @@
-version: 2.1.2
-ashleys_pipeline_version: 2.1.1
+version: 2.1.3
+ashleys_pipeline_version: 2.1.3
 #######################################
 #   MOSAICATCHER CONFIGURATION FILE   #
 #######################################

diff --git a/docs/output.md b/docs/output.md
@@ -1,85 +1,9 @@
-# Outputs (ongoing)
+# Outputs
 
 This document describes the final outputs produced by the pipeline. Most of the plots are taken from report generated from the [full-sized test dataset](https://sandbox.zenodo.org/record/1074721) for the pipeline.
 
 The files listed below will be created in the selected results directory (`output_location` parameter). All paths are relative to the top-level results directory.
 
-## Directory structure (example for `<SAMPLE>`=_RPE-BM510_)
-
-```bash
-<DATA_LOCATION>/<SAMPLE>
-|-- alfred
-|   |-- Celln.tsv.gz
-|   `-- Celln.json.gz
-|-- bam
-|   |-- Cell1.sort.mdup.bam
-|   |-- Cell2.sort.mdup.bam
-|   `-- Celln.sort.mdup.bam
-|-- cell_selection
-|   |-- labels_raw.tsv
-|   `-- labels.tsv
-|-- config
-|   |-- chroms_to_exclude.txt
-|   `-- single_paired_end_detection.txt
-|-- counts
-|   `-- RPE-BM510
-|       `-- counts-per-cell
-|-- fastq
-|   |-- Cell1.1.fastq.gz
-|   |-- Cell1.2.fastq.gz
-|   |-- Cell2.1.fastq.gz
-|   `-- Cell2.2.fastq.gz
-|-- haplotag
-|   |-- bam
-|   |   `-- RPE-BM510
-|   |-- bed
-|   `-- table
-|       `-- RPE-BM510
-|           `-- by-cell
-|-- log
-|   |-- ...
-|   `-- ...
-|-- merged_bam
-|   `-- merged_bam.bam
-|-- mosaiclassifier
-|   |-- haplotag_likelihoods
-|   |-- postprocessing
-|   |   |-- filter
-|   |   |   `-- RPE-BM510
-|   |   |-- group-table
-|   |   |   `-- RPE-BM510
-|   |   `-- merge
-|   |       `-- RPE-BM510
-|   |-- sv_calls
-|   |   `-- RPE-BM510
-|   `-- sv_probabilities
-|       `-- RPE-BM510
-|-- plots
-|   `-- RPE-BM510
-|       |-- counts
-|       |-- final_results
-|       |-- sv_calls
-|       |-- sv_clustering
-|       `-- sv_consistency
-|-- segmentation
-|   `-- RPE-BM510
-|       `-- segmentation-per-cell
-|-- snv_genotyping
-|   `-- RPE-BM510
-|-- stats
-|   `-- RPE-BM510
-`-- strandphaser
-    |-- phased-snvs
-    |-- RPE-BM510
-    |   `-- StrandPhaseR_analysis.chr21
-    |       |-- browserFiles
-    |       |-- data
-    |       |-- Phased
-    |       |-- SingleCellHaps
-    |       `-- VCFfiles
-    `-- R_setup
-```
-
 ## Plots folder
 
 ### Mosaic count - reads density across bins

diff --git a/docs/parameters.md b/docs/parameters.md
@@ -33,8 +33,9 @@ All these arguments can be specified in two ways:
 | ---------------------------------------- | --------------------------------------------------------------------------------------------------- | ------- | ------------ |
 | `multistep_normalisation_analysis`       | Allow to perform multistep normalisation including GC correction for visualization (Marco Cosenza). | False   | False        |
 | `multistep_normalisation_for_SV_calling` | Allow to use multistep normalisation count file during SV calling (Marco Cosenza).                  | False   | False        |
-| `arbigent`                               | Enable ArbiGent mode of execution to genotype SV based on arbitrary segments                        | False   | True         |
-| `scNOVA`                                 | Enable scNOVA mode of execution to compute Nucleosome Occupancy (NO) of detected SV                 | False   | True         |
+| `hgsvc_based_normalized_counts`          | Use HGSVC-based normalisation.                                                                      | True    | False        |
+| `arbigent`                               | Enable ArbiGent mode of execution to genotype SV based on arbitrary segments                        | False   | False        |
+| `scNOVA`                                 | Enable scNOVA mode of execution to compute Nucleosome Occupancy (NO) of detected SV                 | False   | False        |
 
 ### External files
 
@@ -66,10 +67,12 @@ All these arguments can be specified in two ways:
 
 ### EMBL specific options
 
-| Parameter              | Comment                                                                                               | Default |
-| ---------------------- | ----------------------------------------------------------------------------------------------------- | ------- |
-| `genecore`             | Enable/disable genecore mode to give as input the genecore shared folder in /g/korbel/shared/genecore | False   |
-| `genecore_date_folder` | Specify folder to be processed                                                                        |         |
+| Parameter                 | Comment                                                                                               | Default                                     |
+| ------------------------- | ----------------------------------------------------------------------------------------------------- | ------------------------------------------- |
+| `genecore`                | Enable/disable genecore mode to give as input the genecore shared folder in /g/korbel/shared/genecore | False                                       |
+| `genecore_date_folder`    | Specify folder to be processed                                                                        |                                             |
+| `genecore_prefix`         | Specify genecore prefix folder                                                                        | /g/korbel/STOCKS/Data/Assay/sequencing/2023 |
+| `genecore_regex_elements` | Specify genecore regex element to be used to distinguish sample from well number                      | PE20                                        |
 
 If `genecore` and `genecore_date_folder` are correctly specified, each plate will be processed independently by creating a specific folder in the `data_location` folder.
 

diff --git a/docs/usage.md b/docs/usage.md
@@ -61,7 +61,7 @@ snakemake \
 
 **ℹ️ Note for 🇪🇺 EMBL users**
 
-- You can load already installed snakemake modusl on the HPC (by connecting to login01 & login02) using the following `module load snakemake/7.14.0-foss-2022a`
+- You can load already installed snakemake modules on the HPC (by connecting to login01 & login02) using the following `module load snakemake/7.14.0-foss-2022a`
 - Use the following command for singularity-args parameter: `--singularity-args "-B /g:/g -B /scratch:/scratch"`
 
 ---