Skip to content

Commit

Permalink
added files for make test
Browse files Browse the repository at this point in the history
  • Loading branch information
RAHenriksen committed Oct 24, 2024
1 parent 1d7c4fb commit 4bcff79
Show file tree
Hide file tree
Showing 13 changed files with 161 additions and 1 deletion.
43 changes: 43 additions & 0 deletions .github/workflows/cdiff_fbi_workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# CI workflow for the cdiff_fbi pipeline: builds a conda environment holding the
# pipeline's command-line dependencies and runs the repository's `make test`.
name: cdiff foodborne infection workflow

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  build:
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        # Login shell: setup-miniconda documents `bash -l {0}` as required for
        # `conda activate` to work inside `run` steps.
        shell: bash -l {0}
    strategy:
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.11"]

    steps:
      # step 1: checkout repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # step 2: install miniconda (no environment.yml is used; the pipeline
      # environment is created explicitly in step 3)
      - name: Set up miniconda
        uses: conda-incubator/setup-miniconda@v3
        with:
          auto-update-conda: true
          python-version: ${{ matrix.python-version }}

      # step 3: create the conda environment manually.
      # The bioinformatics tools are distributed through bioconda (which in turn
      # relies on conda-forge), so both channels are named explicitly; --yes
      # keeps the command non-interactive regardless of the runner's condarc.
      - name: Create conda environment
        run: |
          conda create --yes --name cdiff_pipeline -c conda-forge -c bioconda picard gatk4 biopython ruamel.yaml kraken bwa samtools

      # step 4: run the regression tests defined in the Makefile
      - name: Run test script
        run: |
          conda activate cdiff_pipeline
          make test
# consider adding the webhook to https://github.com/ssi-dk/bifrost_sp_cdiff/
54 changes: 54 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Secret files and resources
.secret*
resources/*

# fastq and fasta files
*.fasta
*.fa
*.fsa
*.fasta.gz
*.fa.gz
*.fsa.gz
*.fastq
*.fq
*.fastq.gz
*.fq.gz

# ...except the small fixtures that `make test` reads directly from test/.
# Without these exceptions the inputs are never committed and a fresh CI
# checkout cannot run the tests. Pipeline outputs live in sub-directories of
# test/ and therefore stay ignored.
!test/*.fastq.gz
!test/*.fasta

# Python cache files
__pycache__/
*.py[cod]
.pytest_cache/

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# IDE or editor files
.vscode/
.idea/
*.swp

# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Log and temporary files
*.log
*.tmp
*.temp

# Environment files (pip and conda)
.env
.venv/
env/
venv/
*.conda
*.condarc

# Distribution / packaging files
bin/
.Python
build/
dist/
lib/
lib64/
*.egg-info/
33 changes: 33 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Makefile
#
# Regression-test driver for the cdiff typing pipeline. Every target below is a
# command, not a file, so all of them are declared phony. This matters in
# particular for `test`, because a directory named test/ exists in the repo.

.PHONY: clean test serotyping md5serotypecheck processing md5processcheck

# Remove the output directory produced by a previous test run.
clean:
	rm -rf test/ERR142064/

# Combined test target to run all steps.
# The steps must run strictly in this order (each check reads files written by
# the step before it). They are invoked as sequential sub-makes rather than as
# prerequisites, because prerequisite order is not guaranteed under `make -j`.
test:
	$(MAKE) serotyping
	$(MAKE) md5serotypecheck
	$(MAKE) processing
	$(MAKE) md5processcheck

# Step 1: Run the typing process on the bundled ERR142064 subset.
# BIN_PATH is only echoed for diagnostics; it is not passed to the script.
serotyping:
	@BIN_PATH=$$(dirname $$(which python)) && \
	echo "Conda environment bin path: $$BIN_PATH" && \
	bash cdifftyping.sh -i ERR142064 -R1 test/ERR142064_1_subset.fastq.gz -R2 test/ERR142064_2_subset.fastq.gz -c test/ERR142064_subset.fasta -o test -db db -update no

# Step 2: Check MD5 checksums of the typing output.
# The VCF files are first copied without any line containing '#', and the
# checksums are taken over those *.noheader.vcf copies. The filter is kept
# exactly as `grep -v '#'` so the result matches the recorded checksums.
md5serotypecheck:
	cd test && \
	grep -v '#' ERR142064/sp_cdiff_fbi/ERR142064.indel.vcf > ERR142064/sp_cdiff_fbi/ERR142064.indel.noheader.vcf && \
	grep -v '#' ERR142064/sp_cdiff_fbi/ERR142064.snp.vcf > ERR142064/sp_cdiff_fbi/ERR142064.snp.noheader.vcf && \
	grep -v '#' ERR142064/sp_cdiff_fbi/ERR142064.snp_indel.vcf > ERR142064/sp_cdiff_fbi/ERR142064.snp_indel.noheader.vcf && \
	md5sum -c Serotypesums.md5

# Step 3: Run the post-processing
processing:
	bash postcdifftyping.sh -i ERR142064 -d test -stbit "STNA;NA:NA"

# Step 4: Check MD5 checksums of the post-processing output.
# NOTE(review): the original comment mentioned removing a variable date before
# checksumming, but no such step is performed here - confirm the csv/json
# outputs are date-free, otherwise this check will fail on later days.
md5processcheck:
	cd test && \
	md5sum -c Processsums.md5
2 changes: 1 addition & 1 deletion cdifftyping.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ mkdir -p $spcdifffbidir
prefix="$spcdifffbidir/$sampleid" # prefix for indexes


Filtering reads with serum_readfilter
#Filtering reads with serum_readfilter
echo -e "\n# Filtering reads with serum_readfilter..."
cmd="serum_readfilter runfilter kraken -R1 ${read1} -R2 ${read2} -o $spcdifffbidir/cdifffiltered -db $serumdb"
r1="$spcdifffbidir/cdifffiltered_R1.fastq"
Expand Down
2 changes: 2 additions & 0 deletions db/cdiff_toxins/tcdRegion.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
@HD VN:1.6
@SQ SN:gi|126697566|ref|NC_009089.1| LN:25706 M5:4ff8ab7d041b1792b7b30e73461ff926 UR:file:///home/rahenriksen443/cdiff_fbi/db/cdiff_toxins/tcdRegion.fasta
1 change: 1 addition & 0 deletions db/cdiff_toxins/tcdRegion.fasta.amb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
25706 1 0
3 changes: 3 additions & 0 deletions db/cdiff_toxins/tcdRegion.fasta.ann
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
25706 1 11
0 gi|126697566|ref|NC_009089.1| Peptoclostridium difficile 630, complete genome
0 25706 0
Binary file added db/cdiff_toxins/tcdRegion.fasta.bwt
Binary file not shown.
1 change: 1 addition & 0 deletions db/cdiff_toxins/tcdRegion.fasta.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
gi|126697566|ref|NC_009089.1| 25706 79 25706 25707
Binary file added db/cdiff_toxins/tcdRegion.fasta.pac
Binary file not shown.
Binary file added db/cdiff_toxins/tcdRegion.fasta.sa
Binary file not shown.
2 changes: 2 additions & 0 deletions test/Processsums.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
13c0b9b029c6131e5e69be1b7b934555 ERR142064/ERR142064.csv
01eb78d612b51d98b78e7fea67791da2 ERR142064/ERR142064.json
21 changes: 21 additions & 0 deletions test/Serotypesums.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
57304e76422f88d12526b0a63ad47da4 ERR142064/sp_cdiff_fbi/ERR142064.bam
9ddfaf45e2201214967389cf43da2f42 ERR142064/sp_cdiff_fbi/ERR142064.bam.bai
97e3ec59fc737b3073557c74e54bfe0e ERR142064/sp_cdiff_fbi/ERR142064.coverage
bcca0bfab13062fa427aeea04b2c7038 ERR142064/sp_cdiff_fbi/ERR142064.coverage.sample_cumulative_coverage_counts
4721790a8baec19dd0bc49119e231078 ERR142064/sp_cdiff_fbi/ERR142064.coverage.sample_cumulative_coverage_proportions
918105797eae2632de3e408d7327d885 ERR142064/sp_cdiff_fbi/ERR142064.coverage.sample_interval_statistics
385b9293fb26f90cba8f2d2f8e8b2f03 ERR142064/sp_cdiff_fbi/ERR142064.coverage.sample_interval_summary
47365c24bfe25d8796e6d1540d8e6ad9 ERR142064/sp_cdiff_fbi/ERR142064.coverage.sample_statistics
8c8d2703ddc82e2f060db5b04cc5b4e8 ERR142064/sp_cdiff_fbi/ERR142064.coverage.sample_summary
4f30e930ce5c65caae21783c533c9505 ERR142064/sp_cdiff_fbi/ERR142064.indel.noheader.vcf
e45d6433d00b4ac36ab62448e5fb37de ERR142064/sp_cdiff_fbi/ERR142064.sam
d7eda8c30011f4b9192211151de83e6c ERR142064/sp_cdiff_fbi/ERR142064.snp.noheader.vcf
ccbae19880ae00595370a548c2305844 ERR142064/sp_cdiff_fbi/ERR142064.snp_indel.noheader.vcf
bb7b6ccf22fa698793fbf8b2624c64ed ERR142064/sp_cdiff_fbi/ERR142064_TRST.fasta
9a2156bb8ad2f8e23dff43970a42104d ERR142064/sp_cdiff_fbi/ERR142064_cdtA.info
0e1ec33a8d7c9658294bdb9a383b638f ERR142064/sp_cdiff_fbi/ERR142064_cdtB.info
2d0476ceeed860351baf3ebbb3db3daa ERR142064/sp_cdiff_fbi/ERR142064_tcdA.info
c23cf0f5cb848b7bf2eafbba4e4aa07f ERR142064/sp_cdiff_fbi/ERR142064_tcdB.info
a0a43ac74e5e823bcab99e6e2cdff4eb ERR142064/sp_cdiff_fbi/ERR142064_tcdC.info
4d5500e512d5896f36d4726f5491bdc2 ERR142064/sp_cdiff_fbi/cdifffiltered_R1.fastq
0cffcfd0e88d2397b59612ef3e75fab0 ERR142064/sp_cdiff_fbi/cdifffiltered_R2.fastq

0 comments on commit 4bcff79

Please sign in to comment.