Skip to content

Commit

Permalink
Merge branch 'release/2.0.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
saramonzon committed Jul 14, 2021
2 parents e92f8f8 + bf74310 commit 278d07c
Show file tree
Hide file tree
Showing 13 changed files with 275 additions and 46 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/dockerhub_push_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: deploy release
# This builds the docker image and pushes it to DockerHub
# Triggered only when a GitHub release is published.
on:
  release:
    types: [published]
jobs:
  push_dockerhub:
    name: Push new Docker image to Docker Hub (release)
    runs-on: ubuntu-latest
    # Only run for the official repo, for releases and merged PRs
    if: ${{ github.repository == 'BU-ISCIII/taranis' }}
    env:
      # Credentials come from repository secrets; never hardcode them here.
      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
      DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASSWORD }}
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v2

      # Image is tagged with the release tag so every release is traceable.
      - name: Build new docker image
        run: docker build --no-cache . -t buisciii/taranis:${{ github.event.release.tag_name }}

      # NOTE(review): the step name says "(develop)" but this pushes the
      # release tag — presumably a copy-paste leftover from tests.yml; confirm.
      - name: Push Docker image to DockerHub (develop)
        run: |
          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
          docker push buisciii/taranis:${{ github.event.release.tag_name }}
38 changes: 38 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: tests ci
# This workflow builds the dev docker image and then runs the pipeline with
# the minimal test dataset to check that it completes without errors.
on:
  push:
    branches: [develop]
  # NOTE(review): pull_request_target runs with the base repository's
  # permissions and exposes secrets to workflows triggered by fork PRs —
  # confirm this is intended instead of the safer pull_request event.
  pull_request_target:
    branches: [develop]
  release:
    types: [published]

jobs:
  push_dockerhub:
    name: Push new Docker image to Docker Hub (dev)
    runs-on: ubuntu-latest
    # Only run for the official repo, for releases and merged PRs
    if: ${{ github.repository == 'BU-ISCIII/taranis' }}
    env:
      # Credentials come from repository secrets; never hardcode them here.
      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
      DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASSWORD }}
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v2

      - name: Build new docker image
        run: docker build --no-cache . -t buisciii/taranis:dev

      - name: Push Docker image to DockerHub (develop)
        run: |
          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
          docker push buisciii/taranis:dev

  # Smoke test: run the bundled test script inside the freshly pushed image.
  # Note this job is skipped whenever push_dockerhub is skipped (needs:).
  run-tests:
    name: Run tests
    needs: push_dockerhub
    runs-on: ubuntu-latest
    steps:
      - name: Run pipeline with test data
        run: |
          docker run buisciii/taranis:dev bash -c /opt/taranis/test/test.sh
17 changes: 17 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Taranis runtime image: miniconda3 base with the taranis conda env baked in.
FROM continuumio/miniconda3:latest

RUN mkdir /opt/taranis/
# COPY is preferred over ADD for plain local files (no archive/URL handling).
COPY utils /opt/taranis/utils
COPY test /opt/taranis/test
COPY *.py /opt/taranis/
COPY environment.yml /opt/taranis/
COPY logging_config.ini /opt/taranis/
COPY README.md /opt/taranis/
COPY LICENSE /opt/taranis/

SHELL ["/bin/bash", "-c"]
# WORKDIR persists across layers; the original `RUN cd /opt/taranis` was a
# no-op because every RUN starts a fresh shell. This also makes the
# `conda env export` below write taranis.yml into /opt/taranis instead of /.
WORKDIR /opt/taranis
RUN /opt/conda/bin/conda env create -f /opt/taranis/environment.yml && /opt/conda/bin/conda clean -a
RUN /opt/conda/bin/conda env export --name taranis > taranis.yml
# Append (>>) instead of truncate (>): the base image's ~/.bashrc contains the
# conda initialisation that `conda activate` itself depends on.
RUN echo "conda activate taranis" >> ~/.bashrc
# NOTE(review): conda env executables normally live in
# /opt/conda/envs/taranis/bin and the taranis scripts in /opt/taranis — these
# PATH entries look off by a /bin and a directory; confirm intended values.
ENV PATH /opt/conda/envs/taranis:/opt/conda/envs/taranis/utils:$PATH
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ This option is recomended.

Install Anaconda3.

`conda install -c bioconda taranis`
`conda install -c conda-forge -c bioconda -c defaults taranis`

Wait for the environment to solve. <br>
Ignore warnings/errors.
Expand Down
27 changes: 17 additions & 10 deletions allele_calling.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -1196,19 +1196,26 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_
break

if sample_name not in samples_profiles_dict:
if len(analysis_profiles_dict[sample_name]) == len(profile_header):
new_st_id = str(len(ST_profiles_dict) + 1)
ST_profiles_dict[new_st_id + "_INF"] = analysis_profile_dict[sample_name]
inf_ST[new_st_id] = analysis_profile_dict[sample_name]
if sample_name in analysis_profiles_dict:
if len(analysis_profiles_dict[sample_name]) == len(profile_header):
new_st_id = str(len(ST_profiles_dict) + 1)
ST_profiles_dict[new_st_id + "_INF"] = analysis_profile_dict[sample_name]
inf_ST[new_st_id] = analysis_profile_dict[sample_name]

samples_profiles_dict[sample_name]=new_st_id + "_INF"
samples_profiles_dict[sample_name]=new_st_id + "_INF"

if "New" not in count_st:
count_st["New"] = {}
if new_st_id not in count_st["New"]:
count_st["New"][new_st_id] = 0
count_st["New"][new_st_id] += 1
if "New" not in count_st:
count_st["New"] = {}
if new_st_id not in count_st["New"]:
count_st["New"][new_st_id] = 0
count_st["New"][new_st_id] += 1

else:
samples_profiles_dict[sample_name] = '-'

if "Unknown" not in count_st:
count_st["Unknown"] = 0
count_st["Unknown"] += 1
else:
samples_profiles_dict[sample_name] = '-'

Expand Down
19 changes: 12 additions & 7 deletions analyze_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import glob
from datetime import datetime
import statistics
from collections import Counter
#import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio
Expand Down Expand Up @@ -170,7 +171,9 @@ def extract_info_schema (schema_files, outputdir, genus, species, usegenus, logg
stdev = 0
else:
stdev = statistics.stdev(alleles_len)
schema_statistics[gene_name]=[statistics.mode(alleles_len), statistics.mean(alleles_len), stdev, min(alleles_len), max(alleles_len)]

#schema_statistics[gene_name]=[statistics.mode(alleles_len), statistics.mean(alleles_len), stdev, min(alleles_len), max(alleles_len)]
schema_statistics[gene_name]=[list(Counter(alleles_len).most_common(1)[0])[0], statistics.mean(alleles_len), stdev, min(alleles_len), max(alleles_len)]

for length in list(set(alleles_len)):
schema_variability_count[gene_name][str(length)] = str(alleles_len.count(length))
Expand Down Expand Up @@ -572,14 +575,16 @@ def analyze_schema (inputdir, outputdir, genus, species, usegenus, logger) :
total_alleles += int(schema_variability_count[core][length])

stat_fh.write(core + '\t' + '\t'.join (map(str,schema_statistics[core])) + '\t' + ', '.join(length_number) + '\t' + str(total_alleles) + '\n')
#stat_fh.write(core + '\t' + ', '.join(map(str,schema_statistics[core][0])) + '\t' + '\t'.join (map(str,schema_statistics[core][1::])) + '\t' + ', '.join(length_number) + '\t' + str(total_alleles) + '\n')

# Saving schema annotation to file
logger.info('Saving core gene schema annotation to file..')
annotation_file = os.path.join(outputdir, 'raw_info' , 'annotation.tsv')
with open (annotation_file , 'w') as annot_fh :
annot_fh.write('\t'.join(header_annotation) + '\n')
for core in sorted(annotation_core_dict) :
annot_fh.write(core + '\t' + '\t'.join(annotation_core_dict[core]) + '\n')
#logger.info('Saving core gene schema annotation to file..')
#annotation_file = os.path.join(outputdir, 'raw_info' , 'annotation.tsv')
#with open (annotation_file , 'w') as annot_fh :
# annot_fh.write('\t'.join(header_annotation) + '\n')
# for core in sorted(annotation_core_dict) :
# annot_fh.write(core + '\t' + '\t'.join(annotation_core_dict[core]) + '\n')


logger.info('Completed dumped raw information to files')

Expand Down
Empty file modified distance_matrix.py
100644 → 100755
Empty file.
17 changes: 7 additions & 10 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@ channels:
- bioconda
- defaults
dependencies:
- pip
- python>=3.6
- conda-forge::python>=3.6
- conda-forge::biopython==1.72
- conda-forge::pandas==1.2.4
- conda-forge::progressbar==2.5
- conda-forge::openpyxl==3.0.7
- conda-forge::plotly==5.0.0
- conda-forge::numpy==1.20.3
- bioconda::prokka>=1.14
- bioconda::blast>=2.9
- bioconda::mash>=2
- bioconda::prodigal=2.6.3

- pip:
- biopython==1.72
- pandas==1.2.4
- progressbar==2.5
- openpyxl==3.0.7
- plotly==5.0.0
- numpy==1.20.3
2 changes: 1 addition & 1 deletion logging_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ class=handlers.RotatingFileHandler
level=NOTSET
## args(log_file_name, 'a', maxBytes , backupCount)
#args=('Programas/taranis_b/logs/taranis.log','a',500000,5)
args=("./taranis.log",'a',500000,5)
args=("taranis.log",'a',500000,5)
formatter=logfileformatter
Empty file modified reference_alleles.py
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion taranis.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def check_arg (args=None) :
default = 100)
distance_matrix_parser.add_argument('-sample_missing_threshold', required = False,
help = 'Missing values percentage threshold above which samples are excluded for distance matrix creation. Default is 100.',
default = 100)
default = 20)
distance_matrix_parser.add_argument('-paralog_filter', required = False,
help = 'Consider paralog tags (NIPH, NIPHEM) as missing values. Default is True',
default = True)
Expand Down
140 changes: 140 additions & 0 deletions test/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/bin/bash --login

# Exit immediately if a pipeline, which may consist of a single simple command,
# a list, or a compound command returns a non-zero status: errors past this
# point must otherwise be handled explicitly by the user.
set -e
# NOTE(review): a comment here originally announced `set -u` (treat unset
# variables/parameters as an error during expansion) but the option is never
# enabled — confirm whether it should be, since e.g. $VERSION is expanded
# later without ever being defined.

# Print everything as if it were executed, after substitution and expansion is
# applied: debug/log option.
#set -x

#=============================================================
# HEADER
#=============================================================

#INSTITUTION:ISCIII
#CENTRE:BU-ISCIII
#
#ACKNOWLEDGE: longops2getops.sh: https://gist.github.com/adamhotep/895cebf290e95e613c006afbffef09d7
#
#DESCRIPTION: test.sh uses test data for testing taranis installation.
#
#
#================================================================
# END_OF_HEADER
#================================================================
#SHORT USAGE RULES
#LONG USAGE FUNCTION
#######################################
# Print a short usage message for this test script.
# Outputs: usage text on stdout.
# Fix: the original text described plasmidID — copy-paste from another
# project's test script — instead of taranis.
#######################################
usage() {
  cat << EOF
test.sh runs taranis with the bundled test data to verify the installation
usage : $0
	-v | --version		version
	-h | --help		display usage message
example: ./test.sh
EOF
}

#================================================================
# OPTION_PROCESSING
#================================================================
#######################################
# Error handler: print a framed error report and terminate the script.
# Arguments:
#   $1 - line number on or near which the error occurred
#   $2 - script name
#   $3 - optional extra message (may be empty)
#   $4 - exit code (defaults to 1)
# Outputs: diagnostic block on stderr (fix: the original wrote it to stdout,
#          mixing diagnostics into the script's normal output stream).
# Returns: never; exits the script with the given code.
#######################################
error(){
  local parent_lineno="$1"
  local script="$2"
  local message="$3"
  local code="${4:-1}"

  local RED='\033[0;31m'
  local NC='\033[0m'

  {
    echo -e "\n---------------------------------------\n"
    echo -e "${RED}ERROR${NC} in Script $script on or near line ${parent_lineno}; exiting with status ${code}"
    if [[ -n "$message" ]] ; then
      echo -e "MESSAGE:\n"
      echo -e "$message"
    fi
    echo -e "\n---------------------------------------\n"
  } >&2

  exit "${code}"
}

# Translate long options into their short getopts-compatible equivalents.
# On the first iteration "$@" is emptied, then it is rebuilt one argument at
# a time with long forms replaced and everything else passed through.
reset=true
for arg in "$@"; do
  if [ -n "$reset" ]; then
    unset reset
    set --    # clear the positional parameters so the loop can rebuild them
  fi
  case "$arg" in
    --help)    set -- "$@" -h ;;
    --version) set -- "$@" -v ;;
    *)         set -- "$@" "$arg" ;;   # pass through anything else
  esac
done

#DECLARE FLAGS AND VARIABLES
# Fix: quote the command substitutions so a path containing spaces does not
# word-split or glob (ShellCheck SC2046/SC2086).
script_dir=$(dirname "$(readlink -f "$0")")
# Test-data locations, relative to where the test data set was unpacked.
assemblies="./samples_listeria/"
schema="./MLST_listeria/"
profile="./profile_MLST_listeria/profiles_csv.csv"
refgenome="./reference_listeria/GCF_002213505.1_ASM221350v1_genomic.fna"

#PARSE VARIABLE ARGUMENTS WITH getops
# Parse short options (long forms were mapped to short ones above).
# NOTE(review): most letters in this options string are accepted by getopts
# but never implemented below, so they hit the catch-all arm — confirm whether
# they are placeholders copied from another script (see longopts2getopts.sh).
options=":1:2:d:s:g:c:a:i:o:C:S:f:l:L:T:M:X:y:Y:RVtvh"
while getopts $options opt; do
  case $opt in
    h)
      usage
      exit 1
      ;;
    v)
      # NOTE(review): $VERSION is never defined in this script, so this
      # prints an empty line — confirm where the version should come from.
      echo $VERSION
      exit 1
      ;;
    \?)
      echo "Invalid Option: -$OPTARG" 1>&2
      usage
      exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    *)
      echo "Unimplemented option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND-1))

## Execute taranis with test data.
# Fixes: the comment above and the first echo referred to plasmidID and to an
# undefined $refallele variable; the echo now reports the arguments actually
# passed to allele_calling below, and all expansions are quoted.
echo "Executing: ../taranis.py allele_calling -coregenedir $schema -inputdir $assemblies -refgenome $refgenome -outputdir allele_calling_test -percentlength 20 -refalleles reference_alleles_test -profile $profile"
echo "Assemblies: $assemblies"
echo "Schema: $schema"
echo "$PWD"
# Run from $HOME so every *_test output directory is created there; input
# paths are made absolute by prefixing $script_dir.
cd
"$script_dir"/../taranis.py analyze_schema -inputdir "$script_dir"/MLST_listeria -outputdir analyze_schema_test

"$script_dir"/../taranis.py reference_alleles -coregenedir "$script_dir"/MLST_listeria -outputdir reference_alleles_test

"$script_dir"/../taranis.py allele_calling -coregenedir "$script_dir/$schema" -inputdir "$script_dir/$assemblies" -refgenome "$script_dir/$refgenome" -outputdir allele_calling_test -percentlength 20 -refalleles reference_alleles_test -profile "$script_dir/$profile"

"$script_dir"/../taranis.py distance_matrix -alleles_matrix allele_calling_test/result.tsv -outputdir distance_matrix_test

echo "ALL DONE. TEST COMPLETED SUCCESSFULLY YOUR INSTALLATION SHOULD BE CORRECT."
Loading

0 comments on commit 278d07c

Please sign in to comment.