From 0bfea2bec5d6dadb9814180433d7caeec877de25 Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Wed, 13 Mar 2024 10:53:45 +0100 Subject: [PATCH] changes for 0.2.1 --- CHANGES.txt | 5 +++++ LICENSE.txt | 2 +- README.md | 42 +++++++++++++++++------------------ ampcombi/ampcombi.py | 13 ++++++----- ampcombi/diamond_alignment.sh | 2 +- ampcombi/reformat_tables.py | 2 +- ampcombi/version.py | 2 +- setup.py | 12 +++++----- 8 files changed, 43 insertions(+), 37 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index e48fec2..9361558 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -43,3 +43,8 @@ v<0.2.0>, <09.02.2024> -- Added the submodule to predict signaling peptide using [SignalP-6.0h](https://services.healthtech.dtu.dk/services/SignalP-6.0/) -- Removed the HTML output for the complete summary and replaced it with shiny for python app that can be viewed using the commandline -- Updated AMPcombi to use subcommands for ease of use with standardized and portable piplines +v<0.2.1>, <14.03.2024> + -- Fixed the package versions in setup + -- Adjusted readme for installation setup + -- Fixed the `./temp` dir removal step at the end of the process + -- Changed the default matrix in diamond blastp to PAM250 diff --git a/LICENSE.txt b/LICENSE.txt index 8f9f76b..2d180c7 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Anan Ibrahim +Copyright (c) 2024 Anan Ibrahim Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index b300f42..99a29cb 100644 --- a/README.md +++ b/README.md @@ -66,32 +66,24 @@ AMPcombi and its submodules provide a command-line interface to parse the result To install AMPcombi: -First, install the dependencies of the tool ⬇ then carry on with the AMPcombi tool installation -- `python` > 3.0 -- `biopython` -- `pandas` -- `diamond` -- `mmseqs2` == 15.6f -- `shiny` - -Then, install AMPcombi using: - - - pip installation +- Using **conda**: ``` -pip install AMPcombi +conda create -n ampcombi python==3.11 diamond==2.0.15 mmseqs==15.6f452 ampcombi ``` - - git repository - ``` - git clone https://github.com/Darcy220606/AMPcombi.git - ``` - - conda +or ``` -conda env create -f ampcombi/environment.yml +conda env create -f ./ampcombi/environment.yml ``` -or + +- Using **singularity and docker**: ``` - conda install -c bioconda AMPcombi + ``` + +- From git repository: + ``` + git clone https://github.com/Darcy220606/AMPcombi.git + ``` --- ## Usage and Output: @@ -385,7 +377,7 @@ The user can upload the `Ampcombi_summary_cluster_SP.tsv` to generate tables and --- -## References for tools and databases used in AMPcombi: +## References for tools, packages and databases used in AMPcombi: - Steinegger M and Soeding J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nature Biotechnology, doi: 10.1038/nbt.3988 (2017). @@ -401,6 +393,14 @@ The user can upload the `Ampcombi_summary_cluster_SP.tsv` to generate tables and - Shi G., Kang X., Dong F., Liu Y., Zhu N., Hu Y., Xu H., Lao X., Zheng H., DRAMP 3.0: an enhanced comprehensive data repository of antimicrobial peptides, Nucleic Acids Research, 50,D1, doi: 10.1093/nar/gkab651 (2022). +- The Shiny development team, Shiny for Python, https://shiny.posit.co/py/, license:https://github.com/posit-dev/py-shiny/blob/main/LICENSE v.0.8.0 + +- Inc., P. T. Collaborative data science. Montreal, QC: Plotly Technologies Inc. Retrieved from https://plot.ly (2015) + +- Upsetplot, https://github.com/jnothman/UpSetPlot license: https://github.com/jnothman/UpSetPlot/blob/master/LICENSE v.0.9.0 + +- py3Dmol, https://github.com/avirshup/py3dmol license: https://github.com/avirshup/py3dmol/blob/master/LICENSE.txt + --- ## Contribution: diff --git a/ampcombi/ampcombi.py b/ampcombi/ampcombi.py index d0f64f7..5b107a2 100644 --- a/ampcombi/ampcombi.py +++ b/ampcombi/ampcombi.py @@ -263,8 +263,6 @@ def parse_tables(args): sample_summary_df = pd.merge(summary_df, diamond_df, on = 'contig_id', how='left') # Insert column with sample name on position 0 sample_summary_df.insert(0, 'name', samplelist[i]) - # Remove the temp directory - # shutil.rmtree('./temp') # Estimate the aa functions: chemical and physical print(f'The estimation of functional and structural properties for {samplelist[i]} in progress ....') sample_summary_df_functions = functionality(sample_summary_df) @@ -287,7 +285,9 @@ def parse_tables(args): sample_summary_df = sample_summary_df.drop_duplicates() # Write sample summary into sample output folder sample_summary_df.to_csv(samplelist[i] +'/'+samplelist[i]+'_ampcombi.tsv', sep='\t', index=False) - print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}/.') + print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}.tsv.') + # Remove the temp directory + # shutil.rmtree('./temp') # Write the log file in the respective sample directory shutil.move(f'{samplelist[i]}_ampcombi.log', samplelist[i] + '/' + samplelist[i]+'_ampcombi.log') else: @@ -337,8 +337,11 @@ def parse_tables(args): sample_summary_df = sample_summary_df.drop_duplicates() # Write sample summary into sample output folder sample_summary_df.to_csv(samplelist[i] +'/'+samplelist[i]+'_ampcombi.tsv', sep='\t', index=False) - print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}/.tsv') - + print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}.tsv') + + # Remove the temp directory + shutil.rmtree('./temp') + ######################################### # FUNCTION : CONCATENATING ######################################### diff --git a/ampcombi/diamond_alignment.sh b/ampcombi/diamond_alignment.sh index e0bd5b4..b83ba4c 100755 --- a/ampcombi/diamond_alignment.sh +++ b/ampcombi/diamond_alignment.sh @@ -22,7 +22,7 @@ P=$THREADS diamond blastp \ -p $P -d $REF_DB/amp_ref -q $IN --quiet \ ---outfmt 6 qseqid sseqid pident evalue nident full_qseq full_sseq qseq sseq qcovhsp scovhsp --max-target-seqs 1 --ultra-sensitive -e10000 --id2 1 -s1 -c1 --masking 0 --gapped-filter-evalue 0 --algo 0 --min-score 0 --shape-mask 1111 \ +--outfmt 6 qseqid sseqid pident evalue nident full_qseq full_sseq qseq sseq qcovhsp scovhsp --max-target-seqs 1 --ultra-sensitive -e10000 --id2 1 -s1 -c1 --masking 0 --gapped-filter-evalue 0 --matrix PAM250 --algo 0 --min-score 0 --shape-mask 1111 \ -o $OUT/diamond_matches.txt echo -e "contig_id\ttarget_id\tpident\tevalue\tnident\tfull_qseq\tfull_sseq\tqseq\tsseq\tqcovhsp\tscovhsp" | cat - $OUT/diamond_matches.txt > $OUT/diamond_matches.tsv diff --git a/ampcombi/reformat_tables.py b/ampcombi/reformat_tables.py index 5ad8f3f..a619ed6 100755 --- a/ampcombi/reformat_tables.py +++ b/ampcombi/reformat_tables.py @@ -221,7 +221,7 @@ def summary(df_list, samplename, faa_path, aa_len): merge_df['p_sum']= merge_df.sum(axis=1)#.sort_values(ascending=False) merge_df = merge_df.sort_values('p_sum', ascending=False).drop(['p_sum', 'aa_lengths'], axis=1).reset_index() # cleanup remove temp dir - shutil.rmtree('./temp') + # shutil.rmtree('./temp') return merge_df ######################################### diff --git a/ampcombi/version.py b/ampcombi/version.py index a9fdc5c..fb13a35 100644 --- a/ampcombi/version.py +++ b/ampcombi/version.py @@ -1 +1 @@ -__version__ = '0.2.0' \ No newline at end of file +__version__ = '0.2.1' \ No newline at end of file diff --git a/setup.py b/setup.py index 5c6b472..bcad290 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='AMPcombi', - version='0.2.0', + version='0.2.1', author='Anan Ibrahim, Louisa Perelo', author_email='ananhamido@hotmail.com, louperelo@gmail.com', packages=['ampcombi'], @@ -32,9 +32,10 @@ long_description_content_type='text/markdown', keywords=["Proteomics", "Antimicrobial peptides", "Diamond", "MMSeqs2" "Standardization", "Formatting","Functional annotation"], - install_requires=['pandas', - 'biopython', + install_requires=['pandas==1.5.2', + 'biopython==1.80', 'requests'], + python_requires='==3.11.*', entry_points={ 'console_scripts': [ 'ampcombi = ampcombi:main', @@ -48,10 +49,7 @@ "Operating System :: MacOS", "Operating System :: POSIX", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.11", "Intended Audience :: Science/Research", "Intended Audience :: Healthcare Industry", "Topic :: Scientific/Engineering :: Bio-Informatics",