Merge pull request #95 from Darcy220606/dev

Apply minor bug fixes for v0.2.1
Darcy220606 · Mar 13, 2024 · ac37b6f · ac37b6f
2 parents ce68c7b + 0bfea2b
commit ac37b6f
Show file tree

Hide file tree

Showing 8 changed files with 43 additions and 37 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -43,3 +43,8 @@ v<0.2.0>, <09.02.2024>
     -- Added the submodule to predict signal peptides using [SignalP-6.0h](https://services.healthtech.dtu.dk/services/SignalP-6.0/)
     -- Removed the HTML output for the complete summary and replaced it with a Shiny for Python app that can be viewed using the command line
     -- Updated AMPcombi to use subcommands for ease of use with standardized and portable pipelines
+v<0.2.1>, <14.03.2024>
+    -- Fixed the package versions in setup
+    -- Adjusted readme for installation setup
+    -- Fixed the `./temp` dir removal step at the end of the process
+    -- Changed the default matrix in diamond blastp to PAM250
diff --git a/LICENSE.txt b/LICENSE.txt
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 Anan Ibrahim
+Copyright (c) 2024 Anan Ibrahim
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -66,32 +66,24 @@ AMPcombi and its submodules provide a command-line interface to parse the result
 
 To install AMPcombi:
 
-First, install the dependencies of the tool ⬇ then carry on with the AMPcombi tool installation
-- `python` > 3.0
-- `biopython` 
-- `pandas` 
-- `diamond`
-- `mmseqs2` == 15.6f
-- `shiny`
-
-Then, install AMPcombi using:
-
- - pip installation
+- Using **conda**:
 ```
-pip install AMPcombi
+conda create -n ampcombi python==3.11 diamond==2.0.15 mmseqs==15.6f452 ampcombi
 ```
- - git repository
- ```
- git clone https://github.com/Darcy220606/AMPcombi.git
- ```
- - conda
+or 
 ```
-conda env create -f ampcombi/environment.yml
+conda env create -f ./ampcombi/environment.yml
 ```
-or
+
+- Using **singularity and docker**:
 ```
- conda install -c bioconda AMPcombi
+
 ```
+
+- From git repository:
+ ```
+ git clone https://github.com/Darcy220606/AMPcombi.git
+ ```
 ---
 ## Usage and Output:
 
@@ -385,7 +377,7 @@ The user can upload the `Ampcombi_summary_cluster_SP.tsv` to generate tables and
 
 ---
 
-## References for tools and databases used in AMPcombi:
+## References for tools, packages and databases used in AMPcombi:
 
 - Steinegger M and Soeding J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nature Biotechnology, doi: 10.1038/nbt.3988 (2017).
 
@@ -401,6 +393,14 @@ The user can upload the `Ampcombi_summary_cluster_SP.tsv` to generate tables and
 
 - Shi G., Kang X., Dong F., Liu Y., Zhu N., Hu Y., Xu H., Lao X., Zheng H., DRAMP 3.0: an enhanced comprehensive data repository of antimicrobial peptides, Nucleic Acids Research, 50,D1, doi: 10.1093/nar/gkab651 (2022).
 
+- The Shiny development team, Shiny for Python v.0.8.0, https://shiny.posit.co/py/, license: https://github.com/posit-dev/py-shiny/blob/main/LICENSE
+
+- Plotly Technologies Inc. Collaborative data science. Montreal, QC: Plotly Technologies Inc. Retrieved from https://plot.ly (2015)
+
+- UpSetPlot v.0.9.0, https://github.com/jnothman/UpSetPlot, license: https://github.com/jnothman/UpSetPlot/blob/master/LICENSE
+
+- py3Dmol, https://github.com/avirshup/py3dmol, license: https://github.com/avirshup/py3dmol/blob/master/LICENSE.txt
+
 ---
 
 ## Contribution:

diff --git a/ampcombi/ampcombi.py b/ampcombi/ampcombi.py
@@ -263,8 +263,6 @@ def parse_tables(args):
                     sample_summary_df = pd.merge(summary_df, diamond_df, on = 'contig_id', how='left')
                     # Insert column with sample name on position 0
                     sample_summary_df.insert(0, 'name', samplelist[i])
-                    # Remove the temp directory
-                    # shutil.rmtree('./temp')
                     # Estimate the aa functions: chemical and physical
                     print(f'The estimation of functional and structural properties for {samplelist[i]} in progress ....')
                     sample_summary_df_functions = functionality(sample_summary_df)
@@ -287,7 +285,9 @@ def parse_tables(args):
                     sample_summary_df = sample_summary_df.drop_duplicates()
                     # Write sample summary into sample output folder
                     sample_summary_df.to_csv(samplelist[i] +'/'+samplelist[i]+'_ampcombi.tsv', sep='\t', index=False)
-                    print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}/.')
+                    print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}.tsv.')
+                    # Remove the temp directory
+                    # shutil.rmtree('./temp')
                     # Write the log file in the respective sample directory
                     shutil.move(f'{samplelist[i]}_ampcombi.log', samplelist[i] + '/' + samplelist[i]+'_ampcombi.log')
         else:
@@ -337,8 +337,11 @@ def parse_tables(args):
             sample_summary_df = sample_summary_df.drop_duplicates()
             # Write sample summary into sample output folder
             sample_summary_df.to_csv(samplelist[i] +'/'+samplelist[i]+'_ampcombi.tsv', sep='\t', index=False)
-            print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}/.tsv')
-
+            print(f'The summary file for {samplelist[i]} was saved to {samplelist[i]}.tsv')
+
+    # Remove the temp directory
+    shutil.rmtree('./temp')
+
 #########################################
 # FUNCTION : CONCATENATING
 ######################################### 

diff --git a/ampcombi/diamond_alignment.sh b/ampcombi/diamond_alignment.sh
@@ -22,7 +22,7 @@ P=$THREADS
 
 diamond blastp \
 -p $P -d $REF_DB/amp_ref -q $IN --quiet \
---outfmt 6 qseqid sseqid pident evalue nident full_qseq full_sseq qseq sseq qcovhsp scovhsp --max-target-seqs 1 --ultra-sensitive -e10000 --id2 1 -s1 -c1 --masking 0 --gapped-filter-evalue 0 --algo 0 --min-score 0 --shape-mask 1111 \
+--outfmt 6 qseqid sseqid pident evalue nident full_qseq full_sseq qseq sseq qcovhsp scovhsp --max-target-seqs 1 --ultra-sensitive -e10000 --id2 1 -s1 -c1 --masking 0 --gapped-filter-evalue 0 --matrix PAM250 --algo 0 --min-score 0 --shape-mask 1111 \
 -o $OUT/diamond_matches.txt
 
 echo -e "contig_id\ttarget_id\tpident\tevalue\tnident\tfull_qseq\tfull_sseq\tqseq\tsseq\tqcovhsp\tscovhsp" | cat - $OUT/diamond_matches.txt > $OUT/diamond_matches.tsv

diff --git a/ampcombi/reformat_tables.py b/ampcombi/reformat_tables.py
@@ -221,7 +221,7 @@ def summary(df_list, samplename, faa_path, aa_len):
     merge_df['p_sum']= merge_df.sum(axis=1)#.sort_values(ascending=False)
     merge_df = merge_df.sort_values('p_sum', ascending=False).drop(['p_sum', 'aa_lengths'], axis=1).reset_index()
     # cleanup remove temp dir
-    shutil.rmtree('./temp')
+    # shutil.rmtree('./temp')
     return merge_df
 
 #########################################

diff --git a/ampcombi/version.py b/ampcombi/version.py
@@ -1 +1 @@
-__version__ = '0.2.0'
+__version__ = '0.2.1'
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='AMPcombi',
-    version='0.2.0',
+    version='0.2.1',
     author='Anan Ibrahim, Louisa Perelo',
     author_email='ananhamido@hotmail.com, louperelo@gmail.com',
     packages=['ampcombi'],
@@ -32,9 +32,10 @@
     long_description_content_type='text/markdown',
     keywords=["Proteomics", "Antimicrobial peptides", "Diamond", "MMSeqs2"
               "Standardization", "Formatting","Functional annotation"],
-    install_requires=['pandas',
-                      'biopython',
+    install_requires=['pandas==1.5.2',
+                      'biopython==1.80',
                       'requests'],
+    python_requires='==3.11.*',
     entry_points={  
         'console_scripts': [
             'ampcombi = ampcombi:main',
@@ -48,10 +49,7 @@
         "Operating System :: MacOS",
         "Operating System :: POSIX",
         "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.11",
         "Intended Audience :: Science/Research",
         "Intended Audience :: Healthcare Industry",
         "Topic :: Scientific/Engineering :: Bio-Informatics",