From fa04e9810f8ab9c166e21242a4919dab956cb928 Mon Sep 17 00:00:00 2001 From: Jason Stajich Date: Mon, 21 Sep 2020 08:52:00 -0700 Subject: [PATCH] run 2to3 and manual convert for Python3 --- Pangloss.py | 40 +++++++++++++++++++-------------------- Pangloss/BLASTAll.py | 6 +++--- Pangloss/BUSCO.py | 4 ++-- Pangloss/ExonerateGene.py | 2 +- Pangloss/GO.py | 6 +++--- Pangloss/Karyotype.py | 16 ++++++++-------- Pangloss/PAML.py | 10 +++++----- Pangloss/PanGuess.py | 12 ++++++------ Pangloss/PanOCT.py | 28 +++++++++++++-------------- Pangloss/QualityCheck.py | 16 ++++++++-------- Pangloss/Size.py | 4 ++-- Pangloss/Tools.py | 16 ++++++++-------- 12 files changed, 80 insertions(+), 80 deletions(-) diff --git a/Pangloss.py b/Pangloss.py index 1f3a855..1e230d4 100755 --- a/Pangloss.py +++ b/Pangloss.py @@ -86,7 +86,7 @@ import sys import multiprocessing as mp from Bio.Data.CodonTable import TranslationError -from ConfigParser import SafeConfigParser +from configparser import SafeConfigParser from datetime import datetime from argparse import ArgumentParser from glob import glob @@ -105,7 +105,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, gm_path = GeneMark-ES path. tp_path = TransDecoder.Predict path. tl_path = TransDecoder.LongOrfs path. - + Arguments taken from Gene_model_prediction section of config file as follows: genomelist = List of strain genomes specified by genomes_list. workdir = Working directory for prediction given by prediction_dir. @@ -127,7 +127,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Generate list of genomes from user-provided genome list file. logging.info("Master: Parsing genome list.") genomes = [line.strip("\n") for line in open(genomelist)] - + # Create working directory if not present. logging.info("Master: Building working directory for gene model prediction.") PanGuess.MakeWorkingDir(workdir) @@ -136,7 +136,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, if not skip: logging.info("Master: Building working directory for gene model prediction.") PanGuess.BuildRefSet(workdir, ref) - + # Loop over each genome and carry out gene model prediction. for genome in genomes: # Make tag from genome name (assuming genome name is in the format STRAIN.fna). @@ -150,11 +150,11 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Run prediction using Exonerate. cmds = PanGuess.BuildExonerateCmds(workdir, ex_path, genome) exonerate_genes = PanGuess.RunExonerate(cmds, cores) - + # Order gene models predicted via Exonerate by Contig ID: Location. logging.info("Master: Sorting gene model predictions by genomic location.") exonerate_genes.sort(key=lambda x: (x.contig_id, x.locs[0])) - + # Extract genomic attributes from Exonerate gene model set. exonerate_attributes = PanGuess.GetExonerateAttributes(exonerate_genes, tag) @@ -162,15 +162,15 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, logging.info("Master: Skipping gene model prediction via Exonerate (--no_exonerate enabled).") exonerate_genes = None exonerate_attributes = None - + # Run prediction using GeneMark-ES. logging.info("Master: Running gene model prediction for {0} using GeneMark-ES.".format(genome)) genemark_gtf = PanGuess.RunGeneMark(genome, gm_path, gm_branch, cores) - + # Convert GeneMark-ES GTF file into a more PanOCT-compatible version. logging.info("Master: Converting GeneMark GTF data to attribute data.") genemark_attributes = PanGuess.GeneMarkGTFConverter(genemark_gtf, tag) - + # Merge unique gene model calls between Exonerate and GeneMark-ES. 
if not skip: logging.info("Master: Merging Exonerate and GeneMark-ES gene calls.") @@ -182,19 +182,19 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Clean up GeneMark-ES files and folders. logging.info("Master: Tidying up GeneMark-ES temporary files.") PanGuess.MoveGeneMarkFiles(workdir, genome) - + # Extract NCRs into list. logging.info("Master: Extracting non-coding regions of {0} for TransDecoder analysis.".format(genome)) noncoding = PanGuess.ExtractNCR(merged_attributes, genome) - + # Run TransDecoder on NCRs. logging.info("Master: Running TransDecoder on non-coding regions of {0}.".format(genome)) tdir = PanGuess.RunTransDecoder(noncoding, tp_path, tl_path, workdir, genome, td_len) - + # Move TransDecoder files. logging.info("Master: Tidying up TransDecoder temporary files.") PanGuess.MoveTransDecoderFiles(tdir) - + # Extract TransDecoder attributes. logging.info("Master: Converting TransDecoder GTF data to attribute data.") trans_attributes = PanGuess.TransDecoderGTFToAttributes(tdir, tag) @@ -202,11 +202,11 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Merge TransDecoder calls into the Exonerate + GeneMark-ES set. logging.info("Master: Merging all remmaining gene calls for {0}.".format(genome)) full_attributes = PanGuess.MergeAttributes(merged_attributes, trans_attributes) - + # Write out gene set, protein set and attributes set. logging.info("Master: Writing out datasets for {0}.".format(genome)) PanGuess.ConstructGeneModelSets(full_attributes, exonerate_genes, workdir, genome, tag) - + # Compress temporary folders and finish up. #logging.info("Master: Compressing temporary folders for {0}.".format(genome)) #PanGuess.TarballGenePredictionDirs(workdir, genome) @@ -331,7 +331,7 @@ def PAMLHandler(ml_path, yn_path, refine=False): try: trans_seqs = PAML.TranslateCDS(cluster) except TranslationError as e: - print "{0}, {1} has unusual frameshift mutation and can't be run through yn00.".format(e, cluster) + print("{0}, {1} has unusual frameshift mutation and can't be run through yn00.".format(e, cluster)) trans_seqs = None if trans_seqs: prot_alignment = PAML.MUSCLEAlign(ml_path, trans_seqs) @@ -525,9 +525,9 @@ def main(): if ap.pred or ap.pred_only: in_date = CheckGeneMarkLicence(start_time) if not in_date: - print "Your 400-day GeneMark-ES license is out of date and hence PanGloss can't predict genes." \ + print("Your 400-day GeneMark-ES license is out of date and hence PanGloss can't predict genes." \ "Go to http://exon.gatech.edu/GeneMark/gmes_instructions.html to download a new license key," \ - "and place it in your home folder under the name .gm_key. Exiting out of Pangloss." + "and place it in your home folder under the name .gm_key. Exiting out of Pangloss.") exit(0) panguess_args = [ex_path, gm_path, tp_path, tl_path] logging.info("Master: Performing gene prediction steps using PanGuess.") @@ -597,8 +597,8 @@ def main(): # If enabled, run InterProScan analysis on entire dataset. if ap.ips: if not sys.platform.startswith("linux"): - print "InterProScan is not supported on non-Linux operating systems. Cannot run InterProScan analysis." - print "See https://github.com/ebi-pf-team/interproscan/wiki for more information." + print("InterProScan is not supported on non-Linux operating systems. 
Cannot run InterProScan analysis.") + print("See https://github.com/ebi-pf-team/interproscan/wiki for more information.") pass else: IPSHandler(ip_path) diff --git a/Pangloss/BLASTAll.py b/Pangloss/BLASTAll.py index 7d975fd..c96b31b 100644 --- a/Pangloss/BLASTAll.py +++ b/Pangloss/BLASTAll.py @@ -3,14 +3,14 @@ BLASTAll: Module for handling parallelized all-vs.-all BLASTp searches, if enabled by user. """ -import cStringIO +import io import logging import multiprocessing as mp import subprocess as sp from Bio import SeqIO, SearchIO -from Tools import StringBLAST +from .Tools import StringBLAST def BLASTAll(cores=None): @@ -52,7 +52,7 @@ def MergeBLASTsAndWrite(results): # Filter last two lines of each BLASTp result and join remaining lines together, making one big SearchIO object. logging.info("BLASTAll: Merging all-vs.-all results together and parsing into tabular format.") merged = "\n".join((["\n".join(result.split("\n")[:-2]) for result in results if result])) - parsed = SearchIO.parse(cStringIO.StringIO(merged), "blast-tab", comments=True) + parsed = SearchIO.parse(io.StringIO(merged), "blast-tab", comments=True) # Write merged BLASTp results to file for PanOCT. logging.info("BLASTAll: Writing BLASTp results to file panoct.blast.") diff --git a/Pangloss/BUSCO.py b/Pangloss/BUSCO.py index a573ff1..cc4c343 100644 --- a/Pangloss/BUSCO.py +++ b/Pangloss/BUSCO.py @@ -2,7 +2,7 @@ import shutil import subprocess as sp -from Tools import TryMkDirs +from .Tools import TryMkDirs def RunBUSCO(buscopath, lineagepath, gene_sets): @@ -17,6 +17,6 @@ def RunBUSCO(buscopath, lineagepath, gene_sets): for gene_set in gene_sets: wd = gene_set.split("/")[-1] cmd = [buscopath, "-i", gene_set, "-l", lineagepath, "-o", "{0}.busco".format(wd), "-m", "prot"] - print "Running BUSCO" + print("Running BUSCO") sp.call(cmd) shutil.move("run_{0}.busco".format(wd), bdir) \ No newline at end of file diff --git a/Pangloss/ExonerateGene.py b/Pangloss/ExonerateGene.py index 7356d44..a77a91a 100644 --- a/Pangloss/ExonerateGene.py +++ b/Pangloss/ExonerateGene.py @@ -60,7 +60,7 @@ def __init__(self, string): prot.append(seq) if "*" in record.seq[:-1]: stop = True - cds_region = filter(lambda x: len(x) == 3, fragment.aln_annotation["hit_annotation"]) + cds_region = [x for x in fragment.aln_annotation["hit_annotation"] if len(x) == 3] nucl.append(str("".join(cds_region))) # Populate attributes. diff --git a/Pangloss/GO.py b/Pangloss/GO.py index a81c57f..2dcbb93 100644 --- a/Pangloss/GO.py +++ b/Pangloss/GO.py @@ -7,7 +7,7 @@ import subprocess as sp from csv import reader -from Tools import Flatten, ParseMatchtable, TryMkDirs +from .Tools import Flatten, ParseMatchtable, TryMkDirs def MakeWorkingDirs(): @@ -62,8 +62,8 @@ def GeneratePopulations(annos, matchtable): Write out background (full) population and study (core, accessory) population files for use in GOATools. 
""" core, acc = ParseMatchtable(matchtable) - c_pop = [val for val in Flatten(core.values()) if val in annos] - a_pop = [val for val in Flatten(acc.values()) if val in annos] + c_pop = [val for val in Flatten(list(core.values())) if val in annos] + a_pop = [val for val in Flatten(list(acc.values())) if val in annos] full_pop = c_pop + a_pop with open("go/core_pop.txt", "w") as cp_file, open("go/acc_pop.txt", "w") as ap_file,\ open("go/full_pop.txt", "w") as fp_file: diff --git a/Pangloss/Karyotype.py b/Pangloss/Karyotype.py index 2f16980..c6ac7b5 100644 --- a/Pangloss/Karyotype.py +++ b/Pangloss/Karyotype.py @@ -16,7 +16,7 @@ from Bio import SeqIO -from Tools import Flatten, ParseMatchtable, ParseKaryotypes, TryMkDirs +from .Tools import Flatten, ParseMatchtable, ParseKaryotypes, TryMkDirs def GenerateContigLengths(genomes): @@ -48,19 +48,19 @@ def GenerateKaryotypeFiles(attributes, matchtable): for row in attread: karyo = [row[0], row[1], row[2], row[3]] - core_gms = Flatten(core.values()) - acc_gms = Flatten(acc.values()) - total = len(core.values()[0]) + core_gms = Flatten(list(core.values())) + acc_gms = Flatten(list(acc.values())) + total = len(list(core.values())[0]) if row[1] in core_gms: number = core_gms.index(row[1]) / total - cluster = core.values()[number] - ortho = len(filter(lambda x: x is not None, cluster)) + cluster = list(core.values())[number] + ortho = len([x for x in cluster if x is not None]) karyo = karyo + ["core", row[5], str(ortho)] karyotype.append(karyo) elif row[1] in acc_gms: number = acc_gms.index(row[1]) / total - cluster = acc.values()[number] - ortho = len(filter(lambda x: x is not None, cluster)) + cluster = list(acc.values())[number] + ortho = len([x for x in cluster if x is not None]) karyo = karyo + ["acc", row[5], str(ortho)] karyotype.append(karyo) else: diff --git a/Pangloss/PAML.py b/Pangloss/PAML.py index ad99c2a..62c04c4 100644 --- a/Pangloss/PAML.py +++ b/Pangloss/PAML.py @@ -3,7 +3,7 @@ PAML: Module for handling yn00 selection analysis (and maybe CodeML in the future), if enabled by user. """ -import cStringIO +import io import os from Bio import AlignIO, SeqIO @@ -11,7 +11,7 @@ from Bio.Phylo.PAML._paml import PamlError from glob import glob -from Tools import StringMUSCLE, Untranslate +from .Tools import StringMUSCLE, Untranslate def TranslateCDS(seqs): @@ -32,7 +32,7 @@ def MUSCLEAlign(ml_path, seqs): Align translated nucleotides in StringMUSCLE, return parsed alignment. """ output = StringMUSCLE(ml_path, seqs) - return AlignIO.parse(cStringIO.StringIO(output), "fasta") + return AlignIO.parse(io.StringIO(output), "fasta") def PutGaps(alignment, cluster): @@ -53,7 +53,7 @@ def PutGaps(alignment, cluster): unseq.id = seq.id.split("|")[0] nucl_aln += (">{0}\n{1}\n".format(unseq.id, unseq.seq)) - fas_aln = AlignIO.read(cStringIO.StringIO(nucl_aln), "fasta") + fas_aln = AlignIO.read(io.StringIO(nucl_aln), "fasta") AlignIO.write(fas_aln, "{0}.aln".format(cluster), "phylip-sequential") return "{0}.aln".format(cluster) @@ -68,7 +68,7 @@ def RunYn00(yn_path, alignment): try: yn.run(ctl_file=None, command=yn_path, parse=False) except PamlError as e: - print "{0}, {1} may have internal stop codons.".format(e, alignment) + print("{0}, {1} may have internal stop codons.".format(e, alignment)) pass diff --git a/Pangloss/PanGuess.py b/Pangloss/PanGuess.py index 9f93926..d8f9dc0 100644 --- a/Pangloss/PanGuess.py +++ b/Pangloss/PanGuess.py @@ -45,7 +45,7 @@ Maynooth University in 2017-2019 (Charley.McCarthy@nuim.ie). 
""" -from __future__ import division + import logging import multiprocessing as mp @@ -62,7 +62,7 @@ from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from Tools import ExonerateCmdLine, LocationOverlap, Pairwise, TryMkDirs # get_gene_lengths +from .Tools import ExonerateCmdLine, LocationOverlap, Pairwise, TryMkDirs # get_gene_lengths def LengthOverlap(gene, ref_lengths): @@ -288,7 +288,7 @@ def ExtractNCR(attributes, genome): # Loop over every contig/chromosome in the genome. for seq in db: - coding = filter(lambda x: x[0] == seq.id, attributes) + coding = [x for x in attributes if x[0] == seq.id] for gene, next_gene in Pairwise(coding): if coding.index(gene) == 0: if gene[2] != 0: @@ -377,11 +377,11 @@ def TransDecoderGTFToAttributes(tdir, tag): if row: if len(row) == 9: contig_id = re.match(cregex, row[0]).group()[:-5] - global_locs = map(int, row[0].split("_")[-2:]) + global_locs = list(map(int, row[0].split("_")[-2:])) if row[2] == "exon": exon_count = exon_count + 1 if row[2] == "CDS": - relative_locs = map(int, row[3:5]) + relative_locs = list(map(int, row[3:5])) start = global_locs[0] + relative_locs[0] - 1 stop = global_locs[0] + relative_locs[1] - 1 locs = [start, stop] @@ -447,7 +447,7 @@ def ConstructGeneModelSets(attributes, exonerate_genes, workdir, genome, tag): prot_models.append(prot_seq) nucl_models.append(nucl_seq) if gene[4].startswith("Exonerate"): - match = filter(lambda x: x.id == gene[1], exonerate_genes) + match = [x for x in exonerate_genes if x.id == gene[1]] prot_seq = SeqRecord(Seq(match[0].prot), id=match[0].id) nucl_seq = SeqRecord(Seq(match[0].nucl), id=match[0].id) prot_seq.id = "{0}|{1}".format(tag, prot_seq.id) diff --git a/Pangloss/PanOCT.py b/Pangloss/PanOCT.py index 994853a..ba7c30e 100644 --- a/Pangloss/PanOCT.py +++ b/Pangloss/PanOCT.py @@ -8,7 +8,7 @@ from Bio import SeqIO, SearchIO -from Tools import ConcatenateDatasets, ClusterMerge, Flatten, MultipleInsert, ParseMatchtable, \ +from .Tools import ConcatenateDatasets, ClusterMerge, Flatten, MultipleInsert, ParseMatchtable, \ QueryClusterFirstHits, Reciprocal, TryMkDirs def RunPanOCT(fasta_db, attributes, blast, genome_list, **kwargs): @@ -47,39 +47,39 @@ def FillGaps(blast, matchtable, seqs, tags): tags = [line.strip("\n") for line in open(tags)] # Loop over every accessory cluster. 
- og_acc = acc.keys() + og_acc = list(acc.keys()) ignore = [] for q_cluster_id in og_acc: - print "{0} out of {1} clusters searched".format(og_acc.index(q_cluster_id), len(og_acc)) + print("{0} out of {1} clusters searched".format(og_acc.index(q_cluster_id), len(og_acc))) if q_cluster_id not in ignore: - current_acc = [key for key in acc.keys() if key not in ignore] + current_acc = [key for key in list(acc.keys()) if key not in ignore] if q_cluster_id in current_acc: q_cluster = acc[q_cluster_id] q_pos = [pos for pos, gene in enumerate(q_cluster) if gene] q_present = set([tags[pos] for pos in q_pos]) - q_members = set(sorted(filter(lambda x: x is not None, q_cluster))) - q_missing = set(filter(lambda tag: tag not in q_present, tags)) + q_members = set(sorted([x for x in q_cluster if x is not None])) + q_missing = set([tag for tag in tags if tag not in q_present]) q_blasts = QueryClusterFirstHits(q_cluster, searches, 30, q_missing) - q_first_hits = set(filter(lambda x: x is not None, Flatten(q_blasts.values()))) + q_first_hits = set([x for x in Flatten(list(q_blasts.values())) if x is not None]) q_query = MultipleInsert(list(q_first_hits), tags) - if q_query in acc.values(): - s_cluster_id = acc.keys()[acc.values().index(q_query)] + if q_query in list(acc.values()): + s_cluster_id = list(acc.keys())[list(acc.values()).index(q_query)] if s_cluster_id not in ignore: s_cluster = acc[s_cluster_id] - s_members = set(sorted(filter(lambda x: x is not None, s_cluster))) + s_members = set(sorted([x for x in s_cluster if x is not None])) if s_members == q_first_hits: s_present = set([gene.split("|")[0] for gene in s_members]) - s_missing = set(filter(lambda tag: tag not in s_present, tags)) + s_missing = set([tag for tag in tags if tag not in s_present]) s_blasts = QueryClusterFirstHits(s_cluster, searches, 30, s_missing) - s_first_hits = set(filter(lambda x: x is not None, Flatten(s_blasts.values()))) + s_first_hits = set([x for x in Flatten(list(s_blasts.values())) if x is not None]) reciprocal = Reciprocal(q_members, q_first_hits, s_members, s_first_hits) if reciprocal: new_cluster = ClusterMerge(q_cluster, s_cluster) new_clusters[q_cluster_id] = new_cluster acc.pop(q_cluster_id, "None") acc.pop(s_cluster_id, "None") - print "clusters merged: {0} {1}\n".format(str(q_cluster_id), str(s_cluster_id)) - print "size of clusters merged: {0} {1}\n".format(len(q_members), len(s_members)) + print("clusters merged: {0} {1}\n".format(str(q_cluster_id), str(s_cluster_id))) + print("size of clusters merged: {0} {1}\n".format(len(q_members), len(s_members))) ignore = ignore + [q_cluster_id, s_cluster_id] else: pass diff --git a/Pangloss/QualityCheck.py b/Pangloss/QualityCheck.py index 90566f7..eb04cab 100644 --- a/Pangloss/QualityCheck.py +++ b/Pangloss/QualityCheck.py @@ -8,9 +8,9 @@ queries = Set of genes (protein sequences, in fact) to search against all gene model sets. """ -from __future__ import division -import cStringIO + +import io import logging import multiprocessing as mp import os @@ -19,7 +19,7 @@ from Bio import SeqIO, SearchIO -from Tools import MakeBLASTDBCmdLine, QCBLASTCmdLine, TryMkDirs +from .Tools import MakeBLASTDBCmdLine, QCBLASTCmdLine, TryMkDirs def BuildMakeBLASTDBs(gene_sets, cores=None): @@ -75,7 +75,7 @@ def QCBLAST(queries, sets, cores=None): # Return list of parsed BLASTp results. 
logging.info("QualityCheck: Parsing QCBLAST results into SeqIO XML objects.") - return [SearchIO.parse(cStringIO.StringIO(blast), "blast-xml") for blast in blasts] + return [SearchIO.parse(io.StringIO(blast), "blast-xml") for blast in blasts] def RemoveDubiousCalls(results, sets): @@ -103,7 +103,7 @@ def RemoveDubiousCalls(results, sets): for path in sets: genome = path.split("/")[-1] tag = genome.split(".")[0] - tr_strain = filter(lambda x: x.split("|")[0] == tag, to_remove) + tr_strain = [x for x in to_remove if x.split("|")[0] == tag] if tr_strain: aa_path = "./gm_pred/sets/{0}.faa".format(tag) nt_path = "./gm_pred/sets/{0}.nucl".format(tag) @@ -114,9 +114,9 @@ def RemoveDubiousCalls(results, sets): to_move = [aa_path, nt_path, at_path] TryMkDirs("./gm_pred/sets/old/") - new_prot = filter(lambda x: x.id not in tr_strain, current_prot) - new_nucl = filter(lambda x: x.id not in tr_strain, current_nucl) - new_att = filter(lambda x: x[1] not in tr_strain, current_att) + new_prot = [x for x in current_prot if x.id not in tr_strain] + new_nucl = [x for x in current_nucl if x.id not in tr_strain] + new_att = [x for x in current_att if x[1] not in tr_strain] logging.info("QualityCheck: Removed {0} dubious calls from {1}," " writing remaining calls to new files.".format(len(tr_strain), genome)) diff --git a/Pangloss/Size.py b/Pangloss/Size.py index 31e8c1b..2bc7446 100644 --- a/Pangloss/Size.py +++ b/Pangloss/Size.py @@ -1,4 +1,4 @@ -from __future__ import division + """ Module for generating bar and ring charts. @@ -6,7 +6,7 @@ import os import sys import subprocess as sp -from Tools import ParseMatchtable, ClusterSizes +from .Tools import ParseMatchtable, ClusterSizes def GenerateRingChart(matchtable): """ diff --git a/Pangloss/Tools.py b/Pangloss/Tools.py index e5e749e..403a4cf 100644 --- a/Pangloss/Tools.py +++ b/Pangloss/Tools.py @@ -5,19 +5,19 @@ Functions imported explictly via "from Pangloss.Tools import ". """ -from __future__ import division -import cStringIO + +import io import datetime import os import subprocess as sp from collections import Counter, OrderedDict as od from csv import reader -from itertools import chain, izip_longest, tee +from itertools import chain, zip_longest, tee from Bio import SeqIO, SeqRecord -from ExonerateGene import ExonerateGene +from .ExonerateGene import ExonerateGene def TryMkDirs(path): @@ -38,7 +38,7 @@ def Pairwise(iterable): """ a, b = tee(iterable) next(b, None) - return izip_longest(a, b) # Allows (line, None) for EOF. + return zip_longest(a, b) # Allows (line, None) for EOF. def Flatten(iterable): @@ -62,7 +62,7 @@ def ExonerateCmdLine(cmd): """ process = sp.check_output(cmd) if "C4 Alignment:" in process: # Empty results don't contain this line! - return ExonerateGene(cStringIO.StringIO(process)) + return ExonerateGene(io.StringIO(process)) else: pass @@ -269,8 +269,8 @@ def ClusterSizes(component): """ Return counts of cluster sizes within a component. """ - clusters = component.values() - counts = [len(filter(lambda x: x is not None, cluster)) for cluster in clusters] + clusters = list(component.values()) + counts = [len([x for x in cluster if x is not None]) for cluster in clusters] sizes = Counter(counts) return sizes