From fa04e9810f8ab9c166e21242a4919dab956cb928 Mon Sep 17 00:00:00 2001 From: Jason Stajich Date: Mon, 21 Sep 2020 08:52:00 -0700 Subject: [PATCH] run 2to3 and manual convert for Python3 --- Pangloss.py | 40 +++++++++++++++++++-------------------- Pangloss/BLASTAll.py | 6 +++--- Pangloss/BUSCO.py | 4 ++-- Pangloss/ExonerateGene.py | 2 +- Pangloss/GO.py | 6 +++--- Pangloss/Karyotype.py | 16 ++++++++-------- Pangloss/PAML.py | 10 +++++----- Pangloss/PanGuess.py | 12 ++++++------ Pangloss/PanOCT.py | 28 +++++++++++++-------------- Pangloss/QualityCheck.py | 16 ++++++++-------- Pangloss/Size.py | 4 ++-- Pangloss/Tools.py | 16 ++++++++-------- 12 files changed, 80 insertions(+), 80 deletions(-) diff --git a/Pangloss.py b/Pangloss.py index 1f3a855..1e230d4 100755 --- a/Pangloss.py +++ b/Pangloss.py @@ -86,7 +86,7 @@ import sys import multiprocessing as mp from Bio.Data.CodonTable import TranslationError -from ConfigParser import SafeConfigParser +from configparser import SafeConfigParser from datetime import datetime from argparse import ArgumentParser from glob import glob @@ -105,7 +105,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, gm_path = GeneMark-ES path. tp_path = TransDecoder.Predict path. tl_path = TransDecoder.LongOrfs path. - + Arguments taken from Gene_model_prediction section of config file as follows: genomelist = List of strain genomes specified by genomes_list. workdir = Working directory for prediction given by prediction_dir. @@ -127,7 +127,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Generate list of genomes from user-provided genome list file. logging.info("Master: Parsing genome list.") genomes = [line.strip("\n") for line in open(genomelist)] - + # Create working directory if not present. logging.info("Master: Building working directory for gene model prediction.") PanGuess.MakeWorkingDir(workdir) @@ -136,7 +136,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, if not skip: logging.info("Master: Building working directory for gene model prediction.") PanGuess.BuildRefSet(workdir, ref) - + # Loop over each genome and carry out gene model prediction. for genome in genomes: # Make tag from genome name (assuming genome name is in the format STRAIN.fna). @@ -150,11 +150,11 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Run prediction using Exonerate. cmds = PanGuess.BuildExonerateCmds(workdir, ex_path, genome) exonerate_genes = PanGuess.RunExonerate(cmds, cores) - + # Order gene models predicted via Exonerate by Contig ID: Location. logging.info("Master: Sorting gene model predictions by genomic location.") exonerate_genes.sort(key=lambda x: (x.contig_id, x.locs[0])) - + # Extract genomic attributes from Exonerate gene model set. exonerate_attributes = PanGuess.GetExonerateAttributes(exonerate_genes, tag) @@ -162,15 +162,15 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, logging.info("Master: Skipping gene model prediction via Exonerate (--no_exonerate enabled).") exonerate_genes = None exonerate_attributes = None - + # Run prediction using GeneMark-ES. logging.info("Master: Running gene model prediction for {0} using GeneMark-ES.".format(genome)) genemark_gtf = PanGuess.RunGeneMark(genome, gm_path, gm_branch, cores) - + # Convert GeneMark-ES GTF file into a more PanOCT-compatible version. logging.info("Master: Converting GeneMark GTF data to attribute data.") genemark_attributes = PanGuess.GeneMarkGTFConverter(genemark_gtf, tag) - + # Merge unique gene model calls between Exonerate and GeneMark-ES. 
if not skip: logging.info("Master: Merging Exonerate and GeneMark-ES gene calls.") @@ -182,19 +182,19 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Clean up GeneMark-ES files and folders. logging.info("Master: Tidying up GeneMark-ES temporary files.") PanGuess.MoveGeneMarkFiles(workdir, genome) - + # Extract NCRs into list. logging.info("Master: Extracting non-coding regions of {0} for TransDecoder analysis.".format(genome)) noncoding = PanGuess.ExtractNCR(merged_attributes, genome) - + # Run TransDecoder on NCRs. logging.info("Master: Running TransDecoder on non-coding regions of {0}.".format(genome)) tdir = PanGuess.RunTransDecoder(noncoding, tp_path, tl_path, workdir, genome, td_len) - + # Move TransDecoder files. logging.info("Master: Tidying up TransDecoder temporary files.") PanGuess.MoveTransDecoderFiles(tdir) - + # Extract TransDecoder attributes. logging.info("Master: Converting TransDecoder GTF data to attribute data.") trans_attributes = PanGuess.TransDecoderGTFToAttributes(tdir, tag) @@ -202,11 +202,11 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path, # Merge TransDecoder calls into the Exonerate + GeneMark-ES set. logging.info("Master: Merging all remmaining gene calls for {0}.".format(genome)) full_attributes = PanGuess.MergeAttributes(merged_attributes, trans_attributes) - + # Write out gene set, protein set and attributes set. logging.info("Master: Writing out datasets for {0}.".format(genome)) PanGuess.ConstructGeneModelSets(full_attributes, exonerate_genes, workdir, genome, tag) - + # Compress temporary folders and finish up. #logging.info("Master: Compressing temporary folders for {0}.".format(genome)) #PanGuess.TarballGenePredictionDirs(workdir, genome) @@ -331,7 +331,7 @@ def PAMLHandler(ml_path, yn_path, refine=False): try: trans_seqs = PAML.TranslateCDS(cluster) except TranslationError as e: - print "{0}, {1} has unusual frameshift mutation and can't be run through yn00.".format(e, cluster) + print("{0}, {1} has unusual frameshift mutation and can't be run through yn00.".format(e, cluster)) trans_seqs = None if trans_seqs: prot_alignment = PAML.MUSCLEAlign(ml_path, trans_seqs) @@ -525,9 +525,9 @@ def main(): if ap.pred or ap.pred_only: in_date = CheckGeneMarkLicence(start_time) if not in_date: - print "Your 400-day GeneMark-ES license is out of date and hence PanGloss can't predict genes." \ + print("Your 400-day GeneMark-ES license is out of date and hence PanGloss can't predict genes." \ "Go to http://exon.gatech.edu/GeneMark/gmes_instructions.html to download a new license key," \ - "and place it in your home folder under the name .gm_key. Exiting out of Pangloss." + "and place it in your home folder under the name .gm_key. Exiting out of Pangloss.") exit(0) panguess_args = [ex_path, gm_path, tp_path, tl_path] logging.info("Master: Performing gene prediction steps using PanGuess.") @@ -597,8 +597,8 @@ def main(): # If enabled, run InterProScan analysis on entire dataset. if ap.ips: if not sys.platform.startswith("linux"): - print "InterProScan is not supported on non-Linux operating systems. Cannot run InterProScan analysis." - print "See https://github.com/ebi-pf-team/interproscan/wiki for more information." + print("InterProScan is not supported on non-Linux operating systems. 
Cannot run InterProScan analysis.") + print("See https://github.com/ebi-pf-team/interproscan/wiki for more information.") pass else: IPSHandler(ip_path) diff --git a/Pangloss/BLASTAll.py b/Pangloss/BLASTAll.py index 7d975fd..c96b31b 100644 --- a/Pangloss/BLASTAll.py +++ b/Pangloss/BLASTAll.py @@ -3,14 +3,14 @@ BLASTAll: Module for handling parallelized all-vs.-all BLASTp searches, if enabled by user. """ -import cStringIO +import io import logging import multiprocessing as mp import subprocess as sp from Bio import SeqIO, SearchIO -from Tools import StringBLAST +from .Tools import StringBLAST def BLASTAll(cores=None): @@ -52,7 +52,7 @@ def MergeBLASTsAndWrite(results): # Filter last two lines of each BLASTp result and join remaining lines together, making one big SearchIO object. logging.info("BLASTAll: Merging all-vs.-all results together and parsing into tabular format.") merged = "\n".join((["\n".join(result.split("\n")[:-2]) for result in results if result])) - parsed = SearchIO.parse(cStringIO.StringIO(merged), "blast-tab", comments=True) + parsed = SearchIO.parse(io.StringIO(merged), "blast-tab", comments=True) # Write merged BLASTp results to file for PanOCT. logging.info("BLASTAll: Writing BLASTp results to file panoct.blast.") diff --git a/Pangloss/BUSCO.py b/Pangloss/BUSCO.py index a573ff1..cc4c343 100644 --- a/Pangloss/BUSCO.py +++ b/Pangloss/BUSCO.py @@ -2,7 +2,7 @@ import shutil import subprocess as sp -from Tools import TryMkDirs +from .Tools import TryMkDirs def RunBUSCO(buscopath, lineagepath, gene_sets): @@ -17,6 +17,6 @@ def RunBUSCO(buscopath, lineagepath, gene_sets): for gene_set in gene_sets: wd = gene_set.split("/")[-1] cmd = [buscopath, "-i", gene_set, "-l", lineagepath, "-o", "{0}.busco".format(wd), "-m", "prot"] - print "Running BUSCO" + print("Running BUSCO") sp.call(cmd) shutil.move("run_{0}.busco".format(wd), bdir) \ No newline at end of file diff --git a/Pangloss/ExonerateGene.py b/Pangloss/ExonerateGene.py index 7356d44..a77a91a 100644 --- a/Pangloss/ExonerateGene.py +++ b/Pangloss/ExonerateGene.py @@ -60,7 +60,7 @@ def __init__(self, string): prot.append(seq) if "*" in record.seq[:-1]: stop = True - cds_region = filter(lambda x: len(x) == 3, fragment.aln_annotation["hit_annotation"]) + cds_region = [x for x in fragment.aln_annotation["hit_annotation"] if len(x) == 3] nucl.append(str("".join(cds_region))) # Populate attributes. diff --git a/Pangloss/GO.py b/Pangloss/GO.py index a81c57f..2dcbb93 100644 --- a/Pangloss/GO.py +++ b/Pangloss/GO.py @@ -7,7 +7,7 @@ import subprocess as sp from csv import reader -from Tools import Flatten, ParseMatchtable, TryMkDirs +from .Tools import Flatten, ParseMatchtable, TryMkDirs def MakeWorkingDirs(): @@ -62,8 +62,8 @@ def GeneratePopulations(annos, matchtable): Write out background (full) population and study (core, accessory) population files for use in GOATools. 
""" core, acc = ParseMatchtable(matchtable) - c_pop = [val for val in Flatten(core.values()) if val in annos] - a_pop = [val for val in Flatten(acc.values()) if val in annos] + c_pop = [val for val in Flatten(list(core.values())) if val in annos] + a_pop = [val for val in Flatten(list(acc.values())) if val in annos] full_pop = c_pop + a_pop with open("go/core_pop.txt", "w") as cp_file, open("go/acc_pop.txt", "w") as ap_file,\ open("go/full_pop.txt", "w") as fp_file: diff --git a/Pangloss/Karyotype.py b/Pangloss/Karyotype.py index 2f16980..c6ac7b5 100644 --- a/Pangloss/Karyotype.py +++ b/Pangloss/Karyotype.py @@ -16,7 +16,7 @@ from Bio import SeqIO -from Tools import Flatten, ParseMatchtable, ParseKaryotypes, TryMkDirs +from .Tools import Flatten, ParseMatchtable, ParseKaryotypes, TryMkDirs def GenerateContigLengths(genomes): @@ -48,19 +48,19 @@ def GenerateKaryotypeFiles(attributes, matchtable): for row in attread: karyo = [row[0], row[1], row[2], row[3]] - core_gms = Flatten(core.values()) - acc_gms = Flatten(acc.values()) - total = len(core.values()[0]) + core_gms = Flatten(list(core.values())) + acc_gms = Flatten(list(acc.values())) + total = len(list(core.values())[0]) if row[1] in core_gms: number = core_gms.index(row[1]) / total - cluster = core.values()[number] - ortho = len(filter(lambda x: x is not None, cluster)) + cluster = list(core.values())[number] + ortho = len([x for x in cluster if x is not None]) karyo = karyo + ["core", row[5], str(ortho)] karyotype.append(karyo) elif row[1] in acc_gms: number = acc_gms.index(row[1]) / total - cluster = acc.values()[number] - ortho = len(filter(lambda x: x is not None, cluster)) + cluster = list(acc.values())[number] + ortho = len([x for x in cluster if x is not None]) karyo = karyo + ["acc", row[5], str(ortho)] karyotype.append(karyo) else: diff --git a/Pangloss/PAML.py b/Pangloss/PAML.py index ad99c2a..62c04c4 100644 --- a/Pangloss/PAML.py +++ b/Pangloss/PAML.py @@ -3,7 +3,7 @@ PAML: Module for handling yn00 selection analysis (and maybe CodeML in the future), if enabled by user. """ -import cStringIO +import io import os from Bio import AlignIO, SeqIO @@ -11,7 +11,7 @@ from Bio.Phylo.PAML._paml import PamlError from glob import glob -from Tools import StringMUSCLE, Untranslate +from .Tools import StringMUSCLE, Untranslate def TranslateCDS(seqs): @@ -32,7 +32,7 @@ def MUSCLEAlign(ml_path, seqs): Align translated nucleotides in StringMUSCLE, return parsed alignment. """ output = StringMUSCLE(ml_path, seqs) - return AlignIO.parse(cStringIO.StringIO(output), "fasta") + return AlignIO.parse(io.StringIO(output), "fasta") def PutGaps(alignment, cluster): @@ -53,7 +53,7 @@ def PutGaps(alignment, cluster): unseq.id = seq.id.split("|")[0] nucl_aln += (">{0}\n{1}\n".format(unseq.id, unseq.seq)) - fas_aln = AlignIO.read(cStringIO.StringIO(nucl_aln), "fasta") + fas_aln = AlignIO.read(io.StringIO(nucl_aln), "fasta") AlignIO.write(fas_aln, "{0}.aln".format(cluster), "phylip-sequential") return "{0}.aln".format(cluster) @@ -68,7 +68,7 @@ def RunYn00(yn_path, alignment): try: yn.run(ctl_file=None, command=yn_path, parse=False) except PamlError as e: - print "{0}, {1} may have internal stop codons.".format(e, alignment) + print("{0}, {1} may have internal stop codons.".format(e, alignment)) pass diff --git a/Pangloss/PanGuess.py b/Pangloss/PanGuess.py index 9f93926..d8f9dc0 100644 --- a/Pangloss/PanGuess.py +++ b/Pangloss/PanGuess.py @@ -45,7 +45,7 @@ Maynooth University in 2017-2019 (Charley.McCarthy@nuim.ie). 
""" -from __future__ import division + import logging import multiprocessing as mp @@ -62,7 +62,7 @@ from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from Tools import ExonerateCmdLine, LocationOverlap, Pairwise, TryMkDirs # get_gene_lengths +from .Tools import ExonerateCmdLine, LocationOverlap, Pairwise, TryMkDirs # get_gene_lengths def LengthOverlap(gene, ref_lengths): @@ -288,7 +288,7 @@ def ExtractNCR(attributes, genome): # Loop over every contig/chromosome in the genome. for seq in db: - coding = filter(lambda x: x[0] == seq.id, attributes) + coding = [x for x in attributes if x[0] == seq.id] for gene, next_gene in Pairwise(coding): if coding.index(gene) == 0: if gene[2] != 0: @@ -377,11 +377,11 @@ def TransDecoderGTFToAttributes(tdir, tag): if row: if len(row) == 9: contig_id = re.match(cregex, row[0]).group()[:-5] - global_locs = map(int, row[0].split("_")[-2:]) + global_locs = list(map(int, row[0].split("_")[-2:])) if row[2] == "exon": exon_count = exon_count + 1 if row[2] == "CDS": - relative_locs = map(int, row[3:5]) + relative_locs = list(map(int, row[3:5])) start = global_locs[0] + relative_locs[0] - 1 stop = global_locs[0] + relative_locs[1] - 1 locs = [start, stop] @@ -447,7 +447,7 @@ def ConstructGeneModelSets(attributes, exonerate_genes, workdir, genome, tag): prot_models.append(prot_seq) nucl_models.append(nucl_seq) if gene[4].startswith("Exonerate"): - match = filter(lambda x: x.id == gene[1], exonerate_genes) + match = [x for x in exonerate_genes if x.id == gene[1]] prot_seq = SeqRecord(Seq(match[0].prot), id=match[0].id) nucl_seq = SeqRecord(Seq(match[0].nucl), id=match[0].id) prot_seq.id = "{0}|{1}".format(tag, prot_seq.id) diff --git a/Pangloss/PanOCT.py b/Pangloss/PanOCT.py index 994853a..ba7c30e 100644 --- a/Pangloss/PanOCT.py +++ b/Pangloss/PanOCT.py @@ -8,7 +8,7 @@ from Bio import SeqIO, SearchIO -from Tools import ConcatenateDatasets, ClusterMerge, Flatten, MultipleInsert, ParseMatchtable, \ +from .Tools import ConcatenateDatasets, ClusterMerge, Flatten, MultipleInsert, ParseMatchtable, \ QueryClusterFirstHits, Reciprocal, TryMkDirs def RunPanOCT(fasta_db, attributes, blast, genome_list, **kwargs): @@ -47,39 +47,39 @@ def FillGaps(blast, matchtable, seqs, tags): tags = [line.strip("\n") for line in open(tags)] # Loop over every accessory cluster. 
- og_acc = acc.keys() + og_acc = list(acc.keys()) ignore = [] for q_cluster_id in og_acc: - print "{0} out of {1} clusters searched".format(og_acc.index(q_cluster_id), len(og_acc)) + print("{0} out of {1} clusters searched".format(og_acc.index(q_cluster_id), len(og_acc))) if q_cluster_id not in ignore: - current_acc = [key for key in acc.keys() if key not in ignore] + current_acc = [key for key in list(acc.keys()) if key not in ignore] if q_cluster_id in current_acc: q_cluster = acc[q_cluster_id] q_pos = [pos for pos, gene in enumerate(q_cluster) if gene] q_present = set([tags[pos] for pos in q_pos]) - q_members = set(sorted(filter(lambda x: x is not None, q_cluster))) - q_missing = set(filter(lambda tag: tag not in q_present, tags)) + q_members = set(sorted([x for x in q_cluster if x is not None])) + q_missing = set([tag for tag in tags if tag not in q_present]) q_blasts = QueryClusterFirstHits(q_cluster, searches, 30, q_missing) - q_first_hits = set(filter(lambda x: x is not None, Flatten(q_blasts.values()))) + q_first_hits = set([x for x in Flatten(list(q_blasts.values())) if x is not None]) q_query = MultipleInsert(list(q_first_hits), tags) - if q_query in acc.values(): - s_cluster_id = acc.keys()[acc.values().index(q_query)] + if q_query in list(acc.values()): + s_cluster_id = list(acc.keys())[list(acc.values()).index(q_query)] if s_cluster_id not in ignore: s_cluster = acc[s_cluster_id] - s_members = set(sorted(filter(lambda x: x is not None, s_cluster))) + s_members = set(sorted([x for x in s_cluster if x is not None])) if s_members == q_first_hits: s_present = set([gene.split("|")[0] for gene in s_members]) - s_missing = set(filter(lambda tag: tag not in s_present, tags)) + s_missing = set([tag for tag in tags if tag not in s_present]) s_blasts = QueryClusterFirstHits(s_cluster, searches, 30, s_missing) - s_first_hits = set(filter(lambda x: x is not None, Flatten(s_blasts.values()))) + s_first_hits = set([x for x in Flatten(list(s_blasts.values())) if x is not None]) reciprocal = Reciprocal(q_members, q_first_hits, s_members, s_first_hits) if reciprocal: new_cluster = ClusterMerge(q_cluster, s_cluster) new_clusters[q_cluster_id] = new_cluster acc.pop(q_cluster_id, "None") acc.pop(s_cluster_id, "None") - print "clusters merged: {0} {1}\n".format(str(q_cluster_id), str(s_cluster_id)) - print "size of clusters merged: {0} {1}\n".format(len(q_members), len(s_members)) + print("clusters merged: {0} {1}\n".format(str(q_cluster_id), str(s_cluster_id))) + print("size of clusters merged: {0} {1}\n".format(len(q_members), len(s_members))) ignore = ignore + [q_cluster_id, s_cluster_id] else: pass diff --git a/Pangloss/QualityCheck.py b/Pangloss/QualityCheck.py index 90566f7..eb04cab 100644 --- a/Pangloss/QualityCheck.py +++ b/Pangloss/QualityCheck.py @@ -8,9 +8,9 @@ queries = Set of genes (protein sequences, in fact) to search against all gene model sets. """ -from __future__ import division -import cStringIO + +import io import logging import multiprocessing as mp import os @@ -19,7 +19,7 @@ from Bio import SeqIO, SearchIO -from Tools import MakeBLASTDBCmdLine, QCBLASTCmdLine, TryMkDirs +from .Tools import MakeBLASTDBCmdLine, QCBLASTCmdLine, TryMkDirs def BuildMakeBLASTDBs(gene_sets, cores=None): @@ -75,7 +75,7 @@ def QCBLAST(queries, sets, cores=None): # Return list of parsed BLASTp results. 
logging.info("QualityCheck: Parsing QCBLAST results into SeqIO XML objects.") - return [SearchIO.parse(cStringIO.StringIO(blast), "blast-xml") for blast in blasts] + return [SearchIO.parse(io.StringIO(blast), "blast-xml") for blast in blasts] def RemoveDubiousCalls(results, sets): @@ -103,7 +103,7 @@ def RemoveDubiousCalls(results, sets): for path in sets: genome = path.split("/")[-1] tag = genome.split(".")[0] - tr_strain = filter(lambda x: x.split("|")[0] == tag, to_remove) + tr_strain = [x for x in to_remove if x.split("|")[0] == tag] if tr_strain: aa_path = "./gm_pred/sets/{0}.faa".format(tag) nt_path = "./gm_pred/sets/{0}.nucl".format(tag) @@ -114,9 +114,9 @@ def RemoveDubiousCalls(results, sets): to_move = [aa_path, nt_path, at_path] TryMkDirs("./gm_pred/sets/old/") - new_prot = filter(lambda x: x.id not in tr_strain, current_prot) - new_nucl = filter(lambda x: x.id not in tr_strain, current_nucl) - new_att = filter(lambda x: x[1] not in tr_strain, current_att) + new_prot = [x for x in current_prot if x.id not in tr_strain] + new_nucl = [x for x in current_nucl if x.id not in tr_strain] + new_att = [x for x in current_att if x[1] not in tr_strain] logging.info("QualityCheck: Removed {0} dubious calls from {1}," " writing remaining calls to new files.".format(len(tr_strain), genome)) diff --git a/Pangloss/Size.py b/Pangloss/Size.py index 31e8c1b..2bc7446 100644 --- a/Pangloss/Size.py +++ b/Pangloss/Size.py @@ -1,4 +1,4 @@ -from __future__ import division + """ Module for generating bar and ring charts. @@ -6,7 +6,7 @@ import os import sys import subprocess as sp -from Tools import ParseMatchtable, ClusterSizes +from .Tools import ParseMatchtable, ClusterSizes def GenerateRingChart(matchtable): """ diff --git a/Pangloss/Tools.py b/Pangloss/Tools.py index e5e749e..403a4cf 100644 --- a/Pangloss/Tools.py +++ b/Pangloss/Tools.py @@ -5,19 +5,19 @@ Functions imported explictly via "from Pangloss.Tools import ". """ -from __future__ import division -import cStringIO + +import io import datetime import os import subprocess as sp from collections import Counter, OrderedDict as od from csv import reader -from itertools import chain, izip_longest, tee +from itertools import chain, zip_longest, tee from Bio import SeqIO, SeqRecord -from ExonerateGene import ExonerateGene +from .ExonerateGene import ExonerateGene def TryMkDirs(path): @@ -38,7 +38,7 @@ def Pairwise(iterable): """ a, b = tee(iterable) next(b, None) - return izip_longest(a, b) # Allows (line, None) for EOF. + return zip_longest(a, b) # Allows (line, None) for EOF. def Flatten(iterable): @@ -62,7 +62,7 @@ def ExonerateCmdLine(cmd): """ process = sp.check_output(cmd) if "C4 Alignment:" in process: # Empty results don't contain this line! - return ExonerateGene(cStringIO.StringIO(process)) + return ExonerateGene(io.StringIO(process)) else: pass @@ -269,8 +269,8 @@ def ClusterSizes(component): """ Return counts of cluster sizes within a component. """ - clusters = component.values() - counts = [len(filter(lambda x: x is not None, cluster)) for cluster in clusters] + clusters = list(component.values()) + counts = [len([x for x in cluster if x is not None]) for cluster in clusters] sizes = Counter(counts) return sizes