Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run 2to3 and manually convert for Python 3 #9

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions Pangloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
import sys
import multiprocessing as mp
from Bio.Data.CodonTable import TranslationError
from ConfigParser import SafeConfigParser
from configparser import SafeConfigParser
from datetime import datetime
from argparse import ArgumentParser
from glob import glob
Expand All @@ -105,7 +105,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path,
gm_path = GeneMark-ES path.
tp_path = TransDecoder.Predict path.
tl_path = TransDecoder.LongOrfs path.

Arguments taken from Gene_model_prediction section of config file as follows:
genomelist = List of strain genomes specified by genomes_list.
workdir = Working directory for prediction given by prediction_dir.
Expand All @@ -127,7 +127,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path,
# Generate list of genomes from user-provided genome list file.
logging.info("Master: Parsing genome list.")
genomes = [line.strip("\n") for line in open(genomelist)]

# Create working directory if not present.
logging.info("Master: Building working directory for gene model prediction.")
PanGuess.MakeWorkingDir(workdir)
Expand All @@ -136,7 +136,7 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path,
if not skip:
logging.info("Master: Building working directory for gene model prediction.")
PanGuess.BuildRefSet(workdir, ref)

# Loop over each genome and carry out gene model prediction.
for genome in genomes:
# Make tag from genome name (assuming genome name is in the format STRAIN.fna).
Expand All @@ -150,27 +150,27 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path,
# Run prediction using Exonerate.
cmds = PanGuess.BuildExonerateCmds(workdir, ex_path, genome)
exonerate_genes = PanGuess.RunExonerate(cmds, cores)

# Order gene models predicted via Exonerate by Contig ID: Location.
logging.info("Master: Sorting gene model predictions by genomic location.")
exonerate_genes.sort(key=lambda x: (x.contig_id, x.locs[0]))

# Extract genomic attributes from Exonerate gene model set.
exonerate_attributes = PanGuess.GetExonerateAttributes(exonerate_genes, tag)

else:
logging.info("Master: Skipping gene model prediction via Exonerate (--no_exonerate enabled).")
exonerate_genes = None
exonerate_attributes = None

# Run prediction using GeneMark-ES.
logging.info("Master: Running gene model prediction for {0} using GeneMark-ES.".format(genome))
genemark_gtf = PanGuess.RunGeneMark(genome, gm_path, gm_branch, cores)

# Convert GeneMark-ES GTF file into a more PanOCT-compatible version.
logging.info("Master: Converting GeneMark GTF data to attribute data.")
genemark_attributes = PanGuess.GeneMarkGTFConverter(genemark_gtf, tag)

# Merge unique gene model calls between Exonerate and GeneMark-ES.
if not skip:
logging.info("Master: Merging Exonerate and GeneMark-ES gene calls.")
Expand All @@ -182,31 +182,31 @@ def PanGuessHandler(ex_path, gm_path, tp_path, tl_path,
# Clean up GeneMark-ES files and folders.
logging.info("Master: Tidying up GeneMark-ES temporary files.")
PanGuess.MoveGeneMarkFiles(workdir, genome)

# Extract NCRs into list.
logging.info("Master: Extracting non-coding regions of {0} for TransDecoder analysis.".format(genome))
noncoding = PanGuess.ExtractNCR(merged_attributes, genome)

# Run TransDecoder on NCRs.
logging.info("Master: Running TransDecoder on non-coding regions of {0}.".format(genome))
tdir = PanGuess.RunTransDecoder(noncoding, tp_path, tl_path, workdir, genome, td_len)

# Move TransDecoder files.
logging.info("Master: Tidying up TransDecoder temporary files.")
PanGuess.MoveTransDecoderFiles(tdir)

# Extract TransDecoder attributes.
logging.info("Master: Converting TransDecoder GTF data to attribute data.")
trans_attributes = PanGuess.TransDecoderGTFToAttributes(tdir, tag)

# Merge TransDecoder calls into the Exonerate + GeneMark-ES set.
logging.info("Master: Merging all remmaining gene calls for {0}.".format(genome))
full_attributes = PanGuess.MergeAttributes(merged_attributes, trans_attributes)

# Write out gene set, protein set and attributes set.
logging.info("Master: Writing out datasets for {0}.".format(genome))
PanGuess.ConstructGeneModelSets(full_attributes, exonerate_genes, workdir, genome, tag)

# Compress temporary folders and finish up.
#logging.info("Master: Compressing temporary folders for {0}.".format(genome))
#PanGuess.TarballGenePredictionDirs(workdir, genome)
Expand Down Expand Up @@ -331,7 +331,7 @@ def PAMLHandler(ml_path, yn_path, refine=False):
try:
trans_seqs = PAML.TranslateCDS(cluster)
except TranslationError as e:
print "{0}, {1} has unusual frameshift mutation and can't be run through yn00.".format(e, cluster)
print("{0}, {1} has unusual frameshift mutation and can't be run through yn00.".format(e, cluster))
trans_seqs = None
if trans_seqs:
prot_alignment = PAML.MUSCLEAlign(ml_path, trans_seqs)
Expand Down Expand Up @@ -525,9 +525,9 @@ def main():
if ap.pred or ap.pred_only:
in_date = CheckGeneMarkLicence(start_time)
if not in_date:
print "Your 400-day GeneMark-ES license is out of date and hence PanGloss can't predict genes." \
print("Your 400-day GeneMark-ES license is out of date and hence PanGloss can't predict genes." \
"Go to http://exon.gatech.edu/GeneMark/gmes_instructions.html to download a new license key," \
"and place it in your home folder under the name .gm_key. Exiting out of Pangloss."
"and place it in your home folder under the name .gm_key. Exiting out of Pangloss.")
exit(0)
panguess_args = [ex_path, gm_path, tp_path, tl_path]
logging.info("Master: Performing gene prediction steps using PanGuess.")
Expand Down Expand Up @@ -597,8 +597,8 @@ def main():
# If enabled, run InterProScan analysis on entire dataset.
if ap.ips:
if not sys.platform.startswith("linux"):
print "InterProScan is not supported on non-Linux operating systems. Cannot run InterProScan analysis."
print "See https://github.com/ebi-pf-team/interproscan/wiki for more information."
print("InterProScan is not supported on non-Linux operating systems. Cannot run InterProScan analysis.")
print("See https://github.com/ebi-pf-team/interproscan/wiki for more information.")
pass
else:
IPSHandler(ip_path)
Expand Down
6 changes: 3 additions & 3 deletions Pangloss/BLASTAll.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
BLASTAll: Module for handling parallelized all-vs.-all BLASTp searches, if enabled by user.
"""

import cStringIO
import io
import logging
import multiprocessing as mp
import subprocess as sp

from Bio import SeqIO, SearchIO

from Tools import StringBLAST
from .Tools import StringBLAST


def BLASTAll(cores=None):
Expand Down Expand Up @@ -52,7 +52,7 @@ def MergeBLASTsAndWrite(results):
# Filter last two lines of each BLASTp result and join remaining lines together, making one big SearchIO object.
logging.info("BLASTAll: Merging all-vs.-all results together and parsing into tabular format.")
merged = "\n".join((["\n".join(result.split("\n")[:-2]) for result in results if result]))
parsed = SearchIO.parse(cStringIO.StringIO(merged), "blast-tab", comments=True)
parsed = SearchIO.parse(io.StringIO(merged), "blast-tab", comments=True)

# Write merged BLASTp results to file for PanOCT.
logging.info("BLASTAll: Writing BLASTp results to file panoct.blast.")
Expand Down
4 changes: 2 additions & 2 deletions Pangloss/BUSCO.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import shutil
import subprocess as sp

from Tools import TryMkDirs
from .Tools import TryMkDirs


def RunBUSCO(buscopath, lineagepath, gene_sets):
Expand All @@ -17,6 +17,6 @@ def RunBUSCO(buscopath, lineagepath, gene_sets):
for gene_set in gene_sets:
wd = gene_set.split("/")[-1]
cmd = [buscopath, "-i", gene_set, "-l", lineagepath, "-o", "{0}.busco".format(wd), "-m", "prot"]
print "Running BUSCO"
print("Running BUSCO")
sp.call(cmd)
shutil.move("run_{0}.busco".format(wd), bdir)
2 changes: 1 addition & 1 deletion Pangloss/ExonerateGene.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(self, string):
prot.append(seq)
if "*" in record.seq[:-1]:
stop = True
cds_region = filter(lambda x: len(x) == 3, fragment.aln_annotation["hit_annotation"])
cds_region = [x for x in fragment.aln_annotation["hit_annotation"] if len(x) == 3]
nucl.append(str("".join(cds_region)))

# Populate attributes.
Expand Down
6 changes: 3 additions & 3 deletions Pangloss/GO.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess as sp
from csv import reader

from Tools import Flatten, ParseMatchtable, TryMkDirs
from .Tools import Flatten, ParseMatchtable, TryMkDirs


def MakeWorkingDirs():
Expand Down Expand Up @@ -62,8 +62,8 @@ def GeneratePopulations(annos, matchtable):
Write out background (full) population and study (core, accessory) population files for use in GOATools.
"""
core, acc = ParseMatchtable(matchtable)
c_pop = [val for val in Flatten(core.values()) if val in annos]
a_pop = [val for val in Flatten(acc.values()) if val in annos]
c_pop = [val for val in Flatten(list(core.values())) if val in annos]
a_pop = [val for val in Flatten(list(acc.values())) if val in annos]
full_pop = c_pop + a_pop
with open("go/core_pop.txt", "w") as cp_file, open("go/acc_pop.txt", "w") as ap_file,\
open("go/full_pop.txt", "w") as fp_file:
Expand Down
16 changes: 8 additions & 8 deletions Pangloss/Karyotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from Bio import SeqIO

from Tools import Flatten, ParseMatchtable, ParseKaryotypes, TryMkDirs
from .Tools import Flatten, ParseMatchtable, ParseKaryotypes, TryMkDirs


def GenerateContigLengths(genomes):
Expand Down Expand Up @@ -48,19 +48,19 @@ def GenerateKaryotypeFiles(attributes, matchtable):

for row in attread:
karyo = [row[0], row[1], row[2], row[3]]
core_gms = Flatten(core.values())
acc_gms = Flatten(acc.values())
total = len(core.values()[0])
core_gms = Flatten(list(core.values()))
acc_gms = Flatten(list(acc.values()))
total = len(list(core.values())[0])
if row[1] in core_gms:
number = core_gms.index(row[1]) / total
cluster = core.values()[number]
ortho = len(filter(lambda x: x is not None, cluster))
cluster = list(core.values())[number]
ortho = len([x for x in cluster if x is not None])
karyo = karyo + ["core", row[5], str(ortho)]
karyotype.append(karyo)
elif row[1] in acc_gms:
number = acc_gms.index(row[1]) / total
cluster = acc.values()[number]
ortho = len(filter(lambda x: x is not None, cluster))
cluster = list(acc.values())[number]
ortho = len([x for x in cluster if x is not None])
karyo = karyo + ["acc", row[5], str(ortho)]
karyotype.append(karyo)
else:
Expand Down
10 changes: 5 additions & 5 deletions Pangloss/PAML.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
PAML: Module for handling yn00 selection analysis (and maybe CodeML in the future), if enabled by user.
"""

import cStringIO
import io
import os

from Bio import AlignIO, SeqIO
from Bio.Phylo.PAML import yn00
from Bio.Phylo.PAML._paml import PamlError
from glob import glob

from Tools import StringMUSCLE, Untranslate
from .Tools import StringMUSCLE, Untranslate


def TranslateCDS(seqs):
Expand All @@ -32,7 +32,7 @@ def MUSCLEAlign(ml_path, seqs):
Align translated nucleotides in StringMUSCLE, return parsed alignment.
"""
output = StringMUSCLE(ml_path, seqs)
return AlignIO.parse(cStringIO.StringIO(output), "fasta")
return AlignIO.parse(io.StringIO(output), "fasta")


def PutGaps(alignment, cluster):
Expand All @@ -53,7 +53,7 @@ def PutGaps(alignment, cluster):
unseq.id = seq.id.split("|")[0]
nucl_aln += (">{0}\n{1}\n".format(unseq.id, unseq.seq))

fas_aln = AlignIO.read(cStringIO.StringIO(nucl_aln), "fasta")
fas_aln = AlignIO.read(io.StringIO(nucl_aln), "fasta")
AlignIO.write(fas_aln, "{0}.aln".format(cluster), "phylip-sequential")

return "{0}.aln".format(cluster)
Expand All @@ -68,7 +68,7 @@ def RunYn00(yn_path, alignment):
try:
yn.run(ctl_file=None, command=yn_path, parse=False)
except PamlError as e:
print "{0}, {1} may have internal stop codons.".format(e, alignment)
print("{0}, {1} may have internal stop codons.".format(e, alignment))
pass


Expand Down
12 changes: 6 additions & 6 deletions Pangloss/PanGuess.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
Maynooth University in 2017-2019 (Charley.McCarthy@nuim.ie).
"""

from __future__ import division


import logging
import multiprocessing as mp
Expand All @@ -62,7 +62,7 @@
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

from Tools import ExonerateCmdLine, LocationOverlap, Pairwise, TryMkDirs # get_gene_lengths
from .Tools import ExonerateCmdLine, LocationOverlap, Pairwise, TryMkDirs # get_gene_lengths


def LengthOverlap(gene, ref_lengths):
Expand Down Expand Up @@ -288,7 +288,7 @@ def ExtractNCR(attributes, genome):

# Loop over every contig/chromosome in the genome.
for seq in db:
coding = filter(lambda x: x[0] == seq.id, attributes)
coding = [x for x in attributes if x[0] == seq.id]
for gene, next_gene in Pairwise(coding):
if coding.index(gene) == 0:
if gene[2] != 0:
Expand Down Expand Up @@ -377,11 +377,11 @@ def TransDecoderGTFToAttributes(tdir, tag):
if row:
if len(row) == 9:
contig_id = re.match(cregex, row[0]).group()[:-5]
global_locs = map(int, row[0].split("_")[-2:])
global_locs = list(map(int, row[0].split("_")[-2:]))
if row[2] == "exon":
exon_count = exon_count + 1
if row[2] == "CDS":
relative_locs = map(int, row[3:5])
relative_locs = list(map(int, row[3:5]))
start = global_locs[0] + relative_locs[0] - 1
stop = global_locs[0] + relative_locs[1] - 1
locs = [start, stop]
Expand Down Expand Up @@ -447,7 +447,7 @@ def ConstructGeneModelSets(attributes, exonerate_genes, workdir, genome, tag):
prot_models.append(prot_seq)
nucl_models.append(nucl_seq)
if gene[4].startswith("Exonerate"):
match = filter(lambda x: x.id == gene[1], exonerate_genes)
match = [x for x in exonerate_genes if x.id == gene[1]]
prot_seq = SeqRecord(Seq(match[0].prot), id=match[0].id)
nucl_seq = SeqRecord(Seq(match[0].nucl), id=match[0].id)
prot_seq.id = "{0}|{1}".format(tag, prot_seq.id)
Expand Down
Loading