Skip to content

Commit

Permalink
Merge pull request #5 from dparks1134/dev
Browse files Browse the repository at this point in the history
Better handling of genomes causing Prodigal or HMMER exceptions; use …
  • Loading branch information
donovan-h-parks authored Jan 4, 2022
2 parents 2720198 + 114f7a7 commit 1aad2f2
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 180 deletions.
4 changes: 4 additions & 0 deletions unitem/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
1.0.4
- better handling of genomes that fail with Prodigal or HMMER
- make use of previously calculated results for calling genes and identifying marker genes

1.0.3
- fix bug with progress indicator for identifying Pfam and TIGRfam markers

Expand Down
19 changes: 16 additions & 3 deletions unitem/external/pfam_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@
PFAM_OUT = '_pfam.out'


class PfamException(Exception):
"""Exception for errors associated with running HMMER with Pfam marker genes."""

def __init__(self, message=''):
Exception.__init__(self, message)


class PfamSearch(object):
"""Runs pfam_search.pl over a set of genomes."""

Expand Down Expand Up @@ -116,6 +123,12 @@ def _worker(self, gene_dir, output_dir, queue_in, queue_out):
output_hit_file = os.path.join(gene_dir, filename.replace(AA_GENE_FILE_SUFFIX,
PFAM_SUFFIX))

if os.path.exists(output_hit_file):
# use previously calculated results
queue_out.put(gene_file)
continue

# identify Pfam marker genes
pfam_scan = PfamScan(cpu=self.cpus_per_genome,
fasta=gene_file,
dir=self.marker_dir)
Expand Down Expand Up @@ -183,10 +196,10 @@ def run(self, gene_files, gene_dir, output_dir):

for p in worker_proc:
p.join()

if p.exitcode != 0:
self.logger.error(
'An error was encountered while running hmmsearch.')
sys.exit(1)
raise PfamException(
'HMMER returned non-zero exit code.')

writer_queue.put(None)
write_proc.join()
Expand Down
26 changes: 17 additions & 9 deletions unitem/external/prodigal.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,23 @@ def version():
def _run_prodigal(self, gid, genome_file):
"""Run Prodigal."""

# check if genome has previously been processed
trans_table_file = os.path.join(
self.output_dir,
f'{gid}_translation_table.tsv')
aa_gene_file = os.path.join(
self.output_dir, f'{gid}{AA_GENE_FILE_SUFFIX}')
nt_gene_file = os.path.join(
self.output_dir, f'{gid}{NT_GENE_FILE_SUFFIX}')
gff_file = os.path.join(self.output_dir, f'{gid}.gff')

if (os.path.exists(trans_table_file)
and os.path.exists(aa_gene_file)
and os.path.exists(nt_gene_file)
and os.path.exists(gff_file)):
# use previously calculated results
return aa_gene_file, nt_gene_file, gff_file, trans_table_file

seqs = read_fasta(genome_file)

if len(seqs) == 0:
Expand Down Expand Up @@ -144,9 +161,6 @@ def _run_prodigal(self, gid, genome_file):
table_coding_density[trans_table] = codingDensity

# determine best translation table
trans_table_file = os.path.join(
self.output_dir,
f'{gid}_translation_table.tsv')
trans_table_out = open(trans_table_file, 'w')
trans_table_out.write('Table\tCoding density\n')
trans_table_out.write(f'4\t{table_coding_density[4]:.3f}\n')
Expand All @@ -159,12 +173,6 @@ def _run_prodigal(self, gid, genome_file):
trans_table_out.write(f'\nSelected table\t{best_tt}\n')

# copy results for best translation table to desired output files
aa_gene_file = os.path.join(
self.output_dir, f'{gid}{AA_GENE_FILE_SUFFIX}')
nt_gene_file = os.path.join(
self.output_dir, f'{gid}{NT_GENE_FILE_SUFFIX}')
gff_file = os.path.join(self.output_dir, f'{gid}.gff')

best_tt_dir = os.path.join(tmp_dir, str(best_tt))
shutil.copyfile(os.path.join(
best_tt_dir, f'{gid}{AA_GENE_FILE_SUFFIX}'), aa_gene_file)
Expand Down
28 changes: 20 additions & 8 deletions unitem/external/tigrfam_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@
TIGRFAM_OUT = '_tigrfam.out'


class TigrfamException(Exception):
"""Exception for errors associated with running HMMER with TIGRfam marker genes."""

def __init__(self, message=''):
Exception.__init__(self, message)


class TigrfamSearch(object):
"""Runs TIGRFAM HMMs over a set of genomes."""

Expand Down Expand Up @@ -143,6 +150,12 @@ def _worker(self, gene_dir, output_dir, queue_in, queue_out):
hmmsearch_out = os.path.join(gene_dir, filename.replace(AA_GENE_FILE_SUFFIX,
TIGRFAM_OUT))

if os.path.exists(output_hit_file) and os.path.exists(hmmsearch_out):
# use previously calculated results
queue_out.put(gene_file)
continue

# identify TIGRfam marker genes
cmd = ['hmmsearch', '--noali', '--notextw', '--cut_nc',
'-o', hmmsearch_out,
'--tblout', output_hit_file,
Expand All @@ -154,7 +167,9 @@ def _worker(self, gene_dir, output_dir, queue_in, queue_out):
if p.returncode != 0:
self.logger.error(
f'Non-zero exit code returned when running hmmsearch: {stderr}')
sys.exit(1)
self.logger.error(f'Skipping {filename}.')

return None

# identify top hit for each gene
self._top_hit(output_hit_file, output_dir)
Expand Down Expand Up @@ -218,15 +233,12 @@ def run(self, gene_files, gene_dir, output_dir):
for p in worker_proc:
p.join()

if p.exitcode != 0:
raise TigrfamException(
'HMMER returned non-zero exit code.')

writer_queue.put(None)
write_proc.join()

for proc in worker_proc:
if proc.exitcode != 0:
self.logger.error(
'An error was encountered while running hmmsearch.')
sys.exit(1)

except Exception:
for p in worker_proc:
p.terminate()
Expand Down
Loading

0 comments on commit 1aad2f2

Please sign in to comment.