Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Acribbs committed Jul 23, 2021
2 parents c79d251 + 9dcc08e commit f7ee682
Show file tree
Hide file tree
Showing 31 changed files with 291 additions and 729 deletions.
696 changes: 0 additions & 696 deletions cgat/Plots/VariantPlots.py

This file was deleted.

Empty file removed cgat/Plots/__init__.py
Empty file.
38 changes: 18 additions & 20 deletions cgat/SequenceProperties.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@

from cgat import Genomics as Genomics

import Bio.Alphabet.IUPAC


class SequenceProperties(object):
"""Base class.
Expand Down Expand Up @@ -223,7 +221,7 @@ def __init__(self, reference_usage=[]):
self.mCountsOthers = 0
# counts of nucleotides
self.mCountsNA = {}
self.mAlphabet = Bio.Alphabet.IUPAC.unambiguous_dna.letters + "N"
self.mAlphabet = "GATCN"
for x in self.mAlphabet:
self.mCountsNA[x] = 0

Expand Down Expand Up @@ -313,7 +311,7 @@ def __init__(self, reference_usage=[]):
SequenceProperties.__init__(self)
self.mCountsDinuc = {}
self.mCountsOthers = 0
self.mAlphabet = Bio.Alphabet.IUPAC.unambiguous_dna.letters
self.mAlphabet = "GATC"
for dinucleotide in itertools.product(self.mAlphabet, repeat=2):
self.mCountsDinuc["".join(dinucleotide)] = 0

Expand Down Expand Up @@ -555,7 +553,7 @@ def __init__(self):
xx = []
for y in range(5):
yy = {}
for z in Bio.Alphabet.IUPAC.extended_dna.letters:
for z in "GATCBDSW":
yy[z] = 0
xx.append(yy)
self.mCountsDegeneracy.append(xx)
Expand All @@ -567,7 +565,7 @@ def addProperties(self, other):

for x in (0, 1, 2):
for y in range(5):
for z in Bio.Alphabet.IUPAC.extended_dna.letters:
for z in "GATCBDSW":
self.mCountsDegeneracy[x][y][
z] += other.mCountsDegeneracy[x][y][z]

Expand Down Expand Up @@ -603,7 +601,7 @@ def loadSequence(self, sequence, seqtype="na"):
xx = []
for y in range(5):
yy = {}
for z in Bio.Alphabet.IUPAC.extended_dna.letters:
for z in "GATCBDSW":
yy[z] = 0
xx.append(yy)
self.mCountsDegeneracy.append(xx)
Expand Down Expand Up @@ -759,7 +757,7 @@ def __init__(self, reference_usage=[]):

# counts of amino acids
self.mCountsAA = {}
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
self.mCountsAA[x] = 0

def addProperties(self, other):
Expand All @@ -781,7 +779,7 @@ def loadSequence(self, sequence, seqtype="na"):
# counts of amino acids
self.mCountsAA = {}

for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
self.mCountsAA[x] = 0

for codon in (sequence[x:x + 3] for x in range(0, len(sequence), 3)):
Expand All @@ -792,19 +790,19 @@ def getFields(self):

fields = SequenceProperties.getFields(self)
t = 0
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("%i" % self.mCountsAA[x])
t += self.mCountsAA[x]
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("%f" % (float(self.mCountsAA[x]) / t))
return fields

def getHeaders(self):
'''Return list of data headers'''
headers = SequenceProperties.getHeaders(self)
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
headers.append("n%s" % x)
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
headers.append("p%s" % x)
return headers

Expand All @@ -824,7 +822,7 @@ def __init__(self, reference_usage=[]):

# counts of amino acids
self.mCountsAA = {}
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
self.mCountsAA[x] = 0
self.mOtherCounts = 0

Expand All @@ -840,7 +838,7 @@ def loadSequence(self, sequence, seqtype="na"):
SequenceProperties.loadSequence(self, sequence, seqtype)

# set to zero
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
self.mCountsAA[x] = 0
self.mOtherCounts = 0

Expand All @@ -858,25 +856,25 @@ def getFields(self):

t = 0

for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("%i" % self.mCountsAA[x])
t += self.mCountsAA[x]

if t > 0:
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("%f" % (float(self.mCountsAA[x]) / t))
else:
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("0")

return fields

def getHeaders(self):

fields = SequenceProperties.getHeaders(self)
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("n%s" % x)
for x in Bio.Alphabet.IUPAC.extended_protein.letters:
for x in "ACDEFGHIKLMNPQRSTVWYBXZJUO":
fields.append("p%s" % x)

return fields
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/bed2bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

# IMS: new method: extend intervals by set amount
parser.add_argument("-m", "--method", dest="methods", type=str,
action="append",
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/bed2fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-g", "--genome-file", dest="genome_file", type=str,
help="filename with genomic sequence to retrieve "
"sequences from.")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/bed2gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-a", "--as-gtf", dest="as_gtf", action="store_true",
help="output as gtf.")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/bed2stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-g", "--genome-file", dest="genome_file", type=str,
help="filename with genome.")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/bed2table.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-g", "--genome-file", dest="genome_file", type=str,
help="filename with genome.")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/csv_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-r", "--remove", dest="remove", action="store_true",
help="remove specified columns, keep all others.")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/data2histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-r", "--range", dest="range", type=str,
help="range to calculate histogram for.")
parser.add_argument("-b", "--bin-size", dest="bin_size", type=str,
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/gff2bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("--is-gtf", dest="is_gtf", action="store_true",
help="input file is in gtf format")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/gff2fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("--is-gtf", dest="is_gtf", action="store_true",
help="input is gtf instead of gff.")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/gff2psl.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("--is-gtf", dest="is_gtf", action="store_true",
help="input is gtf.")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/gff32gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ def main(argv=None):
# setup command line parser
parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-m", "--method", dest="method", type=str, action="store",
choices=("hierarchy", "set-field", "set-pattern", "set-none"),
help="Method to use for conversion")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/gtf2table.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-g", "--genome-file", dest="genome_file", type=str,
help="filename with genome.")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/randomize_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ def main(argv=None):
# setup command line parser
parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-k", "--keep-header", dest="keep_header", type=int,
help="randomize, but keep header in place ")

Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/split_gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-i", "--min-chunk-size", dest="min_chunk_size", type=int,
help="minimum chunk size.")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/table2stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-d", "--delimiter", dest="delimiter", type=str,
help="delimiter to separate columns ")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/tables2table.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-t", "--no-titles",
dest="input_has_titles",
action="store_false",
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/transfac2transfac.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def main(argv=None):
# setup command line parser
parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-f", "--filter-prefix", dest="filter_prefix", default=None,
help="ID prefix to filter on, eg. V for vertebrates")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/vcf2tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-s", "--sample-size", dest="sample_size", type=float,
help="sample size. If less than 0, take a proportion of the chromosome size. "
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/vcf_compare_phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-i", "--input-vcf", dest="input_vcf_file", type=str,
help="input vcf file")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/vcf_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"-i", "--input-vcf", dest="input_vcf_file", type=str,
help="input vcf file")
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/vcf_vs_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def main(argv=None):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument(
"--regex-filename", dest="regex_filename", type=str,
help="extract column name from filename via regular expression "
Expand Down
2 changes: 2 additions & 0 deletions cgat/tools/wig2bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ def main(argv=sys.argv):

parser = E.ArgumentParser(description=__doc__)

parser.add_argument("--version", action='version', version="1.0")

parser.add_argument("-m", "--method", dest="methods", type=str,
action="append",
choices=("threshold", "stddev-above-mean",
Expand Down
16 changes: 3 additions & 13 deletions conda/environments/cgat-apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,44 +13,34 @@ dependencies:
# python dependencies
- python
- alignlib-lite
- biopython<=1.77
- biopython
- cython
- cgatcore
- jinja2
- cgatcore
- matplotlib
- nose
- numpy
- pandas
- pep8
- pybedtools
- pybigwig
- pysam=0.15.2
- python-lzo
- pysam
- pyyaml
- pytest<=5.4.3
- quicksect
- rdflib
- scikit-learn
- scipy==1.5.3
- seaborn
- setuptools
- six
- sortedcontainers
# R dependencies
# Misc dependencies
# WARNING: gdc-client is Py2 only. Please install it on a separate conda env
- bedtools
- coreutils
- grep
- htslib
- libpng
- samtools
- sra-tools
- tar
- ucsc-bedgraphtobigwig
- ucsc-wigtobigwig
# does not exist on OSX
# - util-linux
- wget
- zlib
- nomkl
Binary file not shown.
Binary file not shown.
Binary file modified tests/data/small.bam
Binary file not shown.
Binary file modified tests/data/small.bam.bai
Binary file not shown.
Loading

0 comments on commit f7ee682

Please sign in to comment.