Skip to content

Commit

Permalink
Merge pull request #35 from ding-lab/v0.5.4
Browse files Browse the repository at this point in the history
V0.5.4
  • Loading branch information
fernanda-rodrigues authored Oct 1, 2019
2 parents 9ab6105 + de55b2b commit f8403f7
Show file tree
Hide file tree
Showing 6 changed files with 66,958 additions and 13 deletions.
65,069 changes: 65,069 additions & 0 deletions PanCanAtlasData/MC3.noHypers.mericUnspecified.d10.r20.v114.grch38liftOver.clusters

Large diffs are not rendered by default.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion charger/autovivification.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# - Fernanda Martins Rodrigues (fernanda@wustl.edu)
# - Jay R. Mashl (rmashl@wustl.edu)
# - Kuan-lin Huang (khuang@genome.wustl.edu)
# version: v0.5.3 - September, 2019
# version: v0.5.4 - September, 2019

class autovivification(dict):
'''Implementation of perl's autovivification feature.'''
Expand Down
69 changes: 59 additions & 10 deletions charger/charger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# - Fernanda Martins Rodrigues (fernanda@wustl.edu)
# - Jay R. Mashl (rmashl@wustl.edu)
# - Kuan-lin Huang (khuang@genome.wustl.edu)
# version: v0.5.3 - September, 2019
# version: v0.5.4 - September, 2019

import os
import sys
Expand Down Expand Up @@ -201,7 +201,7 @@ def readMAF( self , inputFile , **kwargs ):
def readVCF( self , inputFile , **kwargs ):
""" read & parse input .vcf
Look for VEP CSQ info field & extract all information possible
Can get allele frequency & clinical annotations after VEP v81 (or so)
Can get allele frequency & clinical annotations for all VEP releases after VEP v81.
http://useast.ensembl.org/info/docs/tools/vep/vep_formats.html#output
"""
inFile = None
Expand Down Expand Up @@ -332,6 +332,11 @@ def skipIfNotInMutationTypes( self , var ):
return False

def getVEPConsequences( self , info , var , preVEP ):
""" read & parse VEP annotations from input .vcf
Look for VEP CSQ info field & extract all information possible.
As of CharGer release 0.5.4, this function can get allele frequency & clinical annotations for all VEP releases after VEP v81.
http://useast.ensembl.org/info/docs/tools/vep/vep_formats.html#output
"""
csq = info.get( 'CSQ' , "noCSQ" )
if not csq == "noCSQ":
vepDone = True
Expand Down Expand Up @@ -379,8 +384,11 @@ def getVEPConsequences( self , info , var , preVEP ):
)
var.vepVariant.consequences.append( vcv )
self.getMostSevereConsequence( var )
hasAF = self.getExAC_MAF( values , var )
hasAF = self.getGMAF( values , var )
# adding support for VEP releases ≥ 90, which contain gnomAD frequencies by default.
# will prioritize gnomAD AF over ExAC and 1000 Genomes, when possible
hasAF = self.getGnomAD_MAF ( values , var )
hasAF = self.getExAC_MAF( values , var ) # if gnomAD MAF not there, take ExAC
hasAF = self.getGMAF( values , var ) # if gnomAD or ExAC MAF not there, take 1000 genomes
self.getCLIN_SIG( values , var )

def getCodingPosition( self , values , var , preVEP , key ):
Expand Down Expand Up @@ -461,14 +469,44 @@ def getConsequence( self , values ):
if consequence_terms:
csq_terms = self.getVCFKeyIndex( values , "Consequence" ).split( "&" )
return csq_terms

# added function to parse gnomAD AF from VEP annotation
def getGnomAD_MAF( self , values , var ):
    """Fill var.alleleFrequency from the VEP "gnomAD_AF" annotation, if possible.

    Nothing is done when var.alleleFrequency is already set, so a frequency
    found earlier is never overwritten. VEP annotates gnomAD_AF by default
    when run with --everything as of release 90; presence of the
    "gnomAD_AF" key in self.vcfKeyIndex is what is actually tested here.

    values -- passed through unchanged to self.getVCFKeyIndex
              (presumably the fields of one CSQ entry -- confirm against caller)
    var    -- variant object whose alleleFrequency attribute is read and set

    Returns True if var.alleleFrequency was set by this call, False otherwise.
    The value is stored exactly as returned by getVCFKeyIndex (no conversion).
    """
    # Only look for gnomAD_AF when no frequency is known yet
    # (refer to pull request #28).
    if var.alleleFrequency is not None:
        return False
    if "gnomAD_AF" not in self.vcfKeyIndex:
        print("Unsupported VEP version or no gnomAD AF annotation in input file; will search for ExAC frequencies...")
        return False
    gnomad_af = self.getVCFKeyIndex( values , "gnomAD_AF" )
    # A missing (None) or empty annotation leaves the frequency unset.
    if not gnomad_af:
        return False
    var.alleleFrequency = gnomad_af
    return True

def getExAC_MAF( self , values , var ):
#if the .vcf does not have AF, then check for ExAC_MAF
# if the .vcf does not have AF or gnomAD_AF, then check for ExAC_AF
# function now supports both field names: ExAC_MAF (older VEP releases, up to v87) and ExAC_AF (newer releases, e.g. v90)
if ( var.alleleFrequency is None ): # fixed (refer to pull request #28)
emaf = self.getVCFKeyIndex( values , "ExAC_MAF" )
if "ExAC_MAF" in self.vcfKeyIndex:
emaf = self.getVCFKeyIndex( values , "ExAC_MAF" )
VEP_version=87 # nomenclature changed to ExAC_AF after this VEP release
elif "ExAC_AF" in self.vcfKeyIndex:
emaf = self.getVCFKeyIndex( values , "ExAC_AF" )
VEP_version=90
else:
print("Unsupported VEP version or no ExAC AF annotation in input file; will search for 1000 Genomes frequencies...")
return False
if emaf is not None:
if len(emaf)==0:
return False
if len(emaf)>0 and VEP_version>=90:
var.alleleFrequency = emaf
return True
for alt in emaf.split( "&" ):
if alt.split(":")[0] == var.alternate:
parts = alt.split( ":" )
Expand All @@ -478,12 +516,23 @@ def getExAC_MAF( self , values , var ):
return False

def getGMAF( self , values , var ):
#if the .vcf does not have AF or ExAC_MAF, then check for 1kg MAF
if ( var.alleleFrequency is None ): # fixed (refer to pull request #28)
gmaf = self.getVCFKeyIndex( values , "GMAF" )
# if the .vcf does not have AF, gnomAD_AF or ExAC_AF, then check for frequencies from 1000 Genomes
# function now supports both field names: GMAF (older VEP releases, up to v87) and AF (newer releases, e.g. v90)
if ( var.alleleFrequency is None ): # fixed (refer to pull request #28)
if "GMAF" in self.vcfKeyIndex:
gmaf = self.getVCFKeyIndex( values , "GMAF" )
VEP_version=87 # nomenclature changed to AF after this VEP release
elif "AF" in self.vcfKeyIndex:
gmaf = self.getVCFKeyIndex( values , "AF" )
VEP_version=90
else:
print("unsupported VEP version or not 1kG AF annotation in input file.")
if gmaf is not None:
if len(gmaf)==0:
if len(gmaf) == 0:
return False
if len(gmaf)>0 and VEP_version==90:
var.alleleFrequency = gmaf
return True
for alt in gmaf.split( "&" ):
if alt.split(":")[0] == var.alternate:
parts = alt.split( ":" )
Expand Down
2 changes: 1 addition & 1 deletion charger/chargervariant.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# - Fernanda Martins Rodrigues (fernanda@wustl.edu)
# - Jay R. Mashl (rmashl@wustl.edu)
# - Kuan-lin Huang (khuang@genome.wustl.edu)
# version: v0.5.3 - September, 2019
# version: v0.5.4 - September, 2019

import pdb
from biomine.variant.clinvarvariant import clinvarvariant
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#https://docs.python.org/2/distutils/examples.html
from distutils.core import setup
version = "0.5.3"
version = "0.5.4"
setup( \
name = 'CharGer' ,
version = version ,
Expand Down

0 comments on commit f8403f7

Please sign in to comment.