Skip to content

Commit

Permalink
better minimap2 sam output support
Browse files Browse the repository at this point in the history
  • Loading branch information
Baoxing Song committed Feb 24, 2019
1 parent d4a5dcb commit c09e27c
Show file tree
Hide file tree
Showing 9 changed files with 364 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,6 @@ add_executable(gean
src/impl/deNovoGenomeVariantCalling.cpp
src/impl/deNovoGenomeVariantCalling.h
src/service/deNovoGenomeVariantCallingService.cpp
src/service/deNovoGenomeVariantCallingService.h src/myImportandFunction/hengAlign.cpp src/myImportandFunction/hengAlign.h src/cns.cpp src/song_cns.cpp src/song_cns.h )
src/service/deNovoGenomeVariantCallingService.h src/myImportandFunction/hengAlign.cpp src/myImportandFunction/hengAlign.h src/cns.cpp src/song_cns.cpp src/song_cns.h src/tests/service/TransferAllExonWithSpliceAlignmentResult_test.cpp)

target_link_libraries(gean gtest gtest_main)
71 changes: 71 additions & 0 deletions script/fordebug/undectedOrfNonCompletes.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!perl
use strict;
use warnings FATAL => 'all';

my %allTranscripts;
my %allGoodTranscripts;
my %liftTranscripts;
my %realignTranscripts;
my $source="";
my $transcript="";

open INPUT, "/workdir/bx674/melon_debug/bs674/melon_gean_purify.gff";
while( my $line=<INPUT> ){
if( $line=~/^(\S+)\t+(\S+)\t+.*Parent=([0123456789MELOC]+\.\d\.\d)/ ){
$source=$2;
$transcript=$3;
}elsif( $line=~/ConservedFunction/ ){
if( $source eq "LIFTOVER" ){
$liftTranscripts{$transcript}=1;
}elsif( $source eq "REALIGNMENT" ){
$realignTranscripts{$transcript}=1;
}
$allGoodTranscripts{$transcript}=1;
}
$allTranscripts{$transcript}=1;
}
close INPUT;

my %lostOfFunctions;
my %withFunctions;
open INPUT, "/workdir/bx674/melon/gean/CM3.6.1_pseudomol_cds.fa";
while( my $line=<INPUT> ){
if( $line=~/^>(\S+)\s/ ){
$transcript=$1;
if( $line=~/ConservedFunction/ ){
$withFunctions{$transcript}=1;
}else{
$lostOfFunctions{$transcript}=1;
}
}
}
close INPUT;

print "those transcripts have re-annotated as functional\n";

while( my ($key, $value) = each %allGoodTranscripts ){
if( exists $lostOfFunctions{$key} ){
print "$key\n";
}
}

print "those functional transcript have not been aligned correctly\n";

while( my ($key, $value) = each %withFunctions ){
if( exists $allGoodTranscripts{$key} ){

}else{
print "$key\n";
}
}


my $size = keys %allTranscripts;
print "allTranscripts\t$size\n";
$size = keys %allGoodTranscripts;
print "allGoodTranscripts\t$size\n";
$size = keys %realignTranscripts;
print "realignTranscripts\t$size\n";
$size = keys %liftTranscripts;
print "liftTranscripts\t$size\n";

2 changes: 1 addition & 1 deletion song.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ using namespace std;

int main(int argc, char** argv){
//testing::InitGoogleTest(&argc, argv);
// RUN_ALL_TESTS();
//RUN_ALL_TESTS();
//return 0;

if( argc<=1 ){
Expand Down
3 changes: 1 addition & 2 deletions src/controlLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,7 @@ int TransGff( int argc, char** argv, std::map<std::string, std::string>& paramet
if( inputParser.cmdOptionExists("-w") ){
windowWidth = std::stoi( inputParser.getCmdOption("-w") );
}
TransferAllExonWithNucmerResult( gffFilePath, databaseFastaFilePath, queryFastaFilePath, nucmerFilePath, parameters, outPutFilePath, minIntron, slowMode, windowWidth,lengthThreadhold );

TransferAllExonWithNucmerResult( gffFilePath, databaseFastaFilePath, queryFastaFilePath, nucmerFilePath, parameters, outPutFilePath, minIntron, slowMode, windowWidth, lengthThreadhold );
return 0;
}else{
std::cerr << usage.str();
Expand Down
15 changes: 9 additions & 6 deletions src/impl/readGffFileWithEverything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ void readGffFileWithEveryThing (const std::string& filePath, std::map<std::strin
cdsParentRegex.insert("Parent=([\\s\\S^,^;]*?)$");//exon
transcriptParentRegex.insert("Parent=([\\s\\S]*?)[;,]");
transcriptIdRegex.insert("ID=([\\s\\S]*?)[;,]");
geneIdRegex.insert("ID=([\\s\\S]*?)[;,]");
geneIdRegex.insert("ID=([\\s\\S]*?)$"); //melon
//geneIdRegex.insert("ID=([\\s\\S]*?)[;,;]");
geneIdRegex.insert("ID=([-_0-9:a-zA-Z.]*?)[;,]");
geneIdRegex.insert("ID=([-_0-9:a-zA-Z.]*?)$"); //melon
// tair10 end

//ensembl begin
Expand Down Expand Up @@ -167,6 +168,7 @@ void readGffFileWithEveryThing (const std::string& filePath, std::map<std::strin
Gene gene1(geneId, chromosomeName, strand);
geneHashMap[geneId] = gene1;
}
// std::cout << "line 170 " << geneId << std::endl;
geneHashMap[geneId].setStart(start);
geneHashMap[geneId].setEnd(end);
geneHashMap[geneId].setSource(source);
Expand Down Expand Up @@ -220,11 +222,12 @@ void readGffFileWithEveryThing (const std::string& filePath, std::map<std::strin
if (geneHashMap.find(geneId) != geneHashMap.end()) {

} else {
//std::cout << "there is something wrong with the gff file, maybe could not generate correct result" << std::endl;
Gene gene1(geneId, chromosomeName, strand);
geneHashMap[geneId] = gene1;
}
geneHashMap[geneId].addTranscript(transcriptHashMap[transcriptId]);
// std::cout << "line 178 very good very good " << geneHashMap[geneId].getName() << " " << transcriptHashMap[transcriptId].getName() << std::endl;
//std::cout << "line 178 very good very good " << geneHashMap[geneId].getName() << " " << transcriptHashMap[transcriptId].getName() << std::endl;
matched = true;
break; // jump out for loop
}
Expand Down Expand Up @@ -338,7 +341,7 @@ void readGffFileWithEveryThing (const std::string& filePath, std::map<std::strin
} else if (ignoreTypes.find(elemetns[2]) != ignoreTypes.end()) { // ignore those elements

} else {
std::cout << "we could not analysis the line in the gff/gtf file: " << line << std::endl;
// std::cout << "we could not analysis the line in the gff/gtf file: " << line << std::endl;
}
}
}
Expand Down Expand Up @@ -439,8 +442,8 @@ void readGffFileWithEveryThing (const std::string& filePath, std::map<std::strin
cdsParentRegex.insert("Parent=([\\s\\S^,^;]*?)$");//exon
transcriptParentRegex.insert("Parent=([\\s\\S]*?)[;,]");
transcriptIdRegex.insert("ID=([\\s\\S]*?)[;,]");
geneIdRegex.insert("ID=([\\s\\S]*?)[;,]");
geneIdRegex.insert("ID=([\\s\\S]*?)$"); //melon
geneIdRegex.insert("ID=([-_0-9:a-zA-Z.]*?)[;,]");
geneIdRegex.insert("ID=([-_0-9:a-zA-Z.]*?)$"); //melon
// tair10 end

//ensembl begin
Expand Down
Loading

0 comments on commit c09e27c

Please sign in to comment.