From c09e27c940352eb9f3e3b25df698e45f6133eef4 Mon Sep 17 00:00:00 2001 From: Baoxing Song Date: Sat, 23 Feb 2019 21:43:20 -0500 Subject: [PATCH] better minimap2 sam output support --- CMakeLists.txt | 2 +- script/fordebug/undectedOrfNonCompletes.pl | 71 +++++ song.cpp | 2 +- src/controlLayer.cpp | 3 +- src/impl/readGffFileWithEverything.cpp | 15 +- src/service/TransferGffWithNucmerResult.cpp | 255 ++++++++++++++++-- src/service/TransferGffWithNucmerResult.h | 3 +- .../alignSlidingWindow_test.cpp | 41 ++- ...rAllExonWithSpliceAlignmentResult_test.cpp | 18 +- 9 files changed, 364 insertions(+), 46 deletions(-) create mode 100644 script/fordebug/undectedOrfNonCompletes.pl diff --git a/CMakeLists.txt b/CMakeLists.txt index 42fb3ee..953184a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,6 +152,6 @@ add_executable(gean src/impl/deNovoGenomeVariantCalling.cpp src/impl/deNovoGenomeVariantCalling.h src/service/deNovoGenomeVariantCallingService.cpp - src/service/deNovoGenomeVariantCallingService.h src/myImportandFunction/hengAlign.cpp src/myImportandFunction/hengAlign.h src/cns.cpp src/song_cns.cpp src/song_cns.h ) + src/service/deNovoGenomeVariantCallingService.h src/myImportandFunction/hengAlign.cpp src/myImportandFunction/hengAlign.h src/cns.cpp src/song_cns.cpp src/song_cns.h src/tests/service/TransferAllExonWithSpliceAlignmentResult_test.cpp) target_link_libraries(gean gtest gtest_main) diff --git a/script/fordebug/undectedOrfNonCompletes.pl b/script/fordebug/undectedOrfNonCompletes.pl new file mode 100644 index 0000000..777fb3c --- /dev/null +++ b/script/fordebug/undectedOrfNonCompletes.pl @@ -0,0 +1,71 @@ +#!perl +use strict; +use warnings FATAL => 'all'; + +my %allTranscripts; +my %allGoodTranscripts; +my %liftTranscripts; +my %realignTranscripts; +my $source=""; +my $transcript=""; + +open INPUT, "/workdir/bx674/melon_debug/bs674/melon_gean_purify.gff"; +while( my $line= ){ + if( $line=~/^(\S+)\t+(\S+)\t+.*Parent=([0123456789MELOC]+\.\d\.\d)/ ){ + $source=$2; + $transcript=$3; + }elsif( $line=~/ConservedFunction/ ){ + if( $source eq "LIFTOVER" ){ + $liftTranscripts{$transcript}=1; + }elsif( $source eq "REALIGNMENT" ){ + $realignTranscripts{$transcript}=1; + } + $allGoodTranscripts{$transcript}=1; + } + $allTranscripts{$transcript}=1; +} +close INPUT; + +my %lostOfFunctions; +my %withFunctions; +open INPUT, "/workdir/bx674/melon/gean/CM3.6.1_pseudomol_cds.fa"; +while( my $line= ){ + if( $line=~/^>(\S+)\s/ ){ + $transcript=$1; + if( $line=~/ConservedFunction/ ){ + $withFunctions{$transcript}=1; + }else{ + $lostOfFunctions{$transcript}=1; + } + } +} +close INPUT; + +print "those transcripts have re-annotated as functional\n"; + +while( my ($key, $value) = each %allGoodTranscripts ){ + if( exists $lostOfFunctions{$key} ){ + print "$key\n"; + } +} + +print "those functional transcript have not been aligned correctly\n"; + +while( my ($key, $value) = each %withFunctions ){ + if( exists $allGoodTranscripts{$key} ){ + + }else{ + print "$key\n"; + } +} + + +my $size = keys %allTranscripts; +print "allTranscripts\t$size\n"; +$size = keys %allGoodTranscripts; +print "allGoodTranscripts\t$size\n"; +$size = keys %realignTranscripts; +print "realignTranscripts\t$size\n"; +$size = keys %liftTranscripts; +print "liftTranscripts\t$size\n"; + diff --git a/song.cpp b/song.cpp index ff01fed..10143c3 100644 --- a/song.cpp +++ b/song.cpp @@ -31,7 +31,7 @@ using namespace std; int main(int argc, char** argv){ //testing::InitGoogleTest(&argc, argv); -// RUN_ALL_TESTS(); +//RUN_ALL_TESTS(); //return 0; if( argc<=1 ){ diff --git a/src/controlLayer.cpp b/src/controlLayer.cpp index 3aa2bc1..20fbc6c 100644 --- a/src/controlLayer.cpp +++ b/src/controlLayer.cpp @@ -521,8 +521,7 @@ int TransGff( int argc, char** argv, std::map& paramet if( inputParser.cmdOptionExists("-w") ){ windowWidth = std::stoi( inputParser.getCmdOption("-w") ); } - TransferAllExonWithNucmerResult( gffFilePath, databaseFastaFilePath, queryFastaFilePath, nucmerFilePath, parameters, outPutFilePath, minIntron, slowMode, windowWidth,lengthThreadhold ); - + TransferAllExonWithNucmerResult( gffFilePath, databaseFastaFilePath, queryFastaFilePath, nucmerFilePath, parameters, outPutFilePath, minIntron, slowMode, windowWidth, lengthThreadhold ); return 0; }else{ std::cerr << usage.str(); diff --git a/src/impl/readGffFileWithEverything.cpp b/src/impl/readGffFileWithEverything.cpp index ad275f3..e20f3af 100644 --- a/src/impl/readGffFileWithEverything.cpp +++ b/src/impl/readGffFileWithEverything.cpp @@ -15,8 +15,9 @@ void readGffFileWithEveryThing (const std::string& filePath, std::map > newStartShitfDistance = (geneHashMap[geneNameMap[chr][i]].getEnd()-geneHashMap[geneNameMap[chr][i]].getStart()); } } +// std::cout << "geneHashMap[geneNameMap[chr][i]].getEnd() " << geneHashMap[geneNameMap[chr][i]].getEnd() << +// " alignmentMatch.getDatabase().getEnd() " << alignmentMatch.getDatabase().getEnd() << std::endl; if( geneHashMap[geneNameMap[chr][i]].getEnd() > alignmentMatch.getDatabase().getEnd() ){ if( endShiftDistance < (geneHashMap[geneNameMap[chr][i]].getEnd()-geneHashMap[geneNameMap[chr][i]].getStart()) ){ //std::cout << "line 612" << std::endl; @@ -618,8 +620,12 @@ void getAlltheOverLappedGenes2( std::map > } // std::cout << "newStartShitfDistance: " << newStartShitfDistance << " newEndShiftDistance: " << newEndShiftDistance << std::endl; if( newStartShitfDistance <= 100000 && newEndShiftDistance <= 100000 ){ - startShitfDistance=newStartShitfDistance; - endShiftDistance=newEndShiftDistance; + if( startShitfDistancegetName() << std::endl; + for (int i=0; i< gene->getTranscriptVector().size(); ++i) { +// std::cout << gene->getTranscriptVector()[i] << std::endl; + if( transcriptHashMap[gene->getTranscriptVector()[i]].getCdsVector().size() > 0 ){ + for (int it4 =0; it4< transcriptHashMap[gene->getTranscriptVector()[i]].getCdsVector().size(); ++it4) { + GenomeBasicFeature * cds = & transcriptHashMap[gene->getTranscriptVector()[i]].getCdsVector()[it4]; + importantPositions[(*cds).getStart()] = 0; + importantPositions[(*cds).getEnd()] = 0; + } + } + if( transcriptHashMap[gene->getTranscriptVector()[i]].getExonVector().size()>0 ){ + for (int it4=0; it4< transcriptHashMap[gene->getTranscriptVector()[i]].getExonVector().size(); ++it4) { + GenomeBasicFeature *exon = & transcriptHashMap[gene->getTranscriptVector()[i]].getExonVector()[it4]; + importantPositions[(*exon).getStart()] = 0; + importantPositions[(*exon).getEnd()] = 0; + } + } + if( transcriptHashMap[gene->getTranscriptVector()[i]].getFivePrimerUtr().size()>0 ){ + for (int it4=0; it4< transcriptHashMap[gene->getTranscriptVector()[i]].getFivePrimerUtr().size(); ++it4) { + importantPositions[transcriptHashMap[gene->getTranscriptVector()[i]].getFivePrimerUtr()[it4].getStart()] = 0; + importantPositions[transcriptHashMap[gene->getTranscriptVector()[i]].getFivePrimerUtr()[it4].getEnd()] = 0; + } + } + if( transcriptHashMap[gene->getTranscriptVector()[i]].getThreePrimerUtr().size()>0 ){ + for (int it4=0; it4< transcriptHashMap[gene->getTranscriptVector()[i]].getThreePrimerUtr().size(); ++it4) { + importantPositions[transcriptHashMap[gene->getTranscriptVector()[i]].getThreePrimerUtr()[it4].getStart()] = 0; + importantPositions[transcriptHashMap[gene->getTranscriptVector()[i]].getThreePrimerUtr()[it4].getEnd()] = 0; + } + } + } + } +// std::cout << "line 682" << std::endl; + int databasePosition = databaseStart - 1; + int newStart; + int queryPosition = 0; + for (size_t tp = 0; tp < alignQuerySequence.length(); ++tp) { + if (alignQuerySequence[tp] != '-') { + ++queryPosition; + } + if (alignDatabaseSequence[tp] != '-') { + ++databasePosition; + if( importantPositions.find(databasePosition) != importantPositions.end() ){ + if (POSITIVE == alignmentMatch.getQueryStrand()) { + newStart = queryStart + queryPosition - 1; + } else { + newStart = queryEnd - queryPosition + 1; + } + if (newStart < 1) { + newStart = 1; + } else if (newStart > + querySequences[alignmentMatch.getQueryChr()].getSequence().length()) { + newStart = querySequences[alignmentMatch.getQueryChr()].getSequence().length(); + } + importantPositions[databasePosition] = newStart; + } + } + } +} + + +void slidingWinAlnAndGeneRateAnnotationSam(AlignmentMatch & alignmentMatch, + std::map & databaseSequences, + std::map & querySequences, + std::vector & overLappedGenes, std::map & transcriptHashMap, + std::map & importantPositions, + const int & slidingWindowSize, const int & startShitfDistance, + const int & endShiftDistance, std::map& parameters, + NucleotideCodeSubstitutionMatrix & nucleotideCodeSubstitutionMatrix){ + + + int queryStart=alignmentMatch.getQueryStart(); + int queryEnd= alignmentMatch.getQueryEnd(); + int databaseStart=alignmentMatch.getDatabaseStart(); + int databaseEnd=alignmentMatch.getDatabaseEnd(); + +// std::cout << "line 677" << std::endl; + std::string alignQuerySequence=""; + std::string alignDatabaseSequence=""; + +// std::string alignQuerySequence1=""; +// std::string alignDatabaseSequence1=""; +// +// std::string alignQuerySequence2=""; +// std::string alignDatabaseSequence2=""; + + std::string querySequence0=""; + std::string databaseSequence0=""; + + if(startShitfDistance>0){ + databaseStart = alignmentMatch.getDatabaseStart()-startShitfDistance; + if( databaseStart < 1 ){ + databaseStart = 1; + } + if( alignmentMatch.getDatabaseStart()>1 ) { + databaseSequence0 = getSubsequence(databaseSequences, alignmentMatch.getDatabaseChr(), + databaseStart, alignmentMatch.getDatabaseStart() - 1); + } + + if( POSITIVE == alignmentMatch.getQueryStrand() && alignmentMatch.getQueryStart()>1 ){ + queryStart = alignmentMatch.getQueryStart()-startShitfDistance; + if( queryStart < 1 ){ + queryStart=1; + } + querySequence0 = getSubsequence(querySequences, alignmentMatch.getQueryChr(), queryStart, + alignmentMatch.getQueryStart()-1); + } else if ( NEGATIVE == alignmentMatch.getQueryStrand() && alignmentMatch.getQueryEnd() < querySequences[alignmentMatch.getQueryChr()].getSequence().length() ) { + queryEnd = alignmentMatch.getQueryEnd()+startShitfDistance; + if( queryEnd > querySequences[alignmentMatch.getQueryChr()].getSequence().length() ){ + queryEnd = querySequences[alignmentMatch.getQueryChr()].getSequence().length(); + } + querySequence0 = getSubsequence(querySequences, alignmentMatch.getQueryChr(), + alignmentMatch.getQueryEnd()+1, + queryEnd, alignmentMatch.getQueryStrand()); + } +// int longerLength = querySequence0.length(); +// if( longerLength < databaseSequence0.length()){ +// longerLength = databaseSequence0.length(); +// } + //alignSlidingWindow(querySequence0, databaseSequence0, alignQuerySequence, alignDatabaseSequence, longerLength, parameters, nucleotideCodeSubstitutionMatrix); + } + std::string querySequence1 = getSubsequence(querySequences, alignmentMatch.getQueryChr(), + alignmentMatch.getQueryStart(), + alignmentMatch.getQueryEnd(), alignmentMatch.getQueryStrand()); + std::string databaseSequence1 = getSubsequence(databaseSequences, alignmentMatch.getDatabaseChr(), + alignmentMatch.getDatabaseStart(), + alignmentMatch.getDatabaseEnd()); + + std::string querySequence2=""; + std::string databaseSequence2=""; + if( endShiftDistance>0 ){ + databaseEnd = alignmentMatch.getDatabaseEnd()+endShiftDistance; + if( databaseEnd > databaseSequences[alignmentMatch.getDatabaseChr()].getSequence().length() ){ + databaseEnd = databaseSequences[alignmentMatch.getDatabaseChr()].getSequence().length(); + } + if( alignmentMatch.getDatabaseEnd() < databaseSequences[alignmentMatch.getDatabaseChr()].getSequence().length() ) { + databaseSequence2 = getSubsequence(databaseSequences, alignmentMatch.getDatabaseChr(), + alignmentMatch.getDatabaseEnd()+1, databaseEnd); + } + + if( POSITIVE == alignmentMatch.getQueryStrand() && alignmentMatch.getQueryEnd() < querySequences[alignmentMatch.getQueryChr()].getSequence().length() ) { + queryEnd = alignmentMatch.getQueryEnd() + endShiftDistance; + if (queryEnd > querySequences[alignmentMatch.getQueryChr()].getSequence().length() ) { + queryEnd = querySequences[alignmentMatch.getQueryChr()].getSequence().length(); + } + querySequence2 = getSubsequence(querySequences, alignmentMatch.getQueryChr(), + alignmentMatch.getQueryEnd() + 1, queryEnd, + alignmentMatch.getQueryStrand()); + } else if ( NEGATIVE == alignmentMatch.getQueryStrand() && alignmentMatch.getQueryStart()>1){ + queryStart = alignmentMatch.getQueryStart() - endShiftDistance; + if (queryStart < 1 ) { + queryStart = 1; + } + querySequence2 = getSubsequence(querySequences, alignmentMatch.getQueryChr(), + queryStart, alignmentMatch.getQueryStart() - 1, + alignmentMatch.getQueryStrand()); + } +// int longerLength = querySequence2.length(); +// if( longerLength < databaseSequence2.length()){ +// longerLength = databaseSequence2.length(); +// } + //alignSlidingWindow(querySequence2, databaseSequence2, alignQuerySequence2, alignDatabaseSequence2, longerLength, parameters, nucleotideCodeSubstitutionMatrix); + } + + + std::string querySequence = querySequence0 + querySequence1 + querySequence2; + std::string databaseSequence = databaseSequence0 + databaseSequence1 + databaseSequence2; + alignSlidingWindow(querySequence, databaseSequence, alignQuerySequence, alignDatabaseSequence, slidingWindowSize, parameters, nucleotideCodeSubstitutionMatrix); + + +// alignQuerySequence = alignQuerySequence + alignQuerySequence1; +// alignQuerySequence = alignQuerySequence + alignQuerySequence2; +// alignDatabaseSequence = alignDatabaseSequence + alignDatabaseSequence1; +// alignDatabaseSequence = alignDatabaseSequence + alignDatabaseSequence2; + +// std::cout << "alignQuerySequence: " << alignQuerySequence << std::endl; +// std::cout << "alignDatabaseSequence: " << alignDatabaseSequence << std::endl; //generate annotation according to sequence alignment begin // std::cout << "line 661" << std::endl; for( Gene * gene : overLappedGenes ){ @@ -1182,13 +1369,14 @@ void TransferAllExonWithSpliceAlignmentResult( const std::string & gffFilePath, split(line, delim, elems); queryStart=stoi(elems[3]); // if(databaseStart>0 && transcriptHashMap.find(elems[0])!=transcriptHashMap.end() ){ // ignore those none mapping records - if( transcriptHashMap.find(elems[0])!=transcriptHashMap.end() ){ // ignore those none mapping records + queryChr=elems[2]; + if( queryChr.compare("*") != 0 && transcriptHashMap.find(elems[0])!=transcriptHashMap.end() ){ // ignore those none mapping records //std::cout << "begain to analysis " << line << std::endl; databaseChr=transcriptHashMap[elems[0]].getChromeSomeName(); - queryChr=elems[2]; databaseStart = transcriptHashMap[elems[0]].getPStart(); - //databaseEnd=databaseStart; + databaseEnd = transcriptHashMap[elems[0]].getPEnd(); +// databaseEnd=databaseStart; queryEnd=queryStart; std::vector cigarElems; splitCIGAR(elems[5], cigarElems); @@ -1196,36 +1384,38 @@ void TransferAllExonWithSpliceAlignmentResult( const std::string & gffFilePath, for(int i=0;i > & geneNameMap, @@ -1293,10 +1483,8 @@ void TransferAllExonWithNucmerResult( std::map> & alignmentMatchsMap, std::map& parameters, const std::string & outPutFilePath, - const size_t & minIntron , const bool & slowMode, const int & slidingWindowSize, const size_t & maxLengthForStructureAlignment){ - + const size_t & minIntron , const bool & slowMode, const int & slidingWindowSize, const size_t & maxLengthForStructureAlignment, const std::string source){ NucleotideCodeSubstitutionMatrix nucleotideCodeSubstitutionMatrix(parameters); - std::map databaseSequences; readFastaFile(databaseFastaFilePath, databaseSequences); CheckAndUpdateTranscriptsEnds( transcriptHashMap, databaseSequences, nucleotideCodeSubstitutionMatrix, minIntron); @@ -1325,7 +1513,7 @@ void TransferAllExonWithNucmerResult( std::map overLappedGenes; if( slowMode ){ - //std::cout << "line 1317" << std::endl; +// std::cout << "line 1317" << std::endl; getAlltheOverLappedGenes2( geneNameMap, geneHashMap, it1->first, alignmentMatch, overLappedGenes, startShitfDistance, endShiftDistance); }else{ @@ -1334,7 +1522,8 @@ void TransferAllExonWithNucmerResult( std::map0 ) { std::vector newGffRecords; // std::cout << "line 984" << std::endl; @@ -1343,26 +1532,36 @@ void TransferAllExonWithNucmerResult( std::map importantPositions; // std::cout << "line 990" << std::endl; - if( alignmentMatch.getWindowSize()>1 ){ + //windows size if a record specific sliding window size + // if it is not set, then use slidingWindowSize + if( source.compare("sam") == 0 ){ + slidingWinAlnAndGeneRateAnnotationSam( alignmentMatch, databaseSequences, querySequences, + overLappedGenes, transcriptHashMap, importantPositions, + slidingWindowSize, startShitfDistance, endShiftDistance, parameters, nucleotideCodeSubstitutionMatrix); + }else if( alignmentMatch.getWindowSize()>1 ){ + //std::cout << "line 1352 alignmentMatch.getWindowSize() " << alignmentMatch.getWindowSize() << std::endl; slidingWinAlnAndGeneRateAnnotation( alignmentMatch, databaseSequences, querySequences, overLappedGenes, transcriptHashMap, importantPositions, alignmentMatch.getWindowSize(), startShitfDistance, endShiftDistance, parameters, nucleotideCodeSubstitutionMatrix); }else{ + //std::cout << "line 1357 alignmentMatch.getWindowSize() " << alignmentMatch.getWindowSize() << std::endl; slidingWinAlnAndGeneRateAnnotation( alignmentMatch, databaseSequences, querySequences, overLappedGenes, transcriptHashMap, importantPositions, slidingWindowSize, startShitfDistance, endShiftDistance, parameters, nucleotideCodeSubstitutionMatrix); } - //std::cout << "genome sequence alignment done" << std::endl; +// std::cout << "genome sequence alignment done" << std::endl; if( POSITIVE == alignmentMatch.getQueryStrand() ) { +// std::cout << "line 1554" << std::endl; for( Gene * gene : overLappedGenes ) { +// std::cout << "line 1556 " << gene->getName() << std::endl; NewGffRecord newGffRecord; newGffRecord.geneName=gene->getName(); newGffRecord.strand=gene->getStrand(); // std::cout << gene->getName() << " " << gene->getTranscriptVector().size() << std::endl; for (i=0; igetTranscriptVector().size();++i ) { -// std::cout << transcriptHashMap[gene->getTranscriptVector()[i]].getName() << std::endl; +// std::cout << "positive " + transcriptHashMap[gene->getTranscriptVector()[i]].getName() << std::endl; std::string referenceTranscriptId = gene->getTranscriptVector()[i]; -// std::cout << referenceTranscriptId << " " << transcriptHashMap[referenceTranscriptId].getStrand() << std::endl; + //std::cout << referenceTranscriptId << " " << transcriptHashMap[referenceTranscriptId].getStrand() << std::endl; Transcript * referenceTranscript = & transcriptHashMap[referenceTranscriptId]; Transcript newTranscript(referenceTranscriptId, alignmentMatch.getQueryChr(), diff --git a/src/service/TransferGffWithNucmerResult.h b/src/service/TransferGffWithNucmerResult.h index 5697319..3b72fcc 100644 --- a/src/service/TransferGffWithNucmerResult.h +++ b/src/service/TransferGffWithNucmerResult.h @@ -32,5 +32,6 @@ void TransferAllExonWithNucmerResult( std::map> & alignmentMatchsMap, std::map& parameters, const std::string & outPutFilePath, - const size_t & minIntron , const bool & slowMode, const int & slidingWindowSize, const size_t & maxLengthForStructureAlignment); + const size_t & minIntron , const bool & slowMode, const int & slidingWindowSize, const size_t & maxLengthForStructureAlignment, const std::string source); + #endif //ANNOTATIONLIFTOVER_TRANSFERGFFWITHNUCMERRESULT_H diff --git a/src/tests/myImportandFunction/alignSlidingWindow_test.cpp b/src/tests/myImportandFunction/alignSlidingWindow_test.cpp index 6f74a8a..293feee 100644 --- a/src/tests/myImportandFunction/alignSlidingWindow_test.cpp +++ b/src/tests/myImportandFunction/alignSlidingWindow_test.cpp @@ -95,7 +95,7 @@ TEST(alignSlidingWindow, c2){ // it takes about 25mins to finish the alignment TEST(alignSlidingWindow, c3){ std::cout << std::endl; std::string parameterFile = "configure"; - std::map parameters = initialize_paramters(parameterFile, "./"); + std::map parameters = initialize_paramters(parameterFile, "."); NucleotideCodeSubstitutionMatrix nucleotideCodeSubstitutionMatrix(parameters); std::string dna_q = "TAAACCT"; std::string dna_d = "CCTATCTGA"; @@ -106,3 +106,42 @@ TEST(alignSlidingWindow, c3){ std::cout << _alignment_d << std::endl; ASSERT_EQ(0, 0); } + + + + +TEST(alignSlidingWindow, c5){ + std::cout << std::endl; + std::string parameterFile = "configure"; + std::map parameters = initialize_paramters(parameterFile, "/Users/song/Dropbox/gean"); + NucleotideCodeSubstitutionMatrix nucleotideCodeSubstitutionMatrix(parameters); + + std::map querySequences; + std::map databaseSequences; + + readFastaFile("/Users/song/Dropbox/A1033_01_h50.fasta", querySequences); + readFastaFile("/Users/song/Dropbox/adapter.fa", databaseSequences); + + + std::string dna_q; + std::string dna_d; + std::cout << "begin to alignment" << std::endl; + for( std::map::iterator it=querySequences.begin(); it != querySequences.end(); it++ ){ + for( std::map::iterator it1=databaseSequences.begin(); it1 != databaseSequences.end(); it1++ ) { + dna_q = it->second.getSequence(); + dna_d = it1->second.getSequence(); + std::string _alignment_q=""; + std::string _alignment_d=""; + + alignSlidingWindow(dna_q, dna_d, _alignment_q, _alignment_d, 60, parameters, + nucleotideCodeSubstitutionMatrix); + std::cout << "sequence" << std::endl; + std::cout << dna_q << std::endl; + std::cout << dna_d << std::endl; + std::cout << "alignment" << std::endl; + std::cout << _alignment_q << std::endl; + std::cout << _alignment_d << std::endl << std::endl; + } + } + ASSERT_EQ(0, 0); +} diff --git a/src/tests/service/TransferAllExonWithSpliceAlignmentResult_test.cpp b/src/tests/service/TransferAllExonWithSpliceAlignmentResult_test.cpp index 31e4a17..0af6b6d 100644 --- a/src/tests/service/TransferAllExonWithSpliceAlignmentResult_test.cpp +++ b/src/tests/service/TransferAllExonWithSpliceAlignmentResult_test.cpp @@ -11,15 +11,21 @@ TEST(TransferAllExonWithSpliceAlignmentResult, c1){ std::string parameterFile = "/Users/song/Dropbox/GEAN/configure"; std::map parameters = initialize_paramters(parameterFile, "/Users/song/Dropbox/GEAN/"); - std::string gffFilePath = "/Users/song/CM4.0.noZero.gff3"; - std::string databaseFastaFilePath = "/Users/song/CM3.6.1_pseudomol.fa"; - std::string queryFastaFilePath="/Users/song/CM3.6.1_pseudomol.fa"; - std::string samFilePath="/Users/song/CM361_vs_CM361.sam"; +// std::string gffFilePath = "/Users/song/CM4.0.noZero.gff3"; + std::string gffFilePath = "/Users/song/Zea_mays.AGPv3.31.gff3"; + std::string databaseFastaFilePath = "/Users/song/Zea_mays.AGPv3.31.dna.genome.fa"; + std::string queryFastaFilePath="/Users/song/Zm-CML247-DRAFT-PANZEA-1.0.fasta"; +// std::string databaseFastaFilePath = "/Users/song/CM3.6.1_pseudomol.fa"; +// std::string queryFastaFilePath="/Users/song/CM3.6.1_pseudomol.fa"; + //std::string samFilePath="/Users/song/MELO3C031356.2.1.sam"; +// std::string samFilePath="/Users/song/MELO3C035475.2.1.sam"; + //std::string samFilePath="/Users/song/MELO3C003750.2.1.sam"; + std::string samFilePath="/Users/song/minimap2ab"; std::string outPutFilePath="/Users/song/ler.gff"; // std::cout << " go to the function" << std::endl; - size_t maxLengthForStructureAlignment=600; + size_t maxLengthForStructureAlignment=60000; int minIntron = 5; - int windowWidth = 60; + int windowWidth = 20; TransferAllExonWithSpliceAlignmentResult( gffFilePath, databaseFastaFilePath, queryFastaFilePath, samFilePath, parameters, outPutFilePath, minIntron, windowWidth, maxLengthForStructureAlignment); ASSERT_EQ(0, 0); }