From 0540f86d3d5965cf9a6ef8871068999ef61211dc Mon Sep 17 00:00:00 2001 From: Benjamin Buchfink Date: Sat, 18 Mar 2023 11:19:01 +0100 Subject: [PATCH] Squashed commit of the following: commit 9542bcaa9036f585f56048cb12c4eee8d12319c0 Author: Benjamin Buchfink Date: Sat Mar 18 11:17:52 2023 +0100 Fixed PAF bug. commit e6563b3dd023bd7bd4c81c7bce8ba7b655a635f0 Author: Benjamin Buchfink Date: Sat Mar 18 10:47:09 2023 +0100 Fixed unaligned bug. commit 285afd5f33b8a58eb2d1d2cf3afff08063446f94 Author: Benjamin Buchfink Date: Sat Mar 18 10:16:15 2023 +0100 Fixed macOS compiler error. commit 7dd350e04eb5626090313fe1af6d79e73711c6cc Author: Benjamin Buchfink Date: Thu Mar 16 09:35:22 2023 +0100 Added switch to disable tls. commit 39c80484883ccbc31c439da1f37f5ef6dedb42f3 Merge: a11f8eda 21a32fc5 Author: Benjamin Buchfink Date: Wed Mar 15 11:50:28 2023 +0100 Merge branch 'master' into dev commit a11f8eda395a1d7055d243346c1d786572c9343e Author: Benjamin Buchfink Date: Wed Mar 15 11:49:23 2023 +0100 Disable sse for building blast. commit 857e96f539089474a66b37e35033ffcabd3bb8b3 Author: Benjamin Buchfink Date: Tue Mar 14 12:54:52 2023 +0100 Fixed error. commit f8b3c28075c6a7a0bf2764389696de372dd8aa59 Author: Benjamin Buchfink Date: Tue Mar 14 12:52:36 2023 +0100 Removed iostream includes. commit 24c930eed464fe08be2ab61f93042ce1a0fc804c Author: Benjamin Buchfink Date: Tue Mar 14 11:29:03 2023 +0100 Added new dispatch code. commit b18d9e16043fc57122c095c91b29c0dcbf5f4b69 Author: Benjamin Buchfink Date: Tue Mar 14 10:57:10 2023 +0100 Added new dispatch code. commit f1b05d05d0948f99f30c3e854b522546940d115e Author: Benjamin Buchfink Date: Mon Mar 13 17:22:15 2023 +0100 Added new dispatch code. commit 8c66829f0df52eabaffe58215c730dae2af8b521 Author: Benjamin Buchfink Date: Mon Mar 13 12:25:46 2023 +0100 Added new dispatch code. commit edb9ede545536c05ec47c16734a67a7c68b38033 Merge: 6b1658e2 c71b49c6 Author: Benjamin Buchfink Date: Mon Mar 13 09:55:35 2023 +0100 Merge branch 'dev' of https://github.com/bbuchfink/diamond_dev into dev commit c71b49c6d16f6134b7068aa896be7961de02bce3 Author: Benjamin Buchfink Date: Fri Mar 10 15:23:18 2023 +0100 Handle non-x86 case. --- CMakeLists.txt | 9 +- Dockerfile | 2 +- src/ChangeLog | 8 + src/align/align.cpp | 4 +- src/align/extend.cpp | 2 +- src/align/extend_chunk.h | 2 +- src/align/gapped_filter.cpp | 2 +- src/align/gapped_score.cpp | 2 +- src/align/global_ranking/extend.cpp | 4 +- src/align/global_ranking/table.cpp | 2 +- src/align/legacy/banded_swipe_pipeline.cpp | 2 +- src/align/legacy/query_mapper.cpp | 2 +- src/align/output.cpp | 2 +- src/align/ungapped.cpp | 2 +- src/basic/basic.cpp | 2 +- src/basic/config.cpp | 60 +------ src/basic/const.h | 2 +- src/chaining/greedy_align.cpp | 1 + src/cluster/cascaded/cascaded.cpp | 4 +- src/cluster/cascaded/recluster.cpp | 4 +- src/cluster/cascaded/wrapper.cpp | 7 +- src/cluster/helpers.cpp | 1 - src/cluster/incremental/common.h | 2 +- src/cluster/incremental/run.cpp | 6 +- src/cluster/output.cpp | 4 +- src/cluster/realign.cpp | 2 +- src/cluster/reassign.cpp | 2 +- src/contrib/mcl/sparse_matrix_stream.h | 2 +- src/data/blastdb/blastdb.cpp | 2 +- src/data/dmnd/dmnd.cpp | 11 +- src/data/enum_seeds.h | 2 +- src/data/fasta/fasta_file.cpp | 4 +- src/data/index.cpp | 2 +- src/data/sequence_file.cpp | 2 +- src/data/taxon_list.cpp | 2 +- src/data/taxonomy.cpp | 2 +- src/data/taxonomy_nodes.cpp | 2 +- src/dna/dna_index.cpp | 2 +- src/dp/dp.h | 10 +- src/dp/pfscan/pfscan.cpp | 11 +- src/dp/pfscan/pfscan.h | 10 +- src/dp/pfscan/smith_waterman.h | 2 +- src/dp/scan_diags.cpp | 10 +- src/dp/scan_diags.h | 8 +- src/dp/score_profile.cpp | 8 +- src/dp/score_profile.h | 4 +- src/dp/swipe/anchored_wrapper.cpp | 11 +- src/dp/swipe/banded_3frame_swipe.cpp | 107 ++++++------ src/dp/swipe/banded_matrix.h | 6 +- src/dp/swipe/banded_swipe.h | 2 +- src/dp/swipe/full_matrix.h | 8 +- src/dp/swipe/swipe_wrapper.cpp | 13 +- src/dp/ungapped_simd.cpp | 14 +- src/dp/ungapped_simd.h | 5 +- src/lib/alp/sls_alp.hpp | 2 +- src/lib/alp/sls_alp_data.cpp | 1 + src/lib/alp/sls_alp_data.hpp | 2 +- src/lib/alp/sls_alp_regression.hpp | 2 +- src/lib/alp/sls_alp_sim.hpp | 2 +- src/lib/alp/sls_pvalues.hpp | 2 +- src/lib/interval_tree/interval_tree.hpp | 2 +- src/lib/tantan/LambdaCalculator.cc | 1 - src/masking/tantan.cpp | 13 +- src/masking/tantan.h | 2 +- src/output/daa/merge.cpp | 4 +- src/output/daa/view.cpp | 6 +- src/output/join_blocks.cpp | 2 +- src/output/output_format.h | 4 +- src/output/paf_format.cpp | 14 +- src/output/taxon_format.cpp | 3 +- src/run/double_indexed.cpp | 16 +- src/run/main.cpp | 2 +- src/search/search.h | 7 +- src/search/seed_complexity.cpp | 2 +- src/search/stage0.cpp | 18 +- src/search/stage1.h | 185 +++++++++++++++++++++ src/search/stage1_2.cpp | 28 ++++ src/search/{stage2.cpp => stage2.h} | 147 +--------------- src/stats/matrix_adjust_eigen.cpp | 2 - src/stats/score_matrix.cpp | 3 +- src/test/test.cpp | 2 +- src/test/test_cases.cpp | 6 +- src/tools/benchmark.cpp | 76 +++++---- src/tools/benchmark_io.cpp | 16 +- src/tools/benchmark_swipe.cpp | 3 +- src/tools/greedy_vertex_cover.cpp | 4 +- src/tools/roc.cpp | 2 +- src/tools/tsv.cpp | 3 +- src/tools/view.cpp | 4 +- src/util/algo/greedy_vertex_cover.cpp | 2 +- src/util/algo/join_result.h | 18 +- src/util/io/file_sink.cpp | 2 - src/util/io/file_source.cpp | 2 - src/util/io/input_file.cpp | 2 +- src/util/log_stream.h | 18 +- src/util/parallel/thread_pool.h | 2 +- src/util/profiler.h | 2 +- src/util/simd.cpp | 1 + src/util/simd.h | 63 +------ src/util/simd/dispatch.h | 163 ++++++++++++++++++ src/util/util.cpp | 6 +- src/util/util.h | 7 +- 102 files changed, 735 insertions(+), 545 deletions(-) create mode 100644 src/search/stage1.h create mode 100644 src/search/stage1_2.cpp rename src/search/{stage2.cpp => stage2.h} (54%) create mode 100644 src/util/simd/dispatch.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a1b45631..13c5cdf9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ option(WITH_AVX512 "WITH_AVX512" OFF) option(WITH_DNA "WITH_DNA" OFF) option(WITH_MCL "WITH_MCL" OFF) option(WITH_MIMALLOC "WITH_MIMALLOC" OFF) +option(USE_TLS "USE_TLS" OFF) set(MAX_SHAPE_LEN 19) set(BLAST_INCLUDE_DIR "" CACHE STRING "BLAST_INCLUDE_DIR") set(BLAST_LIBRARY_DIR "" CACHE STRING "BLAST_LIBRARY_DIR") @@ -45,6 +46,10 @@ if(STRICT_BAND) add_definitions(-DSTRICT_BAND) endif() +if(USE_TLS) + add_definitions(-DUSE_TLS) +endif() + if(SINGLE_THREADED) add_definitions(-DSINGLE_THREADED) endif() @@ -148,7 +153,7 @@ endif() set(DISPATCH_OBJECTS "src/dp/swipe/banded_3frame_swipe.cpp" - "src/search/stage2.cpp" + "src/search/stage1_2.cpp" "src/tools/benchmark.cpp" "src/dp/swipe/swipe_wrapper.cpp" "src/masking/tantan.cpp" @@ -175,6 +180,8 @@ if(X86) add_library(arch_avx512 OBJECT ${DISPATCH_OBJECTS}) target_include_directories(arch_avx512 PRIVATE "${CMAKE_SOURCE_DIR}/src/lib") endif() + add_definitions(-DWITH_AVX2) + add_definitions(-DWITH_SSE4_1) if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) target_compile_options(arch_sse4_1 PUBLIC -DDISPATCH_ARCH=ARCH_SSE4_1 -DARCH_ID=1 -D__SSSE3__ -D__SSE4_1__ -D__POPCNT__ -DEigen=Eigen_SSE4_1) target_compile_options(arch_avx2 PUBLIC -DDISPATCH_ARCH=ARCH_AVX2 -DARCH_ID=2 /arch:AVX2 -D__SSSE3__ -D__SSE4_1__ -D__POPCNT__ -DEigen=Eigen_AVX2) diff --git a/Dockerfile b/Dockerfile index b19beda1c..8f5f3cd1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ADD . . RUN git clone https://github.com/ncbi/ncbi-cxx-toolkit-public.git WORKDIR ncbi-cxx-toolkit-public -RUN ./cmake-configure --without-debug --with-projects="objtools/blast/seqdb_reader;objtools/blast/blastdb_format" --with-build-root=build +RUN ./cmake-configure --without-debug --with-projects="objtools/blast/seqdb_reader;objtools/blast/blastdb_format" --with-build-root=build --with-features="-SSE" WORKDIR build/build RUN make -j4 RUN cp /opt/diamond/ncbi-cxx-toolkit-public/build/inc/ncbiconf_unix.h /opt/diamond/ncbi-cxx-toolkit-public/include diff --git a/src/ChangeLog b/src/ChangeLog index 2093cf6f0..db5a80eb8 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,11 @@ +[2.1.6] +- Fixed compatibility issues on older systems without support for AVX2. +- Fixed linker errors when compiled with `-DX86=OFF`. +- Fixed a compiler error on macOS systems. +- Fixed a bug that could cause missing tags in the XML output format and + unaligned queries not to be reported correctly. +- Fixed a bug that caused the PAF output format not to work correctly. + [2.1.5] - Disabled the use of frequency based seed masking when using the linear-time search feature with respect to the targets. diff --git a/src/align/align.cpp b/src/align/align.cpp index e308fe5f1..563f720cd 100644 --- a/src/align/align.cpp +++ b/src/align/align.cpp @@ -198,7 +198,7 @@ TextBuffer* legacy_pipeline(const HitIterator::Hits& hits, Search::Config& cfg, QueryMapper *mapper = new ExtensionPipeline::BandedSwipe::Pipeline(hits.query, hits.begin, hits.end, dp_stat, cfg); - task_timer timer("Initializing mapper", UINT_MAX); + TaskTimer timer("Initializing mapper", UINT_MAX); mapper->init(); timer.finish(); mapper->run(stat, cfg); @@ -293,7 +293,7 @@ void align_queries(Consumer* output_file, Search::Config& cfg) const int64_t mem_limit = Util::String::interpret_number(config.memory_limit.get("16G")); pair query_range; - task_timer timer(nullptr, 3); + TaskTimer timer(nullptr, 3); if (!cfg.blocked_processing && !cfg.iterated()) cfg.db->init_random_access(cfg.current_query_block, 0, false); diff --git a/src/align/extend.cpp b/src/align/extend.cpp index e753d3a78..c924d71e5 100644 --- a/src/align/extend.cpp +++ b/src/align/extend.cpp @@ -252,7 +252,7 @@ pair, Stats> extend( } pair, Stats> extend(BlockId query_id, Search::Hit* begin, Search::Hit* end, const Search::Config &cfg, Statistics &stat, DP::Flags flags) { - task_timer timer(flag_any(flags, DP::Flags::PARALLEL) ? config.target_parallel_verbosity : UINT_MAX); + TaskTimer timer(flag_any(flags, DP::Flags::PARALLEL) ? config.target_parallel_verbosity : UINT_MAX); timer.go("Loading seed hits"); SeedHitList l = load_hits(begin, end, cfg.target->seqs()); stat.inc(Statistics::TARGET_HITS0, l.target_block_ids.size()); diff --git a/src/align/extend_chunk.h b/src/align/extend_chunk.h index 5699314e9..c526770fa 100644 --- a/src/align/extend_chunk.h +++ b/src/align/extend_chunk.h @@ -29,7 +29,7 @@ pair, Stats> extend(BlockId query_id, static const Loc GAPPED_FILTER_MIN_QLEN = 85; const int64_t n = seed_hits_end - seed_hits; stat.inc(Statistics::TARGET_HITS2, n); - task_timer timer(flag_any(flags, DP::Flags::PARALLEL) ? config.target_parallel_verbosity : UINT_MAX); + TaskTimer timer(flag_any(flags, DP::Flags::PARALLEL) ? config.target_parallel_verbosity : UINT_MAX); if (cfg.lazy_masking && !config.global_ranking_targets) stat.inc(Statistics::MASKED_LAZY, lazy_masking(target_block_ids, target_block_ids + n, *cfg.target, cfg.target_masking)); diff --git a/src/align/gapped_filter.cpp b/src/align/gapped_filter.cpp index 4ccccb3f3..2d455ad01 100644 --- a/src/align/gapped_filter.cpp +++ b/src/align/gapped_filter.cpp @@ -34,7 +34,7 @@ using std::pair; namespace Extension { -int gapped_filter(const SeedHit &hit, const LongScoreProfile *query_profile, const Sequence &target, int band, int window, std::function f) { +int gapped_filter(const SeedHit &hit, const LongScoreProfile *query_profile, const Sequence &target, int band, int window, std::function f) { const int slen = (int)target.length(); const int d = std::max(hit.diag() - band / 2, -(slen - 1)), j0 = std::max(hit.j - window, 0), diff --git a/src/align/gapped_score.cpp b/src/align/gapped_score.cpp index 5e5b4e8d1..9cfeb2c42 100644 --- a/src/align/gapped_score.cpp +++ b/src/align/gapped_score.cpp @@ -212,7 +212,7 @@ pair, Stats> align(const vector &targets, const Seque LongScoreProfile p; LongScoreProfile p8; const bool hauser_cbs = ::Stats::CBS::hauser(config.comp_based_stats); - task_timer timer; + TaskTimer timer; p = DP::make_profile16(query_seq[frame], hauser_cbs ? query_cb[frame].int8.data() : nullptr, 0); p8 = DP::make_profile8(query_seq[frame], hauser_cbs ? query_cb[frame].int8.data() : nullptr, 0); LongScoreProfile p_rev(p.reverse()); diff --git a/src/align/global_ranking/extend.cpp b/src/align/global_ranking/extend.cpp index f3732d6a6..ae5b3dc58 100644 --- a/src/align/global_ranking/extend.cpp +++ b/src/align/global_ranking/extend.cpp @@ -89,7 +89,7 @@ void align_worker(InputFile* query_list, const TargetMap* db2block_id, const Sea } void extend(SequenceFile& db, TempFile& merged_query_list, BitVector& ranking_db_filter, Search::Config& cfg, Consumer& master_out) { - task_timer timer("Loading reference sequences"); + TaskTimer timer("Loading reference sequences"); InputFile query_list(merged_query_list); db.set_seqinfo_ptr(0); cfg.target.reset(db.load_seqs(INT64_MAX, &ranking_db_filter, SequenceFile::LoadFlags::SEQS)); @@ -178,7 +178,7 @@ static BitVector db_filter(const Search::Config::RankingTable& table, size_t db_ } void extend(Search::Config& cfg, Consumer& out) { - task_timer timer("Listing target sequences"); + TaskTimer timer("Listing target sequences"); const BitVector filter = db_filter(*cfg.ranking_table, cfg.db->sequence_count()); timer.go("Loading target sequences"); cfg.db->set_seqinfo_ptr(0); diff --git a/src/align/global_ranking/table.cpp b/src/align/global_ranking/table.cpp index 7b17956ec..85da83d90 100644 --- a/src/align/global_ranking/table.cpp +++ b/src/align/global_ranking/table.cpp @@ -152,7 +152,7 @@ void update_table(Search::Config& cfg) { log_stream << "Seed hits = " << hits.size() << endl; if (hits.size() == 0) return; - task_timer timer("Sorting seed hits"); + TaskTimer timer("Sorting seed hits"); #if _MSC_FULL_VER == 191627042 merge_sort(hits.begin(), hits.end(), config.threads_, Search::Hit::CmpQueryTarget()); #else diff --git a/src/align/legacy/banded_swipe_pipeline.cpp b/src/align/legacy/banded_swipe_pipeline.cpp index b9802f28d..c80b70430 100644 --- a/src/align/legacy/banded_swipe_pipeline.cpp +++ b/src/align/legacy/banded_swipe_pipeline.cpp @@ -181,7 +181,7 @@ void build_ranking_worker(PtrVector<::Target>::iterator begin, PtrVector<::Targe void Pipeline::run(Statistics &stat, const Search::Config& cfg) { - task_timer timer("Init banded swipe pipeline", target_parallel ? 3 : UINT_MAX); + TaskTimer timer("Init banded swipe pipeline", target_parallel ? 3 : UINT_MAX); Config::set_option(config.padding, 32); if (n_targets() == 0) return; diff --git a/src/align/legacy/query_mapper.cpp b/src/align/legacy/query_mapper.cpp index b8e74ae17..c727941bc 100644 --- a/src/align/legacy/query_mapper.cpp +++ b/src/align/legacy/query_mapper.cpp @@ -99,7 +99,7 @@ QueryMapper::QueryMapper(size_t query_id, Search::Hit* begin, Search::Hit* end, void QueryMapper::init() { if(config.log_query) - std::cout << "Query = " << metadata.query->ids()[query_id] << '\t' << query_id << std::endl; + message_stream << "Query = " << metadata.query->ids()[query_id] << '\t' << query_id << std::endl; if (Stats::CBS::hauser(config.comp_based_stats)) for (int i = 0; i < align_mode.query_contexts; ++i) query_cb.emplace_back(query_seq(i)); diff --git a/src/align/output.cpp b/src/align/output.cpp index 2d3234bac..e8cdf9bb3 100644 --- a/src/align/output.cpp +++ b/src/align/output.cpp @@ -37,7 +37,7 @@ TextBuffer* generate_output(vector &targets, const Extension::Stats& stat std::unique_ptr f(cfg.output_format->clone()); size_t seek_pos = 0; unsigned n_hsp = 0, hit_hsps = 0; - Output::Info info{ cfg.query->seq_info(query_block_id), !targets.empty(), cfg.db.get(), *out, stats }; + Output::Info info{ cfg.query->seq_info(query_block_id), targets.empty(), cfg.db.get(), *out, stats }; TranslatedSequence query = query_seqs.translated_seq(align_mode.query_translated ? cfg.query->source_seqs()[query_block_id] : query_seqs[query_block_id], query_block_id * align_mode.query_contexts); const char *query_title = cfg.query->ids()[query_block_id]; const double query_self_aln_score = cfg.query->has_self_aln() ? cfg.query->self_aln_score(query_block_id) : 0.0; diff --git a/src/align/ungapped.cpp b/src/align/ungapped.cpp index c6eca77b5..3dc98a847 100644 --- a/src/align/ungapped.cpp +++ b/src/align/ungapped.cpp @@ -62,7 +62,7 @@ WorkTarget::WorkTarget(BlockId block_id, const Sequence& seq, int query_len, con WorkTarget ungapped_stage(FlatArray::DataIterator begin, FlatArray::DataIterator end, const Sequence *query_seq, const Bias_correction *query_cb, const ::Stats::Composition& query_comp, const int16_t** query_matrix, uint32_t block_id, Statistics& stat, const Block& targets, const Mode mode) { array, MAX_CONTEXT> diagonal_segments; - task_timer timer; + TaskTimer timer; const SequenceSet& ref_seqs = targets.seqs(), &ref_seqs_unmasked = targets.unmasked_seqs(); const bool masking = config.comp_based_stats == ::Stats::CBS::COMP_BASED_STATS_AND_MATRIX_ADJUST ? ::Stats::use_seg_masking(query_seq[0], ref_seqs_unmasked[block_id]) : true; WorkTarget target(block_id, masking ? ref_seqs[block_id] : ref_seqs_unmasked[block_id], ::Stats::count_true_aa(query_seq[0]), query_comp, query_matrix); diff --git a/src/basic/basic.cpp b/src/basic/basic.cpp index ef420eb02..06b71df4b 100644 --- a/src/basic/basic.cpp +++ b/src/basic/basic.cpp @@ -29,7 +29,7 @@ along with this program. If not, see . #include "../util/util.h" #include "../stats/standard_matrix.h" -const char* Const::version_string = "2.1.5"; +const char* Const::version_string = "2.1.6"; using std::string; using std::vector; using std::count; diff --git a/src/basic/config.cpp b/src/basic/config.cpp index e4f121aaa..fe0371023 100644 --- a/src/basic/config.cpp +++ b/src/basic/config.cpp @@ -21,6 +21,7 @@ along with this program. If not, see . ****/ #include +#include #include #include #include @@ -50,12 +51,12 @@ along with this program. If not, see . using std::thread; using std::stringstream; using std::endl; -using std::cerr; using std::ostream; -using std::cout; using std::unique_ptr; using std::string; using std::pair; +using std::cout; +using std::cerr; const EMap EnumTraits::to_string = { { Sensitivity::FASTER, "faster" }, @@ -115,61 +116,14 @@ pair block_size(int64_t memory_limit, Sensitivity s, bool lin) { return { std::max(b, 0.001), c }; } -void print_warnings() { - if (config.sensitivity >= Sensitivity::VERY_SENSITIVE || config.verbosity == 0 || config.swipe_all) - return; - if (config.command != Config::blastp && config.command != Config::blastx && config.command != Config::blastn) - return; - const double ram = total_ram(); - unsigned b = 2, c = 4; - stringstream msg; - if (config.sensitivity >= Sensitivity::SENSITIVE) { - if (ram >= 63) { - c = 1; - if(c < config.lowmem_) - msg << "use this parameter for better performance: -c1"; - } - } - else { - if (ram >= 511) { - b = 12; - c = 1; - } - else if (ram >= 255) { - b = 8; - c = 1; - } - else if (ram >= 127) { - b = 5; - c = 1; - } - else if (ram >= 63) { - b = 6; - } - else if (ram >= 31) { - b = 4; - } - if ((b > 2 && b > config.chunk_size) || c < config.lowmem_) { - msg << "increase the block size for better performance using these parameters : -b" << b; - if (c != 4) - msg << " -c" << c; - } - } - if (!msg.str().empty()) { - set_color(Color::YELLOW, true); - cerr << "The host system is detected to have " << (size_t)ram << " GB of RAM. It is recommended to " << msg.str() << endl; - reset_color(true); - } -} - -template -_t set_string_option(const string& s, const string& name, const vector>& values) { +template +T set_string_option(const string& s, const string& name, const vector>& values) { if (s.empty()) - return (_t)0; + return (T)0; for (auto& i : values) if (s == i.first) return i.second; - throw std::runtime_error("Invalid argument for option " + name + ". Allowed values are:" + std::accumulate(values.begin(), values.end(), string(), [](const string& s, const pair& v) { return s + ' ' + v.first; })); + throw std::runtime_error("Invalid argument for option " + name + ". Allowed values are:" + std::accumulate(values.begin(), values.end(), string(), [](const string& s, const pair& v) { return s + ' ' + v.first; })); } void Config::set_sens(Sensitivity sens) { diff --git a/src/basic/const.h b/src/basic/const.h index 5aa06eaaf..a70d0cd05 100644 --- a/src/basic/const.h +++ b/src/basic/const.h @@ -25,7 +25,7 @@ struct Const { enum { - build_version = 159, + build_version = 160, #ifdef SINGLE_THREADED seedp_bits = 0, #else diff --git a/src/chaining/greedy_align.cpp b/src/chaining/greedy_align.cpp index de13d61bf..f405af661 100644 --- a/src/chaining/greedy_align.cpp +++ b/src/chaining/greedy_align.cpp @@ -24,6 +24,7 @@ along with this program. If not, see . #include #include #include +#include #include "../basic/sequence.h" #include "../basic/match.h" #include "../stats/score_matrix.h" diff --git a/src/cluster/cascaded/cascaded.cpp b/src/cluster/cascaded/cascaded.cpp index 337fe1da7..93622e658 100644 --- a/src/cluster/cascaded/cascaded.cpp +++ b/src/cluster/cascaded/cascaded.cpp @@ -77,7 +77,7 @@ vector cluster(shared_ptr& db, const shared_ptrcount << endl; - task_timer timer("Allocating buffers"); + TaskTimer timer("Allocating buffers"); vector edges(callback->count); timer.go("Loading edges"); InputFile f(callback->edge_file); @@ -113,7 +113,7 @@ vector cascaded(shared_ptr& db, bool linear) { iota(centroids.begin(), centroids.end(), 0); for (int i = 0; i < (int)steps.size(); i++) { - task_timer timer; + TaskTimer timer; config.lin_stage1 = ends_with(steps[i], "_lin"); config.sensitivity = from_string(rstrip(steps[i], "_lin")); tie(centroids, *oid_filter) = update_clustering(*oid_filter, diff --git a/src/cluster/cascaded/recluster.cpp b/src/cluster/cascaded/recluster.cpp index 6f1490d51..cf692d492 100644 --- a/src/cluster/cascaded/recluster.cpp +++ b/src/cluster/cascaded/recluster.cpp @@ -41,7 +41,7 @@ using std::pair; namespace Cluster { static vector recluster(shared_ptr& db, const vector& clustering, int iteration) { - task_timer timer(("*** Initializing recluster iteration " + to_string(iteration + 1)).c_str()); + TaskTimer timer(("*** Initializing recluster iteration " + to_string(iteration + 1)).c_str()); FlatArray clusters; vector centroids; @@ -178,7 +178,7 @@ void recluster() { //config.strict_gvc = true; message_stream << "Coverage cutoff: " << config.member_cover << '%' << endl; - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); shared_ptr db(SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NEED_LETTER_COUNT | SequenceFile::Flags::ACC_TO_OID_MAPPING | SequenceFile::Flags::OID_TO_ACC_MAPPING, SequenceFile::Metadata())); config.db_size = db->letters(); timer.finish(); diff --git a/src/cluster/cascaded/wrapper.cpp b/src/cluster/cascaded/wrapper.cpp index c0ff249c5..e6fe3d397 100644 --- a/src/cluster/cascaded/wrapper.cpp +++ b/src/cluster/cascaded/wrapper.cpp @@ -34,7 +34,6 @@ using std::string; using std::vector; using std::ostream; using std::ofstream; -using std::cout; using std::numeric_limits; using std::tuple; using std::get; @@ -67,7 +66,7 @@ struct Config { double block_size; std::unique_ptr output_format; std::shared_ptr centroids; - task_timer total_time; + TaskTimer total_time; int64_t seqs_processed; int64_t letters_processed; std::vector centroid2oid; @@ -158,8 +157,8 @@ void Cascaded::run() { config.database.require(); init_thresholds(); config.hamming_ext = config.approx_min_id >= 50.0; - task_timer total_time; - task_timer timer("Opening the input file"); + TaskTimer total_time; + TaskTimer timer("Opening the input file"); shared_ptr db(SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NEED_LETTER_COUNT | SequenceFile::Flags::OID_TO_ACC_MAPPING)); if (db->type() == SequenceFile::Type::BLAST) throw std::runtime_error("Clustering is not supported for BLAST databases."); diff --git a/src/cluster/helpers.cpp b/src/cluster/helpers.cpp index 0329c5423..279062881 100644 --- a/src/cluster/helpers.cpp +++ b/src/cluster/helpers.cpp @@ -33,7 +33,6 @@ using std::pair; using std::endl; using std::vector; using std::ofstream; -using std::cout; using std::ostream; using std::runtime_error; using std::string; diff --git a/src/cluster/incremental/common.h b/src/cluster/incremental/common.h index 305076e22..d57f59f96 100644 --- a/src/cluster/incremental/common.h +++ b/src/cluster/incremental/common.h @@ -17,7 +17,7 @@ struct Config { std::unique_ptr db; std::shared_ptr centroids; std::unique_ptr output_file; - task_timer total_time; + TaskTimer total_time; int64_t seqs_processed; int64_t letters_processed; std::vector oid2centroid; diff --git a/src/cluster/incremental/run.cpp b/src/cluster/incremental/run.cpp index a93820eca..16261fdc8 100644 --- a/src/cluster/incremental/run.cpp +++ b/src/cluster/incremental/run.cpp @@ -43,7 +43,7 @@ struct BestCentroid : public Consumer, public vector { static void self_align(Block& block, Config& cfg) { using Edge = Util::Algo::Edge; - task_timer timer(("CLUSTER Searching " + std::to_string(block.seqs().size()) + " unaligned sequences").c_str(), cfg.message_stream); + TaskTimer timer(("CLUSTER Searching " + std::to_string(block.seqs().size()) + " unaligned sequences").c_str(), cfg.message_stream); shared_ptr neighbors(new Callback()); shared_ptr unaligned_wrapper(new BlockWrapper(block)); config.self = true; @@ -101,7 +101,7 @@ static void search_vs_centroids(Block& block, const int round, Config& cfg) { cfg.message_stream << "CLUSTER searching vs. centroids sensitivity = " << to_string(cfg.sens[round]) << " #sequences = " << block.seqs().size() << " , #centroids = " << cfg.centroids->sequence_count() << endl; cfg.status_msg(); - task_timer timer(("Searching " + std::to_string(block.seqs().size()) + " against centroid sequences (" + to_string(cfg.sens[round]) + ")").c_str(), cfg.message_stream); + TaskTimer timer(("Searching " + std::to_string(block.seqs().size()) + " against centroid sequences (" + to_string(cfg.sens[round]) + ")").c_str(), cfg.message_stream); shared_ptr block_wrapper(new BlockWrapper(block)); shared_ptr best_centroid(new BestCentroid(block.seqs().size())); config.self = false; @@ -147,7 +147,7 @@ void Algo::run() { if (!config.resume.empty()) cfg.load_state(); - task_timer timer("CLUSTER Opening the input file", cfg.message_stream); + TaskTimer timer("CLUSTER Opening the input file", cfg.message_stream); const int64_t block_size = (int64_t)(cfg.block_size * 1e9), cache_limit = 0; // block_size; config.output_format = { "edge" }; unique_ptr block; diff --git a/src/cluster/output.cpp b/src/cluster/output.cpp index 6fa3e271e..4180379a9 100644 --- a/src/cluster/output.cpp +++ b/src/cluster/output.cpp @@ -130,7 +130,7 @@ void realign(const FlatArray& clusters, const vector& centroids, Seque score_matrix.set_db_letters(config.db_size ? config.db_size : db.letters()); OId centroid_offset = 0; int n1 = 0; - task_timer timer; + TaskTimer timer; Cfg cfg{ hsp_values, flag_any(db.format_flags(), SequenceFile::FormatFlags::TITLES_LAZY), clusters, centroids, db }; SequenceFile::LoadFlags flags = SequenceFile::LoadFlags::SEQS | SequenceFile::LoadFlags::CONVERT_ALPHABET | SequenceFile::LoadFlags::NO_CLOSE_WEAKLY; @@ -179,7 +179,7 @@ void realign(const FlatArray& clusters, const vector& centroids, Seque } void realign(const vector& clustering, SequenceFile& db, function& callback, HspValues hsp_values) { - task_timer timer("Finding clusters"); + TaskTimer timer("Finding clusters"); FlatArray clusters; vector centroids; tie(clusters, centroids) = cluster_sorted(clustering); diff --git a/src/cluster/realign.cpp b/src/cluster/realign.cpp index e74f94a9a..8e42f778d 100644 --- a/src/cluster/realign.cpp +++ b/src/cluster/realign.cpp @@ -52,7 +52,7 @@ void realign() { throw std::runtime_error("Unsupported output field for the realign workflow: " + Blast_tab_format::field_def.at(i).key); } - task_timer timer("Opening the output file"); + TaskTimer timer("Opening the output file"); OutputFile out(config.output_file); if (Blast_tab_format::header_format(Config::cluster) == Header::SIMPLE) dynamic_cast(output_format.get())->output_header(out, true); diff --git a/src/cluster/reassign.cpp b/src/cluster/reassign.cpp index d7d1f2e5e..699469069 100644 --- a/src/cluster/reassign.cpp +++ b/src/cluster/reassign.cpp @@ -41,7 +41,7 @@ void reassign() { config.clustering.require(); message_stream << "Coverage cutoff: " << config.member_cover << '%' << endl; - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); shared_ptr db(SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NEED_LETTER_COUNT | SequenceFile::Flags::ACC_TO_OID_MAPPING | SequenceFile::Flags::OID_TO_ACC_MAPPING, SequenceFile::Metadata())); config.db_size = db->letters(); timer.finish(); diff --git a/src/contrib/mcl/sparse_matrix_stream.h b/src/contrib/mcl/sparse_matrix_stream.h index 7dee684b5..6e9f5debe 100644 --- a/src/contrib/mcl/sparse_matrix_stream.h +++ b/src/contrib/mcl/sparse_matrix_stream.h @@ -25,7 +25,7 @@ along with this program. If not, see . #include #include #include -#include +// #include #include "../../util/data_structures/lazy_disjoint_set.h" #include "../util/io/consumer.h" #include "../../cluster/cluster.h" diff --git a/src/data/blastdb/blastdb.cpp b/src/data/blastdb/blastdb.cpp index a8e0d7c00..61e6bfeb2 100644 --- a/src/data/blastdb/blastdb.cpp +++ b/src/data/blastdb/blastdb.cpp @@ -403,7 +403,7 @@ void BlastDB::init_random_access(const size_t query_block, const size_t ref_bloc load_dictionary(query_block, ref_block); if (flag_any(flags_, Flags::FULL_TITLES)) return; - task_timer timer("Loading accessions"); + TaskTimer timer("Loading accessions"); vector paths; if(db_.get()) db_->FindVolumePaths(paths); diff --git a/src/data/dmnd/dmnd.cpp b/src/data/dmnd/dmnd.cpp index 93c38cd77..55aa92949 100644 --- a/src/data/dmnd/dmnd.cpp +++ b/src/data/dmnd/dmnd.cpp @@ -44,7 +44,6 @@ along with this program. If not, see . using std::tuple; using std::string; using std::list; -using std::cout; using std::endl; using std::unique_ptr; using std::pair; @@ -235,8 +234,8 @@ void DatabaseFile::make_db() else message_stream << "Database input file: " << input_file_name << endl; - task_timer total; - task_timer timer("Opening the database file", true); + TaskTimer total; + TaskTimer timer("Opening the database file", true); value_traits = (config.dbtype == SequenceType::amino_acid) ? amino_acid_traits : nucleotide_traits; FastaFile db_file({ input_file_name }, Metadata (), Flags::NONE, value_traits); @@ -413,7 +412,7 @@ bool DatabaseFile::is_diamond_db(const string &file_name) { void DatabaseFile::create_partition_fixednumber(size_t n) { size_t max_letters_balanced = static_cast(std::ceil(static_cast(ref_header.letters)/static_cast(n))); - cout << "Fixed number partitioning using " << max_letters_balanced << " (" << n << ")" << endl; + message_stream << "Fixed number partitioning using " << max_letters_balanced << " (" << n << ")" << endl; this->create_partition(max_letters_balanced); } @@ -425,7 +424,7 @@ void DatabaseFile::create_partition_balanced(size_t max_letters) { } void DatabaseFile::create_partition(size_t max_letters) { - task_timer timer("Create partition of DatabaseFile"); + TaskTimer timer("Create partition of DatabaseFile"); size_t letters = 0, seqs = 0, total_seqs = 0; int i_chunk = 0; @@ -647,7 +646,7 @@ const char* const SEQID_EXTENSION = ".seqid"; void DatabaseFile::prep_db() { config.database.require(); - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); DatabaseFile db(config.database); timer.go("Writing seqid list"); db.make_seqid_list(); // db.file_name() + SEQID_EXTENSION); diff --git a/src/data/enum_seeds.h b/src/data/enum_seeds.h index 921c55826..63de63b4c 100644 --- a/src/data/enum_seeds.h +++ b/src/data/enum_seeds.h @@ -107,7 +107,7 @@ void enum_seeds_contiguous(SequenceSet* seqs, F* f, unsigned begin, unsigned end const Sequence seq = (*seqs)[i]; if (seq.length() < It::length()) continue; It it(seq); - size_t j = 0; + Loc j = 0; while (it.good()) { if (it.get(key)) if (filter->contains(key, 0)) diff --git a/src/data/fasta/fasta_file.cpp b/src/data/fasta/fasta_file.cpp index 536f9e3a6..ee7ad38ef 100644 --- a/src/data/fasta/fasta_file.cpp +++ b/src/data/fasta/fasta_file.cpp @@ -47,7 +47,7 @@ FastaFile::FastaFile(const std::vector& file_name, Metadata metadat return; #if EXTRA /*if (exists(file_name.front() + ".fai")) { - task_timer timer("Reading faidx file", 3); + TaskTimer timer("Reading faidx file", 3); int64_t seqs = 0, letters = 0; string acc; for (const auto& f : file_name) { @@ -292,7 +292,7 @@ void FastaFile::write_seq(const Sequence& seq, const std::string& id) { } void FastaFile::prep_db(const string& path) { - task_timer timer("Indexing FASTA file"); + TaskTimer timer("Indexing FASTA file"); TextInputFile f(path); FASTA_format fmt; vector seq; diff --git a/src/data/index.cpp b/src/data/index.cpp index ef328756a..9efb6acb9 100644 --- a/src/data/index.cpp +++ b/src/data/index.cpp @@ -18,7 +18,7 @@ void makeindex() { Block* block = db.load_seqs(MAX_LETTERS, nullptr, SequenceFile::LoadFlags::SEQS); - task_timer timer("Building index"); + TaskTimer timer("Building index"); HashedSeedSet index(*block, nullptr, 0.0, Search::soft_masking_algo(Search::sensitivity_traits[(int)align_mode.sequence_type].at(config.sensitivity))); timer.go("Writing to disk"); diff --git a/src/data/sequence_file.cpp b/src/data/sequence_file.cpp index e4ef162c6..6e883e467 100644 --- a/src/data/sequence_file.cpp +++ b/src/data/sequence_file.cpp @@ -408,7 +408,7 @@ void SequenceFile::load_dictionary(const size_t query_block, const size_t ref_bl { if (!dict_file_ && !config.multiprocessing) return; - task_timer timer("Loading dictionary", 3); + TaskTimer timer("Loading dictionary", 3); if (config.multiprocessing) { dict_oid_ = vector>(ref_blocks); if (flag_any(flags_, Flags::SELF_ALN_SCORES)) diff --git a/src/data/taxon_list.cpp b/src/data/taxon_list.cpp index 1871b80d4..f3621d30e 100644 --- a/src/data/taxon_list.cpp +++ b/src/data/taxon_list.cpp @@ -95,7 +95,7 @@ static void load_mapping_file(ExternalSorter>& sorter) void TaxonList::build(OutputFile &db, ExternalSorter>& acc2oid, OId seqs, Util::Table& stats) { - task_timer timer("Loading taxonomy mapping file"); + TaskTimer timer("Loading taxonomy mapping file"); ExternalSorter> acc2taxid; load_mapping_file(acc2taxid); diff --git a/src/data/taxonomy.cpp b/src/data/taxonomy.cpp index df3711fd9..afd705a89 100644 --- a/src/data/taxonomy.cpp +++ b/src/data/taxonomy.cpp @@ -105,7 +105,7 @@ size_t Taxonomy::load_names() { void Taxonomy::init() { - task_timer timer; + TaskTimer timer; if (!config.namesdmp.empty()) { timer.go("Loading taxonomy names"); size_t n = load_names(); diff --git a/src/data/taxonomy_nodes.cpp b/src/data/taxonomy_nodes.cpp index de826b327..1ac631ce8 100644 --- a/src/data/taxonomy_nodes.cpp +++ b/src/data/taxonomy_nodes.cpp @@ -46,7 +46,7 @@ TaxonomyNodes::TaxonomyNodes(const string& file_name, const bool init_cache) void TaxonomyNodes::save(Serializer &out) { - task_timer timer("Building taxonomy nodes"); + TaskTimer timer("Building taxonomy nodes"); out.unset(Serializer::VARINT); out << parent_; out.write_raw(rank_); diff --git a/src/dna/dna_index.cpp b/src/dna/dna_index.cpp index f90faecf5..1bffa750c 100644 --- a/src/dna/dna_index.cpp +++ b/src/dna/dna_index.cpp @@ -41,7 +41,7 @@ ref_buffer_(ref_buffer) SequenceSet &ref_seqs = cfg.target->seqs(); const SeedHistogram &ref_hst = cfg.target->hst(); - task_timer timer("Building reference seed array", true); + TaskTimer timer("Building reference seed array", true); const EnumCfg enum_ref{&ref_hst.partition(), 0, 1, cfg.seed_encoding, nullptr, false, false, diff --git a/src/dp/dp.h b/src/dp/dp.h index b59085469..0072fad35 100644 --- a/src/dp/dp.h +++ b/src/dp/dp.h @@ -207,13 +207,13 @@ namespace Swipe { namespace BandedSwipe { -DECL_DISPATCH(std::list, swipe, (const Targets& targets, Params& params)) -DECL_DISPATCH(std::list, swipe_set, (const SequenceSet::ConstIterator begin, const SequenceSet::ConstIterator end, Params& params)) -DECL_DISPATCH(unsigned, bin, (HspValues v, int query_len, int score, int ungapped_score, const int64_t dp_size, unsigned score_width, const Loc mismatch_est)) -DECL_DISPATCH(std::list, anchored_swipe, (Targets& targets, const DP::AnchoredSwipe::Config& cfg)) +std::list swipe(const Targets& targets, Params& params); +std::list swipe_set(const SequenceSet::ConstIterator begin, const SequenceSet::ConstIterator end, Params& params); +unsigned bin(HspValues v, int query_len, int score, int ungapped_score, const int64_t dp_size, unsigned score_width, const Loc mismatch_est); +std::list anchored_swipe(Targets& targets, const DP::AnchoredSwipe::Config& cfg); } } -DECL_DISPATCH(std::list, banded_3frame_swipe, (const TranslatedSequence &query, Strand strand, std::vector::iterator target_begin, std::vector::iterator target_end, DpStat &stat, bool score_only, bool parallel)) +std::list banded_3frame_swipe(const TranslatedSequence& query, Strand strand, std::vector::iterator target_begin, std::vector::iterator target_end, DpStat& stat, bool score_only, bool parallel); diff --git a/src/dp/pfscan/pfscan.cpp b/src/dp/pfscan/pfscan.cpp index 27cd3abaa..cc45163eb 100644 --- a/src/dp/pfscan/pfscan.cpp +++ b/src/dp/pfscan/pfscan.cpp @@ -2,6 +2,7 @@ #include "smith_waterman.h" #include "../../util/sequence/sequence.h" #include "../ungapped.h" +#include "../../util/simd/dispatch.h" using std::min; @@ -37,7 +38,7 @@ static Hsp align_dispatch_score_16(const Config& cfg) { if (h.score < SCHAR_MAX) cfg.stats.inc(Statistics::EXT_WASTED_16); if (h.score == (Score)SHRT_MAX - cfg16.score_bias) { - task_timer timer; + TaskTimer timer; using SC = StaticConfig; const Hsp h = banded_smith_waterman(cfg); cfg.stats.inc(Statistics::TIME_EXT_32, timer.microseconds()); @@ -51,7 +52,7 @@ static Hsp align_dispatch_score_16(const Config& cfg) { static Hsp align_dispatch_score(const Config& cfg) { if (cfg.query.length() >= SHRT_MAX || cfg.target.length() >= SHRT_MAX) { - task_timer timer; + TaskTimer timer; using SC = StaticConfig; const Hsp h = banded_smith_waterman(cfg); cfg.stats.inc(Statistics::TIME_EXT_32, timer.microseconds()); @@ -185,4 +186,8 @@ Hsp align_anchored(const Anchor& anchor, const Config& cfg) { return h; } -}}} \ No newline at end of file +} + +DISPATCH_2(Hsp, align_anchored, const Anchor&, anchor, const Config&, cfg) + +}} \ No newline at end of file diff --git a/src/dp/pfscan/pfscan.h b/src/dp/pfscan/pfscan.h index 9fb6aa178..e161df2d1 100644 --- a/src/dp/pfscan/pfscan.h +++ b/src/dp/pfscan/pfscan.h @@ -26,10 +26,10 @@ struct Config { } }; -#ifdef __SSE2__ -DECL_DISPATCH(Hsp, align16, (const Config& cfg)) -DECL_DISPATCH(Hsp, align8, (const Config& cfg)) -#endif -DECL_DISPATCH(Hsp, align_anchored, (const Anchor& anchor, const Config& cfg)) +//#ifdef __SSE2__ +//DECL_DISPATCH(Hsp, align16, (const Config& cfg)) +//DECL_DISPATCH(Hsp, align8, (const Config& cfg)) +//#endif +Hsp align_anchored(const Anchor& anchor, const Config& cfg); }} \ No newline at end of file diff --git a/src/dp/pfscan/smith_waterman.h b/src/dp/pfscan/smith_waterman.h index 84c5447da..3ec2a88ab 100644 --- a/src/dp/pfscan/smith_waterman.h +++ b/src/dp/pfscan/smith_waterman.h @@ -101,7 +101,7 @@ Hsp FLATTEN banded_smith_waterman(const Config& cfg) { constexpr Loc SCORE_MIN = (Loc)numeric_limits::min(), SCORE_MAX = (Loc)numeric_limits::max(); const Loc channels = ::DISPATCH_ARCH::ScoreTraits::CHANNELS; - task_timer timer; + TaskTimer timer; const Loc band = cfg.d_end - cfg.d_begin, qlen = cfg.query.length(), diff --git a/src/dp/scan_diags.cpp b/src/dp/scan_diags.cpp index 74c417c43..471d9fa19 100644 --- a/src/dp/scan_diags.cpp +++ b/src/dp/scan_diags.cpp @@ -23,6 +23,7 @@ along with this program. If not, see . #include "../util/simd.h" #include "dp.h" #include "score_vector_int8.h" +#include "../util/simd/dispatch.h" using namespace DISPATCH_ARCH; @@ -297,4 +298,11 @@ int diag_alignment(const int* s, int count) { return best; } -}} \ No newline at end of file +} + +DISPATCH_6V(scan_diags128, const LongScoreProfile&, qp, Sequence, s, int, d_begin, int, j_begin, int, j_end, int*, out) +DISPATCH_6V(scan_diags64, const LongScoreProfile&, qp, Sequence, s, int, d_begin, int, j_begin, int, j_end, int*, out) +DISPATCH_7V(scan_diags, const LongScoreProfile&, qp, Sequence, s, int, d_begin, int, d_end, int, j_begin, int, j_end, int*, out) +DISPATCH_2(int, diag_alignment, const int*, s, int, count) + +} \ No newline at end of file diff --git a/src/dp/scan_diags.h b/src/dp/scan_diags.h index b2cca2088..ab376a138 100644 --- a/src/dp/scan_diags.h +++ b/src/dp/scan_diags.h @@ -3,9 +3,9 @@ namespace DP { -DECL_DISPATCH(void, scan_diags128, (const LongScoreProfile& qp, Sequence s, int d_begin, int j_begin, int j_end, int* out)) -DECL_DISPATCH(void, scan_diags64, (const LongScoreProfile& qp, Sequence s, int d_begin, int j_begin, int j_end, int* out)) -DECL_DISPATCH(void, scan_diags, (const LongScoreProfile& qp, Sequence s, int d_begin, int d_end, int j_begin, int j_end, int* out)) -DECL_DISPATCH(int, diag_alignment, (const int* s, int count)) +void scan_diags128(const LongScoreProfile& qp, Sequence s, int d_begin, int j_begin, int j_end, int* out); +void scan_diags64(const LongScoreProfile& qp, Sequence s, int d_begin, int j_begin, int j_end, int* out); +void scan_diags(const LongScoreProfile& qp, Sequence s, int d_begin, int d_end, int j_begin, int j_end, int* out); +int diag_alignment(const int* s, int count); } \ No newline at end of file diff --git a/src/dp/score_profile.cpp b/src/dp/score_profile.cpp index 6f67168e2..72653eea0 100644 --- a/src/dp/score_profile.cpp +++ b/src/dp/score_profile.cpp @@ -3,6 +3,7 @@ #include "score_vector_int8.h" #include "score_vector_int16.h" #include "../util/util.h" +#include "../util/simd/dispatch.h" using std::array; @@ -51,4 +52,9 @@ LongScoreProfile make_profile16(Sequence seq, const int8_t* cbs, int64_ return make_profile(seq, cbs, padding); } -}} \ No newline at end of file +} + +DISPATCH_3(LongScoreProfile, make_profile8, Sequence, seq, const int8_t*, cbs, int64_t, padding); +DISPATCH_3(LongScoreProfile, make_profile16, Sequence, seq, const int8_t*, cbs, int64_t, padding); + +} \ No newline at end of file diff --git a/src/dp/score_profile.h b/src/dp/score_profile.h index c98bbd3bb..37bcb9025 100644 --- a/src/dp/score_profile.h +++ b/src/dp/score_profile.h @@ -59,7 +59,7 @@ struct LongScoreProfile namespace DP { -DECL_DISPATCH(LongScoreProfile, make_profile8, (Sequence seq, const int8_t* cbs, int64_t padding)) -DECL_DISPATCH(LongScoreProfile, make_profile16, (Sequence seq, const int8_t* cbs, int64_t padding)) +LongScoreProfile make_profile8(Sequence seq, const int8_t* cbs, int64_t padding); +LongScoreProfile make_profile16(Sequence seq, const int8_t* cbs, int64_t padding); } \ No newline at end of file diff --git a/src/dp/swipe/anchored_wrapper.cpp b/src/dp/swipe/anchored_wrapper.cpp index 2a2cb242b..26969453e 100644 --- a/src/dp/swipe/anchored_wrapper.cpp +++ b/src/dp/swipe/anchored_wrapper.cpp @@ -5,6 +5,7 @@ #include "../score_vector_int8.h" #include "../score_vector_int16.h" #include "../dp/ungapped.h" +#include "../util/simd/dispatch.h" using std::list; using std::vector; @@ -125,7 +126,7 @@ static void swipe_threads(DP::AnchoredSwipe::Target* targets, int64_t c } list anchored_swipe(Targets& targets, const DP::AnchoredSwipe::Config& cfg) { - task_timer total; + TaskTimer total; TargetVector target_vec; int64_t target_count = 0, target_len = 0; @@ -137,7 +138,7 @@ list anchored_swipe(Targets& targets, const DP::AnchoredSwipe::Config& cfg) max_target_len = std::max(max_target_len, t.seq.length()); } - task_timer timer; + TaskTimer timer; //target_vec.int8.reserve(target_count * 2); target_vec.int16.reserve(target_count * 2); cfg.stats.inc(Statistics::TIME_ANCHORED_SWIPE_ALLOC, timer.microseconds()); @@ -233,4 +234,8 @@ list anchored_swipe(Targets& targets, const DP::AnchoredSwipe::Config& cfg) return out; } -}}} \ No newline at end of file +} + +DISPATCH_2(std::list, anchored_swipe, Targets&, targets, const DP::AnchoredSwipe::Config&, cfg) + +}} \ No newline at end of file diff --git a/src/dp/swipe/banded_3frame_swipe.cpp b/src/dp/swipe/banded_3frame_swipe.cpp index 3d5c32ac6..a67fdbac5 100644 --- a/src/dp/swipe/banded_3frame_swipe.cpp +++ b/src/dp/swipe/banded_3frame_swipe.cpp @@ -26,6 +26,7 @@ along with this program. If not, see . #include "target_iterator.h" #include "../../util/data_structures/mem_buffer.h" #include "../score_vector_int16.h" +#include "../util/simd/dispatch.h" using std::list; using std::thread; @@ -35,17 +36,17 @@ using std::vector; namespace DISPATCH_ARCH { -template +template struct Banded3FrameSwipeMatrix { struct ColumnIterator { - ColumnIterator(_sv* hgap_front, _sv* score_front) : + ColumnIterator(Sv* hgap_front, Sv* score_front) : hgap_ptr_(hgap_front), score_ptr_(score_front) { - sm4 = ScoreTraits<_sv>::zero(); + sm4 = ScoreTraits::zero(); sm3 = *score_ptr_; sm2 = *(score_ptr_ + 1); } @@ -56,26 +57,26 @@ struct Banded3FrameSwipeMatrix sm3 = sm2; sm2 = *(score_ptr_ + 1); } - inline _sv hgap() const + inline Sv hgap() const { return *(hgap_ptr_ + 3); } - inline void set_hgap(const _sv& x) + inline void set_hgap(const Sv& x) { *hgap_ptr_ = x; } - inline void set_score(const _sv& x) + inline void set_score(const Sv& x) { *score_ptr_ = x; } void set_zero() { - *(score_ptr_ - 1) = ScoreTraits<_sv>::zero(); - *(score_ptr_ - 2) = ScoreTraits<_sv>::zero(); - *(score_ptr_ - 3) = ScoreTraits<_sv>::zero(); + *(score_ptr_ - 1) = ScoreTraits::zero(); + *(score_ptr_ - 2) = ScoreTraits::zero(); + *(score_ptr_ - 3) = ScoreTraits::zero(); } - _sv *hgap_ptr_, *score_ptr_; - _sv sm4, sm3, sm2; + Sv *hgap_ptr_, *score_ptr_; + Sv sm4, sm3, sm2; }; Banded3FrameSwipeMatrix(size_t band, size_t cols) : @@ -83,8 +84,8 @@ struct Banded3FrameSwipeMatrix { hgap_.resize(band + 3); score_.resize(band + 1); - std::fill(hgap_.begin(), hgap_.end(), _sv()); - std::fill(score_.begin(), score_.end(), _sv()); + std::fill(hgap_.begin(), hgap_.end(), Sv()); + std::fill(score_.begin(), score_.end(), Sv()); } inline ColumnIterator begin(size_t offset, size_t col) @@ -99,24 +100,28 @@ struct Banded3FrameSwipeMatrix private: const size_t band_; - static thread_local MemBuffer<_sv> hgap_, score_; +#ifdef USE_TLS + static thread_local MemBuffer hgap_, score_; +#else + MemBuffer hgap_, score_; +#endif }; -template +template struct Banded3FrameSwipeTracebackMatrix { - typedef typename ScoreTraits<_sv>::Score Score; + typedef typename ScoreTraits::Score Score; struct ColumnIterator { - ColumnIterator(_sv* hgap_front, _sv* score_front, _sv* score_front1) : + ColumnIterator(Sv* hgap_front, Sv* score_front, Sv* score_front1) : hgap_ptr_(hgap_front), score_ptr_(score_front), score_ptr1_(score_front1) { - sm4 = ScoreTraits<_sv>::zero(); + sm4 = ScoreTraits::zero(); sm3 = *(score_ptr_++); sm2 = *(score_ptr_); } @@ -127,26 +132,26 @@ struct Banded3FrameSwipeTracebackMatrix sm3 = sm2; sm2 = *score_ptr_; } - inline _sv hgap() const + inline Sv hgap() const { return *(hgap_ptr_ + 3); } - inline void set_hgap(const _sv& x) + inline void set_hgap(const Sv& x) { *hgap_ptr_ = x; } - inline void set_score(const _sv& x) + inline void set_score(const Sv& x) { *score_ptr1_ = x; } void set_zero() { - *(score_ptr1_ - 1) = ScoreTraits<_sv>::zero(); - *(score_ptr1_ - 2) = ScoreTraits<_sv>::zero(); - *(score_ptr1_ - 3) = ScoreTraits<_sv>::zero(); + *(score_ptr1_ - 1) = ScoreTraits::zero(); + *(score_ptr1_ - 2) = ScoreTraits::zero(); + *(score_ptr1_ - 3) = ScoreTraits::zero(); } - _sv *hgap_ptr_, *score_ptr_, *score_ptr1_; - _sv sm4, sm3, sm2; + Sv* hgap_ptr_, *score_ptr_, *score_ptr1_; + Sv sm4, sm3, sm2; }; struct TracebackIterator @@ -166,26 +171,26 @@ struct Banded3FrameSwipeTracebackMatrix } Score sm3() const { - return *(score_ - (band_ + 1) * ScoreTraits<_sv>::CHANNELS); + return *(score_ - (band_ + 1) * ScoreTraits::CHANNELS); } Score sm4() const { - return *(score_ - (band_ + 2) * ScoreTraits<_sv>::CHANNELS); + return *(score_ - (band_ + 2) * ScoreTraits::CHANNELS); } Score sm2() const { - return *(score_ - band_ * ScoreTraits<_sv>::CHANNELS); + return *(score_ - band_ * ScoreTraits::CHANNELS); } void walk_diagonal() { - score_ -= (band_ + 1) * ScoreTraits<_sv>::CHANNELS; + score_ -= (band_ + 1) * ScoreTraits::CHANNELS; --i; --j; assert(i >= -1 && j >= -1); } void walk_forward_shift() { - score_ -= (band_ + 2) * ScoreTraits<_sv>::CHANNELS; + score_ -= (band_ + 2) * ScoreTraits::CHANNELS; --i; --j; --frame; @@ -197,7 +202,7 @@ struct Banded3FrameSwipeTracebackMatrix } void walk_reverse_shift() { - score_ -= band_ * ScoreTraits<_sv>::CHANNELS; + score_ -= band_ * ScoreTraits::CHANNELS; --i; --j; ++frame; @@ -210,8 +215,8 @@ struct Banded3FrameSwipeTracebackMatrix pair walk_gap(int d0, int d1) { const int i0 = std::max(d0 + j, 0), j0 = std::max(i - d1, -1); - const Score *h = score_ - (band_ - 2) * ScoreTraits<_sv>::CHANNELS, *h0 = score_ - (j - j0) * (band_ - 2) * ScoreTraits<_sv>::CHANNELS; - const Score *v = score_ - 3 * ScoreTraits<_sv>::CHANNELS, *v0 = score_ - (i - i0 + 1) * 3 * ScoreTraits<_sv>::CHANNELS; + const Score *h = score_ - (band_ - 2) * ScoreTraits::CHANNELS, *h0 = score_ - (j - j0) * (band_ - 2) * ScoreTraits::CHANNELS; + const Score *v = score_ - 3 * ScoreTraits::CHANNELS, *v0 = score_ - (i - i0 + 1) * 3 * ScoreTraits::CHANNELS; const Score score = this->score(); const Score e = score_matrix.gap_extend(); Score g = score_matrix.gap_open() + e; @@ -225,8 +230,8 @@ struct Banded3FrameSwipeTracebackMatrix walk_vgap(v, l); return std::make_pair(op_insertion, l); } - h -= (band_ - 2) * ScoreTraits<_sv>::CHANNELS; - v -= 3 * ScoreTraits<_sv>::CHANNELS; + h -= (band_ - 2) * ScoreTraits::CHANNELS; + v -= 3 * ScoreTraits::CHANNELS; ++l; g += e; } @@ -235,7 +240,7 @@ struct Banded3FrameSwipeTracebackMatrix walk_vgap(v, l); return std::make_pair(op_insertion, l); } - v -= 3 * ScoreTraits<_sv>::CHANNELS; + v -= 3 * ScoreTraits::CHANNELS; ++l; g += e; } @@ -244,7 +249,7 @@ struct Banded3FrameSwipeTracebackMatrix walk_hgap(h, l); return std::make_pair(op_deletion, l); } - h -= (band_ - 2) * ScoreTraits<_sv>::CHANNELS; + h -= (band_ - 2) * ScoreTraits::CHANNELS; ++l; g += e; } @@ -272,7 +277,7 @@ struct Banded3FrameSwipeTracebackMatrix const int i_ = std::max(-i0, 0) * 3, i1 = (int)std::min(band_, size_t(dna_len - 2 - i0 * 3)); const Score *s = (Score*)(&score_[col*(band_ + 1) + i_]) + channel; - for (int i = i_; i < i1; ++i, s += ScoreTraits<_sv>::CHANNELS) + for (int i = i_; i < i1; ++i, s += ScoreTraits::CHANNELS) if (*s == score) return TracebackIterator(s, band_, i % 3, i0 + i / 3, j); throw std::runtime_error("Trackback error."); @@ -283,7 +288,7 @@ struct Banded3FrameSwipeTracebackMatrix { hgap_.resize(band + 3); score_.resize((band + 1) * (cols + 1)); - const _sv z = _sv(); + const Sv z = Sv(); std::fill(hgap_.begin(), hgap_.end(), z); std::fill(score_.begin(), score_.begin() + band + 1, z); for (size_t i = 0; i < cols; ++i) @@ -302,14 +307,20 @@ struct Banded3FrameSwipeTracebackMatrix private: const size_t band_; - static thread_local MemBuffer<_sv> hgap_; - MemBuffer<_sv> score_; +#ifdef USE_TLS + static thread_local MemBuffer hgap_; +#else + MemBuffer hgap_; +#endif + MemBuffer score_; }; -template thread_local MemBuffer<_sv> Banded3FrameSwipeMatrix<_sv>::hgap_; -template thread_local MemBuffer<_sv> Banded3FrameSwipeMatrix<_sv>::score_; -template thread_local MemBuffer<_sv> Banded3FrameSwipeTracebackMatrix<_sv>::hgap_; +#ifdef USE_TLS +template thread_local MemBuffer Banded3FrameSwipeMatrix::hgap_; +template thread_local MemBuffer Banded3FrameSwipeMatrix::score_; +template thread_local MemBuffer Banded3FrameSwipeTracebackMatrix::hgap_; +#endif template struct Banded3FrameSwipeMatrixRef @@ -561,7 +572,7 @@ list banded_3frame_swipe(const TranslatedSequence &query, Strand strand, ve { vector overflow16, overflow32; #ifdef __SSE2__ - task_timer timer("Banded 3frame swipe (sort)", parallel ? 3 : UINT_MAX); + TaskTimer timer("Banded 3frame swipe (sort)", parallel ? 3 : UINT_MAX); std::stable_sort(target_begin, target_end); list out; if (parallel) { @@ -607,4 +618,6 @@ list banded_3frame_swipe(const TranslatedSequence &query, Strand strand, ve #endif } -} \ No newline at end of file +} + +DISPATCH_7(std::list, banded_3frame_swipe, const TranslatedSequence&, query, Strand, strand, std::vector::iterator, target_begin, std::vector::iterator, target_end, DpStat&, stat, bool, score_only, bool, parallel) \ No newline at end of file diff --git a/src/dp/swipe/banded_matrix.h b/src/dp/swipe/banded_matrix.h index 3cf534f94..bd5e0eea6 100644 --- a/src/dp/swipe/banded_matrix.h +++ b/src/dp/swipe/banded_matrix.h @@ -129,7 +129,7 @@ struct Matrix Sv operator[](int i) const { return score_[i]; } -#ifdef __APPLE__ +#if defined(__APPLE__) || !defined(USE_TLS) MemBuffer hgap_, score_; #else static thread_local MemBuffer hgap_, score_; @@ -435,7 +435,7 @@ struct TracebackVectorMatrix return _sv(); } -#ifdef __APPLE__ +#if defined(__APPLE__) || !defined(USE_TLS) MemBuffer<_sv> hgap_, score_; #else static thread_local MemBuffer<_sv> hgap_, score_; @@ -445,7 +445,7 @@ struct TracebackVectorMatrix int band_; }; -#ifndef __APPLE__ +#if !defined(__APPLE__) && defined(USE_TLS) template thread_local MemBuffer Matrix::hgap_; template thread_local MemBuffer Matrix::score_; template thread_local MemBuffer TracebackVectorMatrix::hgap_; diff --git a/src/dp/swipe/banded_swipe.h b/src/dp/swipe/banded_swipe.h index a9d20bbc7..afdb70ff9 100644 --- a/src/dp/swipe/banded_swipe.h +++ b/src/dp/swipe/banded_swipe.h @@ -329,7 +329,7 @@ list swipe(const vector::const_iterator subject_begin, const vect } list out; - task_timer timer; + TaskTimer timer; for (int i = 0; i < targets.n_targets; ++i) { if (best[i] < ScoreTraits<_sv>::max_score() && !overflow_stats<_sv>(stats[i])) { int score = ScoreTraits<_sv>::int_score(best[i]); diff --git a/src/dp/swipe/full_matrix.h b/src/dp/swipe/full_matrix.h index 7abdf70d2..ce3091e16 100644 --- a/src/dp/swipe/full_matrix.h +++ b/src/dp/swipe/full_matrix.h @@ -85,14 +85,14 @@ struct Matrix return score_[i + 1]; } private: -#ifdef __APPLE__ +#if defined(__APPLE__) || !defined(USE_TLS) MemBuffer hgap_, score_; #else static thread_local MemBuffer hgap_, score_; #endif }; -#ifndef __APPLE__ +#if !defined(__APPLE__) && defined(USE_TLS) template thread_local MemBuffer Matrix::hgap_; template thread_local MemBuffer Matrix::score_; #endif @@ -240,7 +240,7 @@ struct TracebackVectorMatrix return Sv(); } -#ifdef __APPLE__ +#if defined(__APPLE__) || !defined(USE_TLS) MemBuffer hgap_, score_; #else static thread_local MemBuffer hgap_, score_; @@ -250,7 +250,7 @@ struct TracebackVectorMatrix int rows_, cols_; }; -#ifndef __APPLE__ +#if !defined(__APPLE__) && defined(USE_TLS) template thread_local MemBuffer TracebackVectorMatrix::hgap_; template thread_local MemBuffer TracebackVectorMatrix::score_; #endif diff --git a/src/dp/swipe/swipe_wrapper.cpp b/src/dp/swipe/swipe_wrapper.cpp index 306997e8d..3da0a00fa 100644 --- a/src/dp/swipe/swipe_wrapper.cpp +++ b/src/dp/swipe/swipe_wrapper.cpp @@ -37,6 +37,7 @@ along with this program. If not, see . #include "full_swipe.h" #include "banded_swipe.h" #include "../../util/geo/geo.h" +#include "../../util/simd/dispatch.h" using std::list; using std::atomic; @@ -251,7 +252,7 @@ static list swipe_threads(const It begin, const It end, vector &o atomic next(0); if (flag_any(p.flags, Flags::PARALLEL)) { - task_timer timer("Banded swipe (run)", config.target_parallel_verbosity); + TaskTimer timer("Banded swipe (run)", config.target_parallel_verbosity); const size_t n = config.threads_align ? config.threads_align : config.threads_; vector threads; vector> thread_out(n); @@ -313,7 +314,7 @@ static pair, vector> swipe_bin(const unsigned bin, const It if (!flag_any(p.flags, Flags::FULL_MATRIX)) sort(begin, end); p.stat.inc(Statistics::value(Statistics::EXT8 + bin), end - begin); - task_timer timer; + TaskTimer timer; switch (bin) { #ifdef __SSE4_1__ case 0: @@ -439,4 +440,10 @@ list swipe_set(const SequenceSet::ConstIterator begin, const SequenceSet::C return result.first; } -}}} +} + +DISPATCH_2(std::list, swipe, const Targets&, targets, Params&, params) +DISPATCH_3(std::list, swipe_set, const SequenceSet::ConstIterator, begin, const SequenceSet::ConstIterator, end, Params&, params) +DISPATCH_7(unsigned, bin, HspValues, v, int, query_len, int, score, int, ungapped_score, const int64_t, dp_size, unsigned, score_width, const Loc, mismatch_est) + +}} diff --git a/src/dp/ungapped_simd.cpp b/src/dp/ungapped_simd.cpp index b800970e7..bb3807704 100644 --- a/src/dp/ungapped_simd.cpp +++ b/src/dp/ungapped_simd.cpp @@ -23,6 +23,7 @@ along with this program. If not, see . #include "ungapped_simd.h" #include "../util/simd/transpose.h" #include "ungapped.h" +#include "../util/simd/dispatch.h" using namespace DISPATCH_ARCH; @@ -75,13 +76,18 @@ void window_ungapped_best(const Letter* query, const Letter** subjects, int subj } #endif #if ARCH_ID == 2 - else if (subject_count <= 16) - ::DP::ARCH_SSE4_1::window_ungapped(query, subjects, subject_count, window, out); + //else if (subject_count <= 16) + //::DP::ARCH_SSE4_1::window_ungapped(query, subjects, subject_count, window, out); else - ::DP::ARCH_AVX2::window_ungapped(query, subjects, subject_count, window, out); + window_ungapped(query, subjects, subject_count, window, out); #elif defined(__SSE4_1__) window_ungapped(query, subjects, subject_count, window, out); #endif } -}} \ No newline at end of file +} + +DISPATCH_5(void, window_ungapped, const Letter*, query, const Letter**, subjects, int, subject_count, int, window, int*, out); +DISPATCH_5(void, window_ungapped_best, const Letter*, query, const Letter**, subjects, int, subject_count, int, window, int*, out); + +} \ No newline at end of file diff --git a/src/dp/ungapped_simd.h b/src/dp/ungapped_simd.h index 31dd107a6..f9b4a1d99 100644 --- a/src/dp/ungapped_simd.h +++ b/src/dp/ungapped_simd.h @@ -17,12 +17,11 @@ along with this program. If not, see . ****/ #pragma once -#include "../util/simd.h" #include "../basic/value.h" namespace DP { -DECL_DISPATCH(void, window_ungapped, (const Letter* query, const Letter** subjects, int subject_count, int window, int* out)) -DECL_DISPATCH(void, window_ungapped_best, (const Letter* query, const Letter** subjects, int subject_count, int window, int* out)) +void window_ungapped(const Letter* query, const Letter** subjects, int subject_count, int window, int* out); +void window_ungapped_best(const Letter* query, const Letter** subjects, int subject_count, int window, int* out); } \ No newline at end of file diff --git a/src/lib/alp/sls_alp.hpp b/src/lib/alp/sls_alp.hpp index 7d3f6c474..ff158b934 100644 --- a/src/lib/alp/sls_alp.hpp +++ b/src/lib/alp/sls_alp.hpp @@ -37,7 +37,7 @@ Contents: Ascending ladder points simulation ******************************************************************************/ #include -#include +//#include #include #include #include diff --git a/src/lib/alp/sls_alp_data.cpp b/src/lib/alp/sls_alp_data.cpp index 8e5c4d62e..7f76be3d0 100644 --- a/src/lib/alp/sls_alp_data.cpp +++ b/src/lib/alp/sls_alp_data.cpp @@ -33,6 +33,7 @@ Contents: Input data for the ascending ladder points simulation ******************************************************************************/ +#include #include "sls_alp_data.hpp" using namespace Sls; diff --git a/src/lib/alp/sls_alp_data.hpp b/src/lib/alp/sls_alp_data.hpp index 2a70b5ada..e4855f508 100644 --- a/src/lib/alp/sls_alp_data.hpp +++ b/src/lib/alp/sls_alp_data.hpp @@ -38,7 +38,7 @@ Contents: Contains input data #include "sls_basic.hpp" #include -#include +// #include #include #include #include diff --git a/src/lib/alp/sls_alp_regression.hpp b/src/lib/alp/sls_alp_regression.hpp index cfe6742ac..778f514cf 100644 --- a/src/lib/alp/sls_alp_regression.hpp +++ b/src/lib/alp/sls_alp_regression.hpp @@ -38,7 +38,7 @@ Contents: Regression methods #include "sls_basic.hpp" #include -#include +// #include #include #include #include diff --git a/src/lib/alp/sls_alp_sim.hpp b/src/lib/alp/sls_alp_sim.hpp index 527fd7350..8cf483396 100644 --- a/src/lib/alp/sls_alp_sim.hpp +++ b/src/lib/alp/sls_alp_sim.hpp @@ -38,7 +38,7 @@ Contents: Simulation of Gumbel parameters #include -#include +// #include #include #include #include diff --git a/src/lib/alp/sls_pvalues.hpp b/src/lib/alp/sls_pvalues.hpp index c6e11a97a..53a0add3e 100644 --- a/src/lib/alp/sls_pvalues.hpp +++ b/src/lib/alp/sls_pvalues.hpp @@ -44,7 +44,7 @@ Contents: P-values calculation routines #include #include -#include +// #include namespace Sls { diff --git a/src/lib/interval_tree/interval_tree.hpp b/src/lib/interval_tree/interval_tree.hpp index 364141ba9..1ba0cc268 100644 --- a/src/lib/interval_tree/interval_tree.hpp +++ b/src/lib/interval_tree/interval_tree.hpp @@ -11,7 +11,7 @@ #include #include -#include +//#include namespace lib_interval_tree { diff --git a/src/lib/tantan/LambdaCalculator.cc b/src/lib/tantan/LambdaCalculator.cc index 5a6e06d9d..893b48e3b 100644 --- a/src/lib/tantan/LambdaCalculator.cc +++ b/src/lib/tantan/LambdaCalculator.cc @@ -7,7 +7,6 @@ #include #include #include -//#include using namespace std; static double roundToFewDigits(double x) diff --git a/src/masking/tantan.cpp b/src/masking/tantan.cpp index de7b1542e..014df4e27 100644 --- a/src/masking/tantan.cpp +++ b/src/masking/tantan.cpp @@ -28,6 +28,7 @@ A new repeat-masking method enables specific detection of homologous sequences, #include #include "../basic/value.h" #include "def.h" +#include "../util/simd/dispatch.h" using Eigen::Array; using Eigen::Dynamic; @@ -47,9 +48,15 @@ Mask::Ranges mask(Letter *seq, if (len == 0) return {}; +#ifdef USE_TLS thread_local std::array, AMINO_ACID_COUNT> e; thread_local Array pb; thread_local Array scale; +#else + std::array, AMINO_ACID_COUNT> e; + Array pb; + Array scale; +#endif Array f(0.0), d, t; const float b2b = 1.0f - p_repeat, f2f = 1.0f - p_repeat_end, b2f0 = p_repeat * (1.0f - repeat_growth) / (1.0f - pow(repeat_growth, (float)WINDOW)); float b = 1.0; @@ -121,4 +128,8 @@ Mask::Ranges mask(Letter *seq, return ranges; } -}}} \ No newline at end of file +} + +DISPATCH_8(Mask::Ranges, mask, Letter*, seq, int, len, const float**, likelihood_ratio_matrix, float, p_repeat, float, p_repeat_end, float, repeat_decay, float, p_mask, const Letter*, maskTable) + +}} \ No newline at end of file diff --git a/src/masking/tantan.h b/src/masking/tantan.h index adce0da3e..b3d47b76b 100644 --- a/src/masking/tantan.h +++ b/src/masking/tantan.h @@ -24,6 +24,6 @@ along with this program. If not, see . namespace Util { namespace tantan { -DECL_DISPATCH(Mask::Ranges, mask, (Letter *seq, int len, const float **likelihood_ratio_matrix, float p_repeat, float p_repeat_end, float repeat_decay, float p_mask, const Letter *maskTable)) +Mask::Ranges mask(Letter* seq, int len, const float** likelihood_ratio_matrix, float p_repeat, float p_repeat_end, float repeat_decay, float p_mask, const Letter* maskTable); }} diff --git a/src/output/daa/merge.cpp b/src/output/daa/merge.cpp index a3ad76139..ce4e3d75e 100644 --- a/src/output/daa/merge.cpp +++ b/src/output/daa/merge.cpp @@ -29,7 +29,7 @@ using std::endl; using std::vector; unordered_map build_mapping(unordered_map& acc2oid, StringSet& seq_ids, vector& seq_lens, DAA_file& f) { - task_timer timer(("Reading targets for file " + f.file().file_name).c_str()); + TaskTimer timer(("Reading targets for file " + f.file().file_name).c_str()); unordered_map r; for (uint32_t i = 0; i < f.db_seqs_used(); ++i) { auto it = acc2oid.emplace(f.ref_name(i), (uint32_t)acc2oid.size()); @@ -64,7 +64,7 @@ static int64_t write_file(DAA_file& f, OutputFile& out, const unordered_map files; const int n = (int)config.input_ref_file.size(); if (n == 0) diff --git a/src/output/daa/view.cpp b/src/output/daa/view.cpp index dd93ba4db..8c725c68b 100644 --- a/src/output/daa/view.cpp +++ b/src/output/daa/view.cpp @@ -36,8 +36,8 @@ along with this program. If not, see . using std::thread; using std::unique_ptr; -using std::endl; using std::vector; +using std::endl; const unsigned view_buf_size = 32; @@ -127,14 +127,14 @@ void view_worker(DAA_file *daa, View_writer *writer, TaskQueueinit_random_access(cfg.current_query_block, config.multiprocessing ? tmp_file_names.size() : tmp_file.size()); - task_timer timer("Joining output blocks"); + TaskTimer timer("Joining output blocks"); if (tmp_file_names.size() > 0) { JoinFetcher::init(tmp_file_names); diff --git a/src/output/output_format.h b/src/output/output_format.h index 6046a5679..c8d6e4892 100644 --- a/src/output/output_format.h +++ b/src/output/output_format.h @@ -136,8 +136,8 @@ struct PAF_format : public OutputFormat PAF_format(): OutputFormat(paf, HspValues::TRANSCRIPT, Output::Flags::NONE) {} - virtual void print_query_intro(Output::Info& info) const; - virtual void print_match(const HspContext& r, const Search::Config& metadata, TextBuffer &out); + virtual void print_query_intro(Output::Info& info) const override; + virtual void print_match(const HspContext& r, Output::Info& info) override; virtual ~PAF_format() { } virtual OutputFormat* clone() const diff --git a/src/output/paf_format.cpp b/src/output/paf_format.cpp index 857bfd58f..73e6289c0 100644 --- a/src/output/paf_format.cpp +++ b/src/output/paf_format.cpp @@ -27,17 +27,17 @@ void PAF_format::print_query_intro(Output::Info& info) const } } -void PAF_format::print_match(const HspContext& r, const Search::Config& cfg, TextBuffer &out) +void PAF_format::print_match(const HspContext& r, Output::Info& info) { - out.write_until(r.query_title.c_str(), Util::Seq::id_delimiters); - out << '\t' << r.query.source().length() << '\t' + info.out.write_until(r.query_title.c_str(), Util::Seq::id_delimiters); + info.out << '\t' << r.query.source().length() << '\t' << r.query_source_range().begin_ << '\t' << r.query_source_range().end_ - 1 << '\t' << (Frame(r.frame()).strand == FORWARD ? '+' : '-') << '\t'; - print_title(out, r.target_title.c_str(), false, false, "<>"); + print_title(info.out, r.target_title.c_str(), false, false, "<>"); - out << '\t' << r.subject_len << '\t' + info.out << '\t' << r.subject_len << '\t' << r.subject_range().begin_ << '\t' << r.subject_range().end_ - 1 << '\t' << r.identities() << '\t' @@ -46,6 +46,6 @@ void PAF_format::print_match(const HspContext& r, const Search::Config& cfg, Tex << "AS:i:" << (uint32_t)score_matrix.bitscore(r.score()) << '\t' << "ZR:i:" << r.score() << '\t' << "ZE:f:"; - out.print_e(r.evalue()); - out << '\n'; + info.out.print_e(r.evalue()); + info.out << '\n'; } \ No newline at end of file diff --git a/src/output/taxon_format.cpp b/src/output/taxon_format.cpp index 0a7b1a44a..fb98ec0d1 100644 --- a/src/output/taxon_format.cpp +++ b/src/output/taxon_format.cpp @@ -24,7 +24,6 @@ along with this program. If not, see . #include "../util/sequence/sequence.h" using std::set; -using std::endl; using std::vector; void Taxon_format::print_match(const HspContext &r, Output::Info &info) @@ -38,7 +37,7 @@ void Taxon_format::print_match(const HspContext &r, Output::Info &info) taxid = info.db->taxon_nodes().get_lca(taxid, *i); } catch (std::exception &) { - std::cerr << "Query=" << r.query_title << endl << "Subject=" << r.target_title << endl; + fprintf(stderr, "Query=%s Subject=%s\n", r.query_title.c_str(), r.target_title.c_str()); throw; } } diff --git a/src/run/double_indexed.cpp b/src/run/double_indexed.cpp index b4fe4e8b7..e3656909a 100644 --- a/src/run/double_indexed.cpp +++ b/src/run/double_indexed.cpp @@ -104,7 +104,7 @@ static void run_ref_chunk(SequenceFile &db_file, PtrVector &tmp_file, Config& cfg) { - task_timer timer; + TaskTimer timer; log_rss(); auto& query_seqs = cfg.query->seqs(); @@ -240,7 +240,7 @@ static void run_query_iteration(const unsigned query_iteration, PtrVector& tmp_file, Config& options) { - task_timer timer; + TaskTimer timer; auto P = Parallelizer::get(); auto& query_seqs = options.query->seqs(); auto& db_file = *options.db; @@ -423,7 +423,7 @@ static void run_query_chunk(Consumer &master_out, Config &options) { auto P = Parallelizer::get(); - task_timer timer; + TaskTimer timer; auto& db_file = *options.db; auto& query_seqs = options.query->seqs(); @@ -531,7 +531,7 @@ static void run_query_chunk(Consumer &master_out, options.query.reset(); } -static void master_thread(task_timer &total_timer, Config &options) +static void master_thread(TaskTimer &total_timer, Config &options) { log_rss(); SequenceFile* db_file = options.db.get(); @@ -582,7 +582,7 @@ static void master_thread(task_timer &total_timer, Config &options) db_file->create_partition_balanced((size_t)(config.chunk_size*1e9)); } - task_timer timer("Opening the input file", true); + TaskTimer timer("Opening the input file", true); if (!options.self) { if (config.query_file.empty() && !options.query_file) { std::cerr << "Query file parameter (--query/-q) is missing. Input will be read from stdin." << endl; @@ -600,7 +600,7 @@ static void master_thread(task_timer &total_timer, Config &options) if (config.command == ::Config::blastn) load_flags |= SequenceFile::LoadFlags::DNA_PRESERVATION; if (config.multiprocessing && config.mp_init) { - task_timer timer("Counting query blocks", true); + TaskTimer timer("Counting query blocks", true); size_t block_count = 0; do { @@ -727,7 +727,7 @@ static void master_thread(task_timer &total_timer, Config &options) void run(const shared_ptr& db, const shared_ptr& query, const shared_ptr& out, const shared_ptr& db_filter) { - task_timer total; + TaskTimer total; align_mode = AlignMode(AlignMode::from_command(config.command)); (align_mode.sequence_type == SequenceType::amino_acid) ? value_traits = amino_acid_traits : value_traits = nucleotide_traits; @@ -755,7 +755,7 @@ void run(const shared_ptr& db, const shared_ptr& que if (cfg.output_format->needs_taxon_ranks || taxon_culling) metadata_flags |= SequenceFile::Metadata::TAXON_RANKS; - task_timer timer; + TaskTimer timer; SequenceFile::Flags flags(SequenceFile::Flags::NEED_LETTER_COUNT); if (flag_any(cfg.output_format->flags, Output::Flags::ALL_SEQIDS)) flags |= SequenceFile::Flags::ALL_SEQIDS; diff --git a/src/run/main.cpp b/src/run/main.cpp index 2b825160d..67ae7e3ed 100644 --- a/src/run/main.cpp +++ b/src/run/main.cpp @@ -79,7 +79,7 @@ void cut(); #endif void split(); -namespace Benchmark { DECL_DISPATCH(void, benchmark, ()) } +namespace Benchmark { void benchmark(); } namespace Test { int run(); } namespace Cluster { diff --git a/src/search/search.h b/src/search/search.h index 9e96b6ade..2e84f52f6 100644 --- a/src/search/search.h +++ b/src/search/search.h @@ -36,6 +36,7 @@ along with this program. If not, see . #include "../util/data_structures/writer.h" #include "../data/flags.h" #include "kmer_ranking.h" +#include "../util/algo/join_result.h" // #define UNGAPPED_SPOUGE @@ -113,11 +114,7 @@ struct WorkSet { KmerRanking* kmer_ranking; }; -DECL_DISPATCH(void, stage1, (const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set)) -DECL_DISPATCH(void, stage1_query_lin, (const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set)) -DECL_DISPATCH(void, stage1_query_lin_ranked, (const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set)) -DECL_DISPATCH(void, stage1_target_lin, (const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set)) -DECL_DISPATCH(void, stage1_self, (const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set)) +void run_stage1(JoinIterator& it, Search::WorkSet* work_set, const Search::Config* cfg); } diff --git a/src/search/seed_complexity.cpp b/src/search/seed_complexity.cpp index 1517e8ca0..61a8c6963 100644 --- a/src/search/seed_complexity.cpp +++ b/src/search/seed_complexity.cpp @@ -76,7 +76,7 @@ void Search::mask_seeds(const Shape& shape, const SeedPartitionRange& range, Dou if (cfg.seed_encoding != SeedEncoding::SPACED_FACTOR) return; - task_timer timer("Masking low complexity seeds"); + TaskTimer timer("Masking low complexity seeds"); SequenceSet& query_seqs = cfg.query->seqs(); std::atomic_int seedp(range.begin()); std::atomic_size_t seed_count(0), masked_seed_count(0), query_count(0), target_count(0); diff --git a/src/search/stage0.cpp b/src/search/stage0.cpp index 6ca455b18..20dcd6d89 100644 --- a/src/search/stage0.cpp +++ b/src/search/stage0.cpp @@ -79,18 +79,10 @@ static void search_worker(atomic *seedp, const SeedPartitionRange *see unique_ptr work_set(new Search::WorkSet{ *context, *cfg, shape, {}, writer.get(), {}, {}, {}, context->kmer_ranking }); #endif int p; -#ifdef KEEP_TARGET_ID - auto f = config.lin_stage1 ? Search::stage1_query_lin_ranked - : (cfg->lin_stage1_target ? Search::stage1_target_lin - : (config.self && cfg->current_ref_block == 0 ? Search::stage1_self : Search::stage1)); -#else - auto f = config.lin_stage1 ? Search::stage1_query_lin - : (cfg->lin_stage1_target ? Search::stage1_target_lin - : (config.self && cfg->current_ref_block == 0 ? Search::stage1_self : Search::stage1)); -#endif - while ((p = (*seedp)++) < seedp_range->end()) - for (auto it = JoinIterator(query_seed_hits[p].begin(), ref_seed_hits[p].begin()); it; ++it) - f(it.r->begin(), (int32_t)it.r->size(), it.s->begin(), (int32_t)it.s->size(), *work_set); + while ((p = (*seedp)++) < seedp_range->end()) { + auto it = JoinIterator(query_seed_hits[p].begin(), ref_seed_hits[p].begin()); + run_stage1(it, work_set.get(), cfg); + } statistics += work_set->stats; } @@ -114,7 +106,7 @@ void search_shape(unsigned sid, int query_block, unsigned query_iteration, char const SeedPartitionRange range(p.begin(chunk), p.end(chunk)); current_range = range; - task_timer timer("Building reference seed array", true); + TaskTimer timer("Building reference seed array", true); SeedArray *ref_idx; const EnumCfg enum_ref{ &ref_hst.partition(), sid, sid + 1, cfg.seed_encoding, nullptr, false, false, cfg.seed_complexity_cut, query_seeds_bitset.get() || query_seeds_hashed.get() ? MaskingAlgo::NONE : cfg.soft_masking, diff --git a/src/search/stage1.h b/src/search/stage1.h new file mode 100644 index 000000000..7d7f31ea3 --- /dev/null +++ b/src/search/stage1.h @@ -0,0 +1,185 @@ +/**** +DIAMOND protein aligner +Copyright (C) 2019-2023 Max Planck Society for the Advancement of Science e.V. + +Code developed by Benjamin Buchfink + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +****/ + +#include "search.h" +#include "stage2.h" + +using std::vector; + +namespace Search { namespace DISPATCH_ARCH { + +using Container = vector>; + +static void all_vs_all(const FingerPrint* a, uint32_t na, const FingerPrint* b, uint32_t nb, FlatArray& out, unsigned hamming_filter_id) { + for (uint32_t i = 0; i < na; ++i) { + const FingerPrint e = a[i]; + out.next(); + for (uint32_t j = 0; j < nb; ++j) + if (e.match(b[j]) >= hamming_filter_id) + out.push_back(j); + } +} + +static void all_vs_all_self(const FingerPrint* a, uint32_t na, FlatArray& out, unsigned hamming_filter_id) { + for (uint32_t i = 0; i < na; ++i) { + const FingerPrint e = a[i]; + out.next(); + for (uint32_t j = i + 1; j < na; ++j) + if (e.match(a[j]) >= hamming_filter_id) + out.push_back(j); + } +} + +static void load_fps(const SeedLoc* p, size_t n, Container& v, const SequenceSet& seqs) +{ + v.clear(); + v.reserve(n); + const SeedLoc* end = p + n; + for (; p < end; ++p) { + v.emplace_back(seqs.data(*p)); + } +} + +static void FLATTEN stage1(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) +{ +#ifdef __APPLE__ + thread_local Container vq, vs; +#else + Container& vq = work_set.vq, &vs = work_set.vs; +#endif + + const int32_t tile_size = config.tile_size; + load_fps(s, ns, vs, work_set.cfg.target->seqs()); + work_set.stats.inc(Statistics::SEED_HITS, nq * ns); + load_fps(q, nq, vq, work_set.cfg.query->seqs()); + const int32_t qs = (int32_t)vq.size(), ss = (int32_t)vs.size(); + for (int32_t i = 0; i < qs; i += tile_size) { + for (int32_t j = 0; j < ss; j += tile_size) { + work_set.hits.clear(); + all_vs_all(vq.data() + i, std::min(tile_size, qs - i), vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); + search_tile(work_set.hits, i, j, q, s, work_set); + } + } +} + +static void FLATTEN stage1_query_lin(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) +{ +#ifdef __APPLE__ + thread_local Container vq, vs; +#else + Container& vq = work_set.vq, &vs = work_set.vs; +#endif + + const int32_t tile_size = config.tile_size; + load_fps(q, 1, vq, work_set.cfg.query->seqs()); + load_fps(s, ns, vs, work_set.cfg.target->seqs()); + work_set.stats.inc(Statistics::SEED_HITS, ns); + + const int32_t ss = (int32_t)vs.size(); + for (int32_t j = 0; j < ss; j += tile_size) { + work_set.hits.clear(); + all_vs_all(vq.data(), 1, vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); + search_tile(work_set.hits, 0, j, q, s, work_set); + } +} + +static void FLATTEN stage1_query_lin_ranked(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) +{ +#ifdef __APPLE__ + thread_local Container vq, vs; +#else + Container& vq = work_set.vq, &vs = work_set.vs; +#endif + + const int32_t tile_size = config.tile_size; + const int32_t ranking = work_set.kmer_ranking->highest_ranking(q, q + nq); + load_fps(q + ranking, 1, vq, work_set.cfg.query->seqs()); + load_fps(s, ns, vs, work_set.cfg.target->seqs()); + work_set.stats.inc(Statistics::SEED_HITS, ns); + + const int32_t ss = (int32_t)vs.size(); + for (int32_t j = 0; j < ss; j += tile_size) { + work_set.hits.clear(); + all_vs_all(vq.data(), 1, vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); + search_tile(work_set.hits, ranking, j, q, s, work_set); + } +} + +static void FLATTEN stage1_target_lin(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) +{ +#ifdef __APPLE__ + thread_local Container vq, vs; +#else + Container& vq = work_set.vq, & vs = work_set.vs; +#endif + + const int32_t tile_size = config.tile_size; + load_fps(q, nq, vq, work_set.cfg.query->seqs()); + load_fps(s, 1, vs, work_set.cfg.target->seqs()); + work_set.stats.inc(Statistics::SEED_HITS, nq); + + const int32_t qs = (int32_t)vq.size(); + for (int32_t j = 0; j < qs; j += tile_size) { + work_set.hits.clear(); + all_vs_all(vq.data() + j, std::min(tile_size, qs - j), vs.data(), 1, work_set.hits, work_set.cfg.hamming_filter_id); + search_tile(work_set.hits, j, 0, q, s, work_set); + } +} + +static void FLATTEN stage1_self(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) +{ +#ifdef __APPLE__ + thread_local Container vs; +#else + Container& vs = work_set.vs; +#endif + + const int32_t tile_size = config.tile_size; + load_fps(s, ns, vs, work_set.cfg.target->seqs()); + + work_set.stats.inc(Statistics::SEED_HITS, ns*(ns - 1) / 2); + const int32_t ss = (int32_t)vs.size(); + for (int32_t i = 0; i < ss; i += tile_size) { + work_set.hits.clear(); + all_vs_all_self(vs.data() + i, std::min(tile_size, ss - i), work_set.hits, work_set.cfg.hamming_filter_id); + search_tile(work_set.hits, i, i, s, s, work_set); + for (int32_t j = i + tile_size; j < ss; j += tile_size) { + work_set.hits.clear(); + all_vs_all(vs.data() + i, std::min(tile_size, ss - i), vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); + search_tile(work_set.hits, i, j, s, s, work_set); + } + } +} + +void run_stage1(JoinIterator& it, Search::WorkSet* work_set, const Search::Config* cfg) { +#ifdef KEEP_TARGET_ID + auto f = config.lin_stage1 ? stage1_query_lin_ranked + : (cfg->lin_stage1_target ? stage1_target_lin + : (config.self && cfg->current_ref_block == 0 ? stage1_self : stage1)); +#else + auto f = config.lin_stage1 ? stage1_query_lin + : (cfg->lin_stage1_target ? stage1_target_lin + : (config.self && cfg->current_ref_block == 0 ? stage1_self : stage1)); +#endif + for (; it; ++it) + f(it.r->begin(), (int32_t)it.r->size(), it.s->begin(), (int32_t)it.s->size(), *work_set); +} + +}} \ No newline at end of file diff --git a/src/search/stage1_2.cpp b/src/search/stage1_2.cpp new file mode 100644 index 000000000..4a53fbcb3 --- /dev/null +++ b/src/search/stage1_2.cpp @@ -0,0 +1,28 @@ +/**** +DIAMOND protein aligner +Copyright (C) 2019-2023 Max Planck Society for the Advancement of Science e.V. + +Code developed by Benjamin Buchfink + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +****/ + +#include "stage1.h" +#include "../util/simd/dispatch.h" + +namespace Search { + +DISPATCH_3V(run_stage1, JoinIterator&, it, Search::WorkSet*, work_set, const Search::Config*, cfg) + +} \ No newline at end of file diff --git a/src/search/stage2.cpp b/src/search/stage2.h similarity index 54% rename from src/search/stage2.cpp rename to src/search/stage2.h index 6115ce897..a0eab8082 100644 --- a/src/search/stage2.cpp +++ b/src/search/stage2.h @@ -1,8 +1,8 @@ /**** DIAMOND protein aligner -Copyright (C) 2020-2022 Max Planck Society for the Advancement of Science e.V. +Copyright (C) 2019-2023 Max Planck Society for the Advancement of Science e.V. -Code developed by Benjamin Buchfink +Code developed by Benjamin Buchfink This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -165,147 +165,4 @@ static void FLATTEN search_tile( } } -typedef vector> Container; - -static void all_vs_all(const FingerPrint* a, uint32_t na, const FingerPrint* b, uint32_t nb, FlatArray& out, unsigned hamming_filter_id) { - for (uint32_t i = 0; i < na; ++i) { - const FingerPrint e = a[i]; - out.next(); - for (uint32_t j = 0; j < nb; ++j) - if (e.match(b[j]) >= hamming_filter_id) - out.push_back(j); - } -} - -static void all_vs_all_self(const FingerPrint* a, uint32_t na, FlatArray& out, unsigned hamming_filter_id) { - for (uint32_t i = 0; i < na; ++i) { - const FingerPrint e = a[i]; - out.next(); - for (uint32_t j = i + 1; j < na; ++j) - if (e.match(a[j]) >= hamming_filter_id) - out.push_back(j); - } -} - -static void load_fps(const SeedLoc* p, size_t n, Container& v, const SequenceSet& seqs) -{ - v.clear(); - v.reserve(n); - const SeedLoc* end = p + n; - for (; p < end; ++p) { - v.emplace_back(seqs.data(*p)); - } -} - -void FLATTEN stage1(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) -{ -#ifdef __APPLE__ - thread_local Container vq, vs; -#else - Container& vq = work_set.vq, &vs = work_set.vs; -#endif - - const int32_t tile_size = config.tile_size; - load_fps(s, ns, vs, work_set.cfg.target->seqs()); - work_set.stats.inc(Statistics::SEED_HITS, nq * ns); - load_fps(q, nq, vq, work_set.cfg.query->seqs()); - const int32_t qs = (int32_t)vq.size(), ss = (int32_t)vs.size(); - for (int32_t i = 0; i < qs; i += tile_size) { - for (int32_t j = 0; j < ss; j += tile_size) { - work_set.hits.clear(); - all_vs_all(vq.data() + i, std::min(tile_size, qs - i), vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); - search_tile(work_set.hits, i, j, q, s, work_set); - } - } -} - -void FLATTEN stage1_query_lin(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) -{ -#ifdef __APPLE__ - thread_local Container vq, vs; -#else - Container& vq = work_set.vq, &vs = work_set.vs; -#endif - - const int32_t tile_size = config.tile_size; - load_fps(q, 1, vq, work_set.cfg.query->seqs()); - load_fps(s, ns, vs, work_set.cfg.target->seqs()); - work_set.stats.inc(Statistics::SEED_HITS, ns); - - const int32_t ss = (int32_t)vs.size(); - for (int32_t j = 0; j < ss; j += tile_size) { - work_set.hits.clear(); - all_vs_all(vq.data(), 1, vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); - search_tile(work_set.hits, 0, j, q, s, work_set); - } -} - -void FLATTEN stage1_query_lin_ranked(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) -{ -#ifdef __APPLE__ - thread_local Container vq, vs; -#else - Container& vq = work_set.vq, &vs = work_set.vs; -#endif - - const int32_t tile_size = config.tile_size; - const int32_t ranking = work_set.kmer_ranking->highest_ranking(q, q + nq); - load_fps(q + ranking, 1, vq, work_set.cfg.query->seqs()); - load_fps(s, ns, vs, work_set.cfg.target->seqs()); - work_set.stats.inc(Statistics::SEED_HITS, ns); - - const int32_t ss = (int32_t)vs.size(); - for (int32_t j = 0; j < ss; j += tile_size) { - work_set.hits.clear(); - all_vs_all(vq.data(), 1, vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); - search_tile(work_set.hits, ranking, j, q, s, work_set); - } -} - -void FLATTEN stage1_target_lin(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) -{ -#ifdef __APPLE__ - thread_local Container vq, vs; -#else - Container& vq = work_set.vq, & vs = work_set.vs; -#endif - - const int32_t tile_size = config.tile_size; - load_fps(q, nq, vq, work_set.cfg.query->seqs()); - load_fps(s, 1, vs, work_set.cfg.target->seqs()); - work_set.stats.inc(Statistics::SEED_HITS, nq); - - const int32_t qs = (int32_t)vq.size(); - for (int32_t j = 0; j < qs; j += tile_size) { - work_set.hits.clear(); - all_vs_all(vq.data() + j, std::min(tile_size, qs - j), vs.data(), 1, work_set.hits, work_set.cfg.hamming_filter_id); - search_tile(work_set.hits, j, 0, q, s, work_set); - } -} - -void FLATTEN stage1_self(const SeedLoc* q, int32_t nq, const SeedLoc* s, int32_t ns, WorkSet& work_set) -{ -#ifdef __APPLE__ - thread_local Container vs; -#else - Container& vs = work_set.vs; -#endif - - const int32_t tile_size = config.tile_size; - load_fps(s, ns, vs, work_set.cfg.target->seqs()); - - work_set.stats.inc(Statistics::SEED_HITS, ns*(ns - 1) / 2); - const int32_t ss = (int32_t)vs.size(); - for (int32_t i = 0; i < ss; i += tile_size) { - work_set.hits.clear(); - all_vs_all_self(vs.data() + i, std::min(tile_size, ss - i), work_set.hits, work_set.cfg.hamming_filter_id); - search_tile(work_set.hits, i, i, s, s, work_set); - for (int32_t j = i + tile_size; j < ss; j += tile_size) { - work_set.hits.clear(); - all_vs_all(vs.data() + i, std::min(tile_size, ss - i), vs.data() + j, std::min(tile_size, ss - j), work_set.hits, work_set.cfg.hamming_filter_id); - search_tile(work_set.hits, i, j, s, s, work_set); - } - } -} - }} \ No newline at end of file diff --git a/src/stats/matrix_adjust_eigen.cpp b/src/stats/matrix_adjust_eigen.cpp index 0a1e943e9..385353312 100644 --- a/src/stats/matrix_adjust_eigen.cpp +++ b/src/stats/matrix_adjust_eigen.cpp @@ -18,7 +18,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . ****/ -#include #include #include "../lib/Eigen/Dense" #include "../basic/value.h" @@ -27,7 +26,6 @@ along with this program. If not, see . // #define DYNAMIC using namespace Eigen; -using std::cout; using std::endl; //#define DEBUG_OUT(x) cout << (x) << endl diff --git a/src/stats/score_matrix.cpp b/src/stats/score_matrix.cpp index 2902635af..545d04c99 100644 --- a/src/stats/score_matrix.cpp +++ b/src/stats/score_matrix.cpp @@ -66,8 +66,7 @@ ScoreMatrix::ScoreMatrix(const string & matrix, int gap_open, int gap_extend, in matrix8u_low_(standard_matrix_->scores.data(), stop_match_score, bias_, 16), matrix8u_high_(standard_matrix_->scores.data(), stop_match_score, bias_, 16, 16), matrix16_(standard_matrix_->scores.data(), stop_match_score) -{ - +{ evaluer.initParameters(alp_params(standard_matrix_, gap_open_, gap_extend_, mmseqs_compat)); ln_k_ = std::log(evaluer.parameters().K); init_background_scores(); diff --git a/src/test/test.cpp b/src/test/test.cpp index 517dd2d13..9113d1dd6 100644 --- a/src/test/test.cpp +++ b/src/test/test.cpp @@ -100,7 +100,7 @@ static void load_seqs(SequenceFile& file) { int run() { const bool bootstrap = config.bootstrap, log = config.debug_log, to_cout = config.output_file == "stdout"; - task_timer timer("Generating test dataset"); + TaskTimer timer("Generating test dataset"); FastaFile proteins("test1", true, FastaFile::WriteAccess()); shared_ptr query_file(new FastaFile("test2", true, FastaFile::WriteAccess())), db(new FastaFile("test3", true, FastaFile::WriteAccess())); diff --git a/src/test/test_cases.cpp b/src/test/test_cases.cpp index a394a995f..7b0182e2e 100644 --- a/src/test/test_cases.cpp +++ b/src/test/test_cases.cpp @@ -65,9 +65,9 @@ const vector ref_hashes = { 0x201a627d0d128fd5, 0xe787dcb23cc5b120, 0x5aa4baf48a888be9, -0x21f14583e88a13ac, -0xea4b4492a522c624, -0x713deb9a5ae4b9e, +0xa2519e06e3bfa2fd, +0x742af8fa3e7da75c, +0x67b3a14cdd541dc3, }; } \ No newline at end of file diff --git a/src/tools/benchmark.cpp b/src/tools/benchmark.cpp index 4620ca951..f1ae23176 100644 --- a/src/tools/benchmark.cpp +++ b/src/tools/benchmark.cpp @@ -44,16 +44,16 @@ along with this program. If not, see . #include "../dp/pfscan/simd.h" #include "../dp/swipe/anchored.h" #include "../dp/swipe/config.h" +#include "../util/simd/dispatch.h" void benchmark_io(); using std::vector; +using std::endl; using std::chrono::high_resolution_clock; using std::chrono::nanoseconds; using std::chrono::duration_cast; using std::list; -using std::cout; -using std::endl; using std::array; using namespace DISPATCH_ARCH; @@ -73,7 +73,7 @@ void benchmark_hamming(const Sequence& s1, const Sequence& s2) { f1.r1 = _mm_xor_si128(f1.r1, f1.r2); volatile unsigned y = f1.match(f2); } - cout << "SSE hamming distance:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 48) * 1000 << " ps/Cell" << endl; + message_stream << "SSE hamming distance:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 48) * 1000 << " ps/Cell" << endl; } #endif @@ -93,7 +93,7 @@ void benchmark_ungapped(const Sequence& s1, const Sequence& s2) high_resolution_clock::time_point t2 = high_resolution_clock::now(); std::chrono::nanoseconds time_span = duration_cast(t2 - t1); - cout << "Scalar ungapped extension:\t" << (double)time_span.count() / (n*64) * 1000 << " ps/Cell" << endl; + message_stream << "Scalar ungapped extension:\t" << (double)time_span.count() / (n*64) * 1000 << " ps/Cell" << endl; } #if defined(__SSSE3__) && defined(__SSE4_1__) @@ -112,7 +112,7 @@ void benchmark_ssse3_shuffle(const Sequence&s1, const Sequence&s2) sv = ScoreVector(i & 15, seq); volatile auto x = sv.data_; } - cout << "SSSE3 score shuffle:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * CHANNELS) * 1000 << " ps/Letter" << endl; + message_stream << "SSSE3 score shuffle:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * CHANNELS) * 1000 << " ps/Letter" << endl; } #endif @@ -122,28 +122,28 @@ void benchmark_ungapped_sse(const Sequence&s1, const Sequence&s2) { high_resolution_clock::time_point t1 = high_resolution_clock::now(); const Letter* targets[16]; - int out[16]; + //int out[16]; for (int i = 0; i < 16; ++i) targets[i] = s2.data(); for (size_t i = 0; i < n; ++i) { - ::DP::ARCH_SSE4_1::window_ungapped(s1.data(), targets, 16, 64, out); + //::DP::ARCH_SSE4_1::window_ungapped(s1.data(), targets, 16, 64, out); } - cout << "SSE ungapped extend:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 16 * 64) * 1000 << " ps/Cell" << endl; + message_stream << "SSE ungapped extend:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 16 * 64) * 1000 << " ps/Cell" << endl; #ifdef __AVX2__ { high_resolution_clock::time_point t1 = high_resolution_clock::now(); const Letter* targets[32]; - int out[32]; + // int out[32]; for (int i = 0; i < 32; ++i) targets[i] = s2.data(); for (size_t i = 0; i < n; ++i) { - ::DP::ARCH_AVX2::window_ungapped(s1.data(), targets, 32, 64, out); + //::DP::ARCH_AVX2::window_ungapped(s1.data(), targets, 32, 64, out); } - cout << "AVX2 ungapped extend:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 32 * 64) * 1000 << " ps/Cell" << endl; + message_stream << "AVX2 ungapped extend:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 32 * 64) * 1000 << " ps/Cell" << endl; } #endif } @@ -162,7 +162,7 @@ void benchmark_transpose() { transpose((const signed char**)v, 16, out, __m128i()); in[0] = out[0]; } - cout << "Matrix transpose 16x16 bytes:\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 256) * 1000 << " ps/Letter" << endl; + message_stream << "Matrix transpose 16x16 bytes:\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 256) * 1000 << " ps/Letter" << endl; #if ARCH_ID == 2 { @@ -176,7 +176,7 @@ void benchmark_transpose() { transpose((const signed char**)v, 32, out, __m256i()); in[0] = out[0]; } - cout << "Matrix transpose 32x32 bytes:\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 32 * 32) * 1000 << " ps/Letter" << endl; + message_stream << "Matrix transpose 32x32 bytes:\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 32 * 32) * 1000 << " ps/Letter" << endl; } #endif } @@ -213,7 +213,7 @@ void mt_swipe(const Sequence& s1, const Sequence& s2) { th.emplace_back(f); for (auto& t : th) t.join(); - cout << "MT_SWIPE (int8_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "MT_SWIPE (int8_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; } void swipe(const Sequence&s1, const Sequence&s2) { @@ -237,7 +237,7 @@ void swipe(const Sequence&s1, const Sequence&s2) { for (size_t i = 0; i < n; ++i) { volatile list v = ::DP::BandedSwipe::swipe(targets, params); } - cout << "SWIPE (int8_t):\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "SWIPE (int8_t):\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; t1 = high_resolution_clock::now(); targets[1] = targets[0]; @@ -245,13 +245,13 @@ void swipe(const Sequence&s1, const Sequence&s2) { for (size_t i = 0; i < n; ++i) { volatile list v = ::DP::BandedSwipe::swipe(targets, params); } - cout << "SWIPE (int16_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "SWIPE (int16_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { volatile list v = ::DP::BandedSwipe::swipe(targets, params); } - cout << "SWIPE (int8_t, Stats):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "SWIPE (int8_t, Stats):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < 32; ++i) @@ -259,19 +259,19 @@ void swipe(const Sequence&s1, const Sequence&s2) { for (size_t i = 0; i < n; ++i) { volatile list v = ::DP::BandedSwipe::swipe(targets, params); } - cout << "SWIPE (int8_t, MatrixAdjust):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "SWIPE (int8_t, MatrixAdjust):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { volatile list v = ::DP::BandedSwipe::swipe(targets, params); } - cout << "SWIPE (int8_t, CBS):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "SWIPE (int8_t, CBS):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { volatile list v = ::DP::BandedSwipe::swipe(targets, params); } - cout << "SWIPE (int8_t, TB):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; + message_stream << "SWIPE (int8_t, TB):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / dp_size * 1000 << " ps/Cell" << endl; } #endif @@ -291,20 +291,20 @@ void banded_swipe(const Sequence &s1, const Sequence &s2) { for (size_t i = 0; i < n; ++i) { volatile auto out = ::DP::BandedSwipe::swipe(targets, params); } - cout << "Banded SWIPE (int16_t, CBS):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s1.length() * 65 * 16) * 1000 << " ps/Cell" << endl; + message_stream << "Banded SWIPE (int16_t, CBS):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s1.length() * 65 * 16) * 1000 << " ps/Cell" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { volatile auto out = ::DP::BandedSwipe::swipe(targets, params); } - cout << "Banded SWIPE (int16_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s1.length() * 65 * 16) * 1000 << " ps/Cell" << endl; + message_stream << "Banded SWIPE (int16_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s1.length() * 65 * 16) * 1000 << " ps/Cell" << endl; params.v = HspValues::TRANSCRIPT; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { volatile auto out = ::DP::BandedSwipe::swipe(targets, params); } - cout << "Banded SWIPE (int16_t, CBS, TB):" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s1.length() * 65 * 16) * 1000 << " ps/Cell" << endl; + message_stream << "Banded SWIPE (int16_t, CBS, TB):" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s1.length() * 65 * 16) * 1000 << " ps/Cell" << endl; } #if ARCH_ID == 2 @@ -335,7 +335,7 @@ void anchored_swipe(const Sequence& s1, const Sequence& s2) { DP::AnchoredSwipe::ARCH_AVX2::smith_waterman>(targets.data(), 32, options); volatile auto x = targets[0].score; } - cout << "Anchored Swipe (int8_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * cols * 64 * 32) * 1000 << " ps/Cell" << endl; + message_stream << "Anchored Swipe (int8_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * cols * 64 * 32) * 1000 << " ps/Cell" << endl; vector> targets16; for (int i = 0; i < 16; ++i) { @@ -347,7 +347,7 @@ void anchored_swipe(const Sequence& s1, const Sequence& s2) { DP::AnchoredSwipe::ARCH_AVX2::smith_waterman>(targets16.data(), 16, options); volatile auto x = targets[0].score; } - cout << "Anchored Swipe (int16_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * cols * 64 * 16) * 1000 << " ps/Cell" << endl; + message_stream << "Anchored Swipe (int16_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * cols * 64 * 16) * 1000 << " ps/Cell" << endl; DP::Targets dp_targets; Anchor a(DiagonalSegment(0, 0, 0, 0), 0, 0, 0, 0, 0); @@ -360,7 +360,7 @@ void anchored_swipe(const Sequence& s1, const Sequence& s2) { DP::BandedSwipe::anchored_swipe(dp_targets, cfg); volatile auto x = targets[0].score; } - cout << "Anchored Swipe2 (int16_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 128 * 64 * 16) * 1000 << " ps/Cell" << endl; + message_stream << "Anchored Swipe2 (int16_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * 128 * 64 * 16) * 1000 << " ps/Cell" << endl; } //#endif @@ -382,9 +382,9 @@ void prefix_scan(const Sequence& s1, const Sequence& s2) { high_resolution_clock::time_point t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { - volatile auto out = DP::PrefixScan::align16(cfg); + //volatile auto out = DP::PrefixScan::align16(cfg); } - cout << "Prefix Scan (int16_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (s2_.length() * 64 * n) * 1000 << " ps/Cell" << endl; + message_stream << "Prefix Scan (int16_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (s2_.length() * 64 * n) * 1000 << " ps/Cell" << endl; //statistics += stat; //statistics.print(); //statistics.reset(); @@ -393,9 +393,9 @@ void prefix_scan(const Sequence& s1, const Sequence& s2) { stat.reset(); t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { - volatile auto out = DP::PrefixScan::align8(cfg); + //volatile auto out = DP::PrefixScan::align8(cfg); } - cout << "Prefix Scan (int8_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (s2_.length() * 64 * n) * 1000 << " ps/Cell" << endl; + message_stream << "Prefix Scan (int8_t):\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (s2_.length() * 64 * n) * 1000 << " ps/Cell" << endl; statistics += stat; //statistics.print(); } @@ -413,7 +413,7 @@ void diag_scores(const Sequence& s1, const Sequence& s2) { DP::scan_diags128(p, s2, -32, 0, (int)s2.length(), scores); volatile int x = scores[i & 128]; } - cout << "Diagonal scores:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s2.length() * 128) * 1000 << " ps/Cell" << endl; + message_stream << "Diagonal scores:\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * s2.length() * 128) * 1000 << " ps/Cell" << endl; } #endif @@ -424,13 +424,13 @@ void evalue() { for (size_t i = 0; i < n; ++i) { x += score_matrix.evalue_norm((int)i, 300); } - cout << "Evalue:\t\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " ns" << endl; + message_stream << "Evalue:\t\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " ns" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { x += score_matrix.evalue(300, 300, 300); } - cout << "Evalue (ALP):\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " ns" << endl; + message_stream << "Evalue (ALP):\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " ns" << endl; } void matrix_adjust(const Sequence& s1, const Sequence& s2) { @@ -454,14 +454,14 @@ void matrix_adjust(const Sequence& s1, const Sequence& s2) { config.cbs_it_limit); } - cout << "Matrix adjust:\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " ms" << endl; + message_stream << "Matrix adjust:\t\t\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " ms" << endl; t1 = high_resolution_clock::now(); for (size_t i = 0; i < n; ++i) { Stats::OptimizeTargetFrequencies(mat_final.data(), joint_probs, row_probs.data(), col_probs.data(), 0.44, config.cbs_err_tolerance, config.cbs_it_limit); } - cout << "Matrix adjust (vectorized):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " micros" << endl; + message_stream << "Matrix adjust (vectorized):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n) << " micros" << endl; //Profiler::print(n); } @@ -523,4 +523,8 @@ void benchmark() { #endif } -}} +} + +DISPATCH_0V(benchmark) + +} diff --git a/src/tools/benchmark_io.cpp b/src/tools/benchmark_io.cpp index 70a1fa644..c1e4570f9 100644 --- a/src/tools/benchmark_io.cpp +++ b/src/tools/benchmark_io.cpp @@ -43,7 +43,7 @@ static void seed_hit_files() { const string file_name = "diamond_io_benchmark.tmp"; const size_t total_count = 1000000000, query_count = 50; - task_timer timer; + TaskTimer timer; if (!exists(file_name)) { timer.go("Writing output file"); @@ -96,7 +96,7 @@ static void seed_hit_files() { static void load_seqs() { if (config.chunk_size == 0.0) config.chunk_size = 2.0; - task_timer timer; + TaskTimer timer; timer.go("Opening the database"); SequenceFile* db = SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NONE); timer.finish(); @@ -122,7 +122,7 @@ static void load_raw() { const size_t N = 2 * GIGABYTES; InputFile f(config.database); vector buf(N); - task_timer timer; + TaskTimer timer; size_t n; do { timer.go("Loading data"); @@ -135,7 +135,7 @@ static void load_raw() { static void load_mmap() { static const size_t BUF = 2 * GIGABYTES; - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); SequenceFile* db = SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NONE); timer.finish(); message_stream << "Type: " << to_string(db->type()) << endl; @@ -155,7 +155,7 @@ static void load_mmap() { } static void load_mmap_mt() { - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); SequenceFile* db = SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NONE); timer.finish(); message_stream << "Type: " << to_string(db->type()) << endl; @@ -180,7 +180,7 @@ static void load_mmap_mt() { #ifdef WITH_BLASTDB void load_blast_seqid() { const size_t N = 100000; - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); SequenceFile* db = SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NONE); timer.finish(); message_stream << "Type: " << to_string(db->type()) << endl; @@ -199,7 +199,7 @@ void load_blast_seqid() { } void load_blast_seqid_lin() { - task_timer timer("Opening the database"); + TaskTimer timer("Opening the database"); SequenceFile* db = SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NONE); timer.finish(); message_stream << "Type: " << to_string(db->type()) << endl; @@ -223,7 +223,7 @@ static void sort() { typedef Deque Container; const size_t SIZE = 1 * GIGABYTES; const size_t N = SIZE / sizeof(T); - task_timer timer("Generating data"); + TaskTimer timer("Generating data"); Container v; v.reserve(N); std::default_random_engine generator; diff --git a/src/tools/benchmark_swipe.cpp b/src/tools/benchmark_swipe.cpp index 5d81006dd..8698d700e 100644 --- a/src/tools/benchmark_swipe.cpp +++ b/src/tools/benchmark_swipe.cpp @@ -8,7 +8,6 @@ using std::vector; using std::chrono::high_resolution_clock; using std::chrono::nanoseconds; using std::chrono::duration_cast; -using std::cout; using std::endl; using std::array; @@ -59,7 +58,7 @@ void swipe_cell_update() { volatile auto y = diagonal_cell[0].ident; volatile auto z = diagonal_cell[0].len; - cout << "SWIPE cell update (int8_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * N * 32) * 1000 << " ps/Cell" << endl; + //cout << "SWIPE cell update (int8_t):\t" << (double)duration_cast(high_resolution_clock::now() - t1).count() / (n * N * 32) * 1000 << " ps/Cell" << endl; } #endif diff --git a/src/tools/greedy_vertex_cover.cpp b/src/tools/greedy_vertex_cover.cpp index 55f6b245a..38422c73d 100644 --- a/src/tools/greedy_vertex_cover.cpp +++ b/src/tools/greedy_vertex_cover.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -22,7 +21,6 @@ using std::string; using std::atomic; using std::vector; using std::unordered_map; -using std::cout; using std::numeric_limits; using std::runtime_error; //using Acc = FixedString<32>; @@ -34,7 +32,7 @@ void greedy_vertex_cover() { const double cov = std::max(config.query_or_target_cover, config.member_cover); const bool triplets = config.edge_format == "triplet"; message_stream << "Coverage cutoff: " << cov << '%' << endl; - task_timer timer("Reading mapping file"); + TaskTimer timer("Reading mapping file"); //unordered_map acc2oid; unordered_map acc2oid; acc2oid.reserve(Util::Tsv::count_lines(config.database)); diff --git a/src/tools/roc.cpp b/src/tools/roc.cpp index 682cea603..2af98ee11 100644 --- a/src/tools/roc.cpp +++ b/src/tools/roc.cpp @@ -353,7 +353,7 @@ void roc() { if (config.family_map_query.empty()) throw std::runtime_error("Missing option: --family-map-query"); - task_timer timer("Loading family mapping"); + TaskTimer timer("Loading family mapping"); acc2fam = FamilyMapping(config.family_map); timer.finish(); message_stream << "#Mappings: " << acc2fam.size() << endl; diff --git a/src/tools/tsv.cpp b/src/tools/tsv.cpp index 37ac6ef9c..a44e07e90 100644 --- a/src/tools/tsv.cpp +++ b/src/tools/tsv.cpp @@ -1,4 +1,3 @@ -#include #include "../util/tsv/file.h" #include "../basic/config.h" #include "../util/log_stream.h" @@ -32,7 +31,7 @@ void cut() { return t; }); File out(Schema{ Type::STRING }, "", Flags::WRITE); - task_timer timer("", 3); + TaskTimer timer("", 3); for (;;) { timer.go("Loading data"); const Table t = f.read(READ_SIZE, config.threads_); diff --git a/src/tools/view.cpp b/src/tools/view.cpp index 6d2343ccf..51a524891 100644 --- a/src/tools/view.cpp +++ b/src/tools/view.cpp @@ -127,7 +127,7 @@ void view_tsv() { if (config.query_file.size() > 1) throw std::runtime_error("Too many arguments for query file (--query/-q)"); - task_timer timer("Opening the database file"); + TaskTimer timer("Opening the database file"); unique_ptr db(SequenceFile::auto_create({ config.database }, SequenceFile::Flags::NO_FASTA)); score_matrix = ScoreMatrix("blosum62", -1, -1, 1, 0); score_matrix.set_db_letters(config.db_size ? config.db_size : db->letters()); @@ -181,7 +181,7 @@ void view_tsv() { q = query_idx++; } if (q % 1000 == 0) - std::cout << "#Query = " << q << " time = " << timer.seconds() << endl; + message_stream << "#Query = " << q << " time = " << timer.seconds() << endl; if (query.empty()) return; TextBuffer* out = view_query(query, buf, *db, *db, cfg, stats); diff --git a/src/util/algo/greedy_vertex_cover.cpp b/src/util/algo/greedy_vertex_cover.cpp index eebab1e87..d94b60d3a 100644 --- a/src/util/algo/greedy_vertex_cover.cpp +++ b/src/util/algo/greedy_vertex_cover.cpp @@ -84,7 +84,7 @@ static void fix_assignment(vector& centroids) { template vector greedy_vertex_cover(FlatArray>& neighbors, const SuperBlockId* member_counts, bool merge_recursive) { - task_timer timer("Computing edge counts"); + TaskTimer timer("Computing edge counts"); priority_queue> q; vector centroids(neighbors.size(), -1); for (Int i = 0; i < neighbors.size(); ++i) diff --git a/src/util/algo/join_result.h b/src/util/algo/join_result.h index 30cf12787..e3cfcdcaf 100644 --- a/src/util/algo/join_result.h +++ b/src/util/algo/join_result.h @@ -21,19 +21,19 @@ along with this program. If not, see . #include "../range.h" #include "../data_structures/double_array.h" -template +template struct JoinArrayIterator { - JoinArrayIterator(_t *ptr, _t *end): + JoinArrayIterator(T *ptr, T *end): ptr_(ptr), end_(end) {} - Range<_t*> operator*() { + Range operator*() { return { ptr_ + 1, ptr_ + 1 + *ptr_ }; } - Range<_t*>* operator->() { + Range* operator->() { range_ = this->operator*(); return &range_; } @@ -57,17 +57,17 @@ struct JoinArrayIterator { private: - Range<_t*> range_; - _t *ptr_, *end_; + Range range_; + T *ptr_, *end_; }; -template +template struct JoinIterator { - typename DoubleArray<_t>::Iterator r, s; + typename DoubleArray::Iterator r, s; - JoinIterator(typename DoubleArray<_t>::Iterator r, typename DoubleArray<_t>::Iterator s): + JoinIterator(typename DoubleArray::Iterator r, typename DoubleArray::Iterator s): r(r), s(s) {} diff --git a/src/util/io/file_sink.cpp b/src/util/io/file_sink.cpp index 352d0eb21..95d9abe63 100644 --- a/src/util/io/file_sink.cpp +++ b/src/util/io/file_sink.cpp @@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . ****/ -#include #include #ifdef _MSC_VER #define NOMINMAX @@ -36,7 +35,6 @@ along with this program. If not, see . #include "file_sink.h" #include "../system.h" -using std::endl; using std::string; using std::runtime_error; diff --git a/src/util/io/file_source.cpp b/src/util/io/file_source.cpp index 920f07081..7f74c01c9 100644 --- a/src/util/io/file_source.cpp +++ b/src/util/io/file_source.cpp @@ -16,7 +16,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . ****/ -#include #include #ifdef _MSC_VER #define NOMINMAX @@ -33,7 +32,6 @@ along with this program. If not, see . #include "file_source.h" #include "../system.h" -using std::endl; using std::string; using std::runtime_error; diff --git a/src/util/io/input_file.cpp b/src/util/io/input_file.cpp index 4c2bcf6fd..a4ea8b463 100644 --- a/src/util/io/input_file.cpp +++ b/src/util/io/input_file.cpp @@ -16,8 +16,8 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . ****/ -#include #include +#include #include "../util/algo/MurmurHash3.h" #ifndef _MSC_VER #include diff --git a/src/util/log_stream.h b/src/util/log_stream.h index ce46954a2..b5ce635aa 100644 --- a/src/util/log_stream.h +++ b/src/util/log_stream.h @@ -55,31 +55,31 @@ extern MessageStream message_stream; extern MessageStream verbose_stream; extern MessageStream log_stream; -struct task_timer +struct TaskTimer { - task_timer(MessageStream& stream, unsigned level = 1) : + TaskTimer(MessageStream& stream, unsigned level = 1) : level_(level), msg_(nullptr), stream_(stream) { start(nullptr); } - task_timer(unsigned level = 1) : - task_timer(get_stream(), level) + TaskTimer(unsigned level = 1) : + TaskTimer(get_stream(), level) {} - task_timer(const char* msg, MessageStream& stream, unsigned level = 1) : + TaskTimer(const char* msg, MessageStream& stream, unsigned level = 1) : level_(level), msg_(msg), stream_(stream) { start(msg); } - task_timer(const char* msg, unsigned level = 1): - task_timer(msg, get_stream(), level) + TaskTimer(const char* msg, unsigned level = 1): + TaskTimer(msg, get_stream(), level) {} - ~task_timer() + ~TaskTimer() { -#if __cplusplus >= 201703L || defined(_MSC_VER) +#if (__cplusplus >= 201703L && !defined(__APPLE__)) || defined(_MSC_VER) if (!std::uncaught_exceptions()) #else if (!std::uncaught_exception()) diff --git a/src/util/parallel/thread_pool.h b/src/util/parallel/thread_pool.h index 59829d465..03d9efc99 100644 --- a/src/util/parallel/thread_pool.h +++ b/src/util/parallel/thread_pool.h @@ -26,7 +26,7 @@ along with this program. If not, see . #include #include #include -#include +#include #include "../log_stream.h" namespace Util { namespace Parallel { diff --git a/src/util/profiler.h b/src/util/profiler.h index 0cfb658d9..a01f42c63 100644 --- a/src/util/profiler.h +++ b/src/util/profiler.h @@ -24,7 +24,7 @@ struct Profiler { message_stream << i.first << ": " << (double)i.second/n/1e3 << " micros" << std::endl; } - task_timer timer; + TaskTimer timer; const char* key; static std::map times; diff --git a/src/util/simd.cpp b/src/util/simd.cpp index 880c8db65..25f32082d 100644 --- a/src/util/simd.cpp +++ b/src/util/simd.cpp @@ -102,6 +102,7 @@ Arch arch() { } std::string features() { + init_arch(); std::vector r; if (flags & SSSE3) r.push_back("ssse3"); diff --git a/src/util/simd.h b/src/util/simd.h index 1878ad953..c0b8cc476 100644 --- a/src/util/simd.h +++ b/src/util/simd.h @@ -57,7 +57,7 @@ std::string features(); namespace DISPATCH_ARCH { namespace SIMD { -template +template struct Vector {}; }} @@ -93,63 +93,4 @@ inline void print_16(__m256i x, std::ostream& s) { s << (int)v[i] << ' '; } -#endif - -#ifdef __SSE__ - -#if defined(__GNUC__) && !defined(__clang__) && defined(__SSE__) -#pragma GCC push_options -#pragma GCC target("arch=x86-64") -#elif defined(__clang__) && defined(__SSE__) -#pragma clang attribute push (__attribute__((target("arch=x86-64"))), apply_to=function) -#endif - -#include - -#ifdef WITH_AVX512 - -#define DECL_DISPATCH(ret, name, param) namespace ARCH_GENERIC { ret name param; }\ -namespace ARCH_SSE4_1 { ret name param; }\ -namespace ARCH_AVX2 { ret name param; }\ -namespace ARCH_AVX512 { ret name param; }\ -static inline std::function dispatch_target_##name() {\ -switch(::SIMD::arch()) {\ -case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name;\ -case ::SIMD::Arch::AVX2: return ARCH_AVX2::name;\ -case ::SIMD::Arch::AVX512: return ARCH_AVX512::name;\ -default: return ARCH_GENERIC::name;\ -}}\ -const std::function name = dispatch_target_##name(); - -#else - -#define DECL_DISPATCH(ret, name, param) namespace ARCH_GENERIC { ret name param; }\ -namespace ARCH_SSE4_1 { ret name param; }\ -namespace ARCH_AVX2 { ret name param; }\ -static inline std::function dispatch_target_##name() {\ -switch(::SIMD::arch()) {\ -case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name;\ -case ::SIMD::Arch::AVX2: return ARCH_AVX2::name;\ -default: return ARCH_GENERIC::name;\ -}}\ -const std::function name = dispatch_target_##name(); - -#endif - -#if defined(__GNUC__) && !defined(__clang__) && defined(__SSE__) -#pragma GCC pop_options -#elif defined(__clang__) && defined(__SSE__) -#pragma clang attribute pop -#endif - -#else - -#include - -#define DECL_DISPATCH(ret, name, param) namespace ARCH_GENERIC { ret name param; }\ -inline std::function dispatch_target_##name() {\ -return ARCH_GENERIC::name;\ -}\ -const std::function name = dispatch_target_##name(); - -#endif +#endif \ No newline at end of file diff --git a/src/util/simd/dispatch.h b/src/util/simd/dispatch.h new file mode 100644 index 000000000..bd6bd1a8c --- /dev/null +++ b/src/util/simd/dispatch.h @@ -0,0 +1,163 @@ +#pragma once + +#ifdef WITH_AVX2 +#define HAVE_AVX2(x) x +#else +#define HAVE_AVX2(x) +#endif + +#ifdef WITH_SSE4_1 +#define HAVE_SSE4_1(x) x +#else +#define HAVE_SSE4_1(x) +#endif + +#if ARCH_ID == 0 + +#define DISPATCH_0V(name)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { void name(); })\ +HAVE_AVX2(namespace ARCH_AVX2 { void name(); })\ +void name() {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: ARCH_AVX2::name(); break;)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: ARCH_SSE4_1::name(); break;)\ +HAVE_SSE4_1(default:)\ +ARCH_GENERIC::name();\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_2(ret, name, t1, n1, t2, n2)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2); })\ +ret name(t1 n1, t2 n2) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_3(ret, name, t1, n1, t2, n2, t3, n3)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2, t3 n3); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2, t3 n3); })\ +ret name(t1 n1, t2 n2, t3 n3) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2, n3);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2, n3);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2, n3);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_3V(name, t1, n1, t2, n2, t3, n3)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { void name(t1 n1, t2 n2, t3 n3); })\ +HAVE_AVX2(namespace ARCH_AVX2 { void name(t1 n1, t2 n2, t3 n3); })\ +void name(t1 n1, t2 n2, t3 n3) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: ARCH_AVX2::name(n1, n2, n3); break;)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: ARCH_SSE4_1::name(n1, n2, n3); break;)\ +HAVE_SSE4_1(default:)\ +ARCH_GENERIC::name(n1, n2, n3);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_4(ret, name, t1, n1, t2, n2, t3, n3, t4, n4)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2, t3 n3, t4 n4); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2, t3 n3, t4 n4); })\ +ret name(t1 n1, t2 n2, t3 n3, t4 n4) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2, n3, n4);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2, n3, n4);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2, n3, n4);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_5(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5); })\ +ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2, n3, n4, n5);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2, n3, n4, n5);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2, n3, n4, n5);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_6(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6); })\ +ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2, n3, n4, n5, n6);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2, n3, n4, n5, n6);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2, n3, n4, n5, n6);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_6V(name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { void name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6); })\ +HAVE_AVX2(namespace ARCH_AVX2 { void name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6); })\ +void name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: ARCH_AVX2::name(n1, n2, n3, n4, n5, n6); break;)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: ARCH_SSE4_1::name(n1, n2, n3, n4, n5, n6); break;)\ +HAVE_SSE4_1(default:)\ +ARCH_GENERIC::name(n1, n2, n3, n4, n5, n6);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_7(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6, t7, n7)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7); })\ +ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2, n3, n4, n5, n6, n7);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2, n3, n4, n5, n6, n7);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2, n3, n4, n5, n6, n7);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_7V(name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6, t7, n7)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { void name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7); })\ +HAVE_AVX2(namespace ARCH_AVX2 { void name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7); })\ +void name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: ARCH_AVX2::name(n1, n2, n3, n4, n5, n6, n7); break;)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: ARCH_SSE4_1::name(n1, n2, n3, n4, n5, n6, n7); break;)\ +HAVE_SSE4_1(default:)\ +ARCH_GENERIC::name(n1, n2, n3, n4, n5, n6, n7);\ +HAVE_SSE4_1(})\ +} + +#define DISPATCH_8(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6, t7, n7, t8, n8)\ +HAVE_SSE4_1(namespace ARCH_SSE4_1 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7, t8 n8); })\ +HAVE_AVX2(namespace ARCH_AVX2 { ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7, t8 n8); })\ +ret name(t1 n1, t2 n2, t3 n3, t4 n4, t5 n5, t6 n6, t7 n7, t8 n8) {\ +HAVE_SSE4_1(switch(::SIMD::arch()) {)\ +HAVE_AVX2(case ::SIMD::Arch::AVX2: return ARCH_AVX2::name(n1, n2, n3, n4, n5, n6, n7, n8);)\ +HAVE_SSE4_1(case ::SIMD::Arch::SSE4_1: return ARCH_SSE4_1::name(n1, n2, n3, n4, n5, n6, n7, n8);)\ +HAVE_SSE4_1(default:)\ +return ARCH_GENERIC::name(n1, n2, n3, n4, n5, n6, n7, n8);\ +HAVE_SSE4_1(})\ +} + +#else + +#define DISPATCH_0V(name) +#define DISPATCH_2(ret, name, t1, n1, t2, n2) +#define DISPATCH_3(ret, name, t1, n1, t2, n2, t3, n3) +#define DISPATCH_4(ret, name, t1, n1, t2, n2, t3, n3, t4, n4) +#define DISPATCH_5(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5) +#define DISPATCH_6(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6) +#define DISPATCH_7(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6, t7, n7) +#define DISPATCH_3V(name, t1, n1, t2, n2, t3, n3) +#define DISPATCH_6V(name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6) +#define DISPATCH_7V(name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6, t7, n7) +#define DISPATCH_8(ret, name, t1, n1, t2, n2, t3, n3, t4, n4, t5, n5, t6, n6, t7, n7, t8, n8) + +#endif \ No newline at end of file diff --git a/src/util/util.cpp b/src/util/util.cpp index 399769dbb..6c10629b8 100644 --- a/src/util/util.cpp +++ b/src/util/util.cpp @@ -18,15 +18,17 @@ along with this program. If not, see . #include #include -#include #include +#include #include "../basic/config.h" #include "log_stream.h" #include "util.h" #include "escape_sequences.h" #include "profiler.h" -using namespace std; +using std::string; +using std::vector; +using std::endl; MessageStream message_stream; MessageStream verbose_stream (false); diff --git a/src/util/util.h b/src/util/util.h index c295fed5f..6b912a467 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -174,7 +174,12 @@ std::string join(const char *c, const std::vector &v); template auto apply(const std::vector &v, F f) -> std::vector::type> { - std::vector::type> r; +#ifdef _MSC_VER + using R = typename std::invoke_result::type; +#else + using R = typename std::result_of::type; +#endif + std::vector r; r.reserve(v.size()); for (const auto &i : v) r.push_back(f(i));