Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some changes that I've made over the time. #265

Draft
wants to merge 18 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci_utility.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
push:
branches:
- 'main'
pull_request:
types:
- unlabeled
workflow_dispatch:

concurrency:
Expand Down
4 changes: 2 additions & 2 deletions include/raptor/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@ class hierarchical_interleaved_bloom_filter<data_layout_mode>::membership_agent

if (current_filename_index < 0) // merged bin
{
if (sum >= threshold)
bulk_contains_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
// if (sum >= threshold)
bulk_contains_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
Expand Down
37 changes: 37 additions & 0 deletions util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,41 @@ target_link_libraries ("generate_reads_refseq" "common")
add_executable ("ibf_fpr" src/applications/ibf_fpr.cpp)
target_link_libraries ("ibf_fpr" "common")

# Utility whose source sits in src/applications.
add_executable ("check_fastq" src/applications/check_fastq.cpp)
target_link_libraries ("check_fastq" "common")
install (TARGETS "check_fastq" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")

# Helper tools from hibf/misc/helper/src: each listed name is built from `<name>.cpp`, linked against "common",
# and installed into the runtime output directory. Targets are created in the listed order.
foreach (helper_tool IN ITEMS
         compare_amr_genes
         create_comparable_output
         normalise_bifrost_output
         normalise_raptor_output
         normalise_yara_truth_file
         compare_mantis_raptor_output
         to_be_deleted # formerly create_truth_file
         compare_output)
    add_executable ("${helper_tool}" "hibf/misc/helper/src/${helper_tool}.cpp")
    target_link_libraries ("${helper_tool}" "common")
    install (TARGETS "${helper_tool}" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
endforeach ()

message (STATUS "${FontBold}You can run `make install` to build the application.${FontReset}")
3 changes: 3 additions & 0 deletions util/hibf/misc/helper/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ cmake_minimum_required (VERSION 3.8)

# Each helper is a single-source executable that links against SeqAn3 and Sharg.
foreach (helper_tool IN ITEMS fasta_to_fastq compare_output)
    add_executable ("${helper_tool}" "${helper_tool}.cpp")
    target_link_libraries ("${helper_tool}" PUBLIC seqan3::seqan3 sharg::sharg)
endforeach ()
182 changes: 182 additions & 0 deletions util/hibf/misc/helper/src/compare_amr_genes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
#include <algorithm>
#include <cassert>
#include <charconv>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <optional>
#include <ranges>
#include <seqan3/std/algorithm>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

namespace
{

// Splits `text` at every occurrence of `delimiter`.
// Consecutive delimiters yield empty fields; an empty `text` yields no fields at all.
// The returned views point into `text` and are only valid as long as the underlying buffer is unchanged.
std::vector<std::string_view> split_fields(std::string_view const text, char const delimiter)
{
    std::vector<std::string_view> fields{};
    if (text.empty())
        return fields;

    size_t field_begin{0};
    for (size_t pos = text.find(delimiter); pos != std::string_view::npos; pos = text.find(delimiter, field_begin))
    {
        fields.push_back(text.substr(field_begin, pos - field_begin));
        field_begin = pos + 1;
    }
    fields.push_back(text.substr(field_begin));
    return fields;
}

// Parses `field` as an unsigned decimal number.
// Returns std::nullopt unless the complete field is a valid number that fits into uint64_t.
std::optional<uint64_t> parse_id(std::string_view const field)
{
    uint64_t value{};
    char const * const last = field.data() + field.size();
    auto const [ptr, ec] = std::from_chars(field.data(), last, value);
    if (ec != std::errc{} || ptr != last)
        return std::nullopt;
    return value;
}

} // namespace

// Compares the hits of a (processed) raptor result file against a gene presence/absence truth table.
//
// Usage: compare_amr_genes <raptor result file> <truth file>
//
// Raptor file: header lines `#<user bin id>\t<path>`, followed by one line starting with `#Q` (column names), followed
//              by result lines `<query>\t<id>,<id>,...`. The gene name is the 6th '|'-separated field of <query>.
// Truth file:  tab-separated table. The first line holds the gene names (the first cell labels the sample column);
//              every further line starts with a sample name followed by one cell per gene, where "1" means "present".
//              A sample name must equal the file name of a user bin up to the first '.'.
//
// Writes false positives to `raptor.fps` and false negatives to `raptor.fns` (one `<gene>:<user bin id>` per line)
// and prints a summary to stdout.
// Throws std::runtime_error on a wrong number of arguments or if a file cannot be opened.
int main(int argc, char ** argv)
{
    if (argc != 3)
        throw std::runtime_error{"Please provide a processed raptor result file and the truth file"};

    std::ifstream raptor_result{argv[1]};
    std::ifstream truth_file{argv[2]};

    if (!raptor_result.good())
        throw std::runtime_error{"Could not open file " + std::string{argv[1]}};
    if (!truth_file.good())
        throw std::runtime_error{"Could not open file " + std::string{argv[2]}};

    std::unordered_map<std::string, uint64_t> user_bin_ids;

    std::cout << "Reading in user bin ids from raptor header in " << argv[1] << "... ";
    std::string line;
    // The first line that is not a `#<id>` header line (normally the `#QUERY_NAME` line) ends the loop and is dropped.
    while (std::getline(raptor_result, line) && line[0] == '#' && line[1] != 'Q')
    {
        std::string_view const header{line};
        size_t const tab_pos = header.find('\t');
        if (tab_pos == std::string_view::npos)
        {
            std::cerr << "Warning: Skipping malformed header line '" << header << "'.\n";
            continue;
        }

        // The id follows the leading '#'; the '#' itself must not be part of the parsed token.
        std::optional<uint64_t> const user_bin_id = parse_id(header.substr(1, tab_pos - 1));
        if (!user_bin_id.has_value())
        {
            std::cerr << "Warning: Skipping header line with invalid user bin id '" << header << "'.\n";
            continue;
        }

        // Key: file name without directories, cut at its first '.'.
        std::string_view file_name = header.substr(tab_pos + 1);
        if (size_t const slash_pos = file_name.find_last_of('/'); slash_pos != std::string_view::npos)
            file_name.remove_prefix(slash_pos + 1);
        user_bin_ids.emplace(std::string{file_name.substr(0, file_name.find('.'))}, *user_bin_id);
    }
    std::cout << "Done " << std::endl;

    std::unordered_map<std::string, std::vector<uint64_t>> truth_set{};

    std::cout << "Reading in truth set from file " << argv[2] << "... ";
    // Header line: column i of every following row belongs to genes[i]; genes[0] labels the sample column.
    std::getline(truth_file, line);
    std::vector<std::string> genes{};
    for (std::string_view const gene : split_fields(line, '\t'))
        genes.emplace_back(gene);

    // Remaining lines: one sample per line.
    while (std::getline(truth_file, line))
    {
        std::vector<std::string_view> const cells = split_fields(line, '\t');
        if (cells.empty())
            continue;

        // A sample that is not part of the index must not be silently mapped to user bin 0.
        auto const bin_it = user_bin_ids.find(std::string{cells[0]});
        if (bin_it == user_bin_ids.end())
        {
            std::cerr << "Warning: Sample '" << cells[0] << "' is not a user bin of the raptor result. Skipping.\n";
            continue;
        }

        // Cells without a corresponding gene name in the header are ignored.
        size_t const column_count = std::min(cells.size(), genes.size());
        for (size_t column = 1; column < column_count; ++column)
        {
            if (cells[column] == "1") // not 0
                truth_set[genes[column]].push_back(bin_it->second);
        }
    }

    // The comparison below walks both hit lists in ascending order, hence the truth lists must be sorted sets.
    for (auto & entry : truth_set)
    {
        std::vector<uint64_t> & bins = entry.second;
        std::sort(bins.begin(), bins.end());
        bins.erase(std::unique(bins.begin(), bins.end()), bins.end());
    }
    std::cout << "Done - Truth set has size " << truth_set.size() << std::endl;

    std::cout << "Processing Results from raptor file " << argv[1] << "... ";

    std::ofstream false_positives_file{"raptor.fps"};
    std::ofstream false_negatives_file{"raptor.fns"};
    if (!false_positives_file.good() || !false_negatives_file.good())
        throw std::runtime_error{"Could not open output files raptor.fps and raptor.fns"};

    uint64_t true_positives{0};
    uint64_t false_positives{0};
    uint64_t false_negatives{0};
    uint64_t all_raptor{0};
    size_t skipped_genes{};

    constexpr size_t gene_field_index{5}; // The gene name is the 6th '|'-separated field of the query name.

    while (std::getline(raptor_result, line))
    {
        std::string_view const result_line{line};
        size_t const tab_pos = result_line.find('\t');
        std::string_view const query = result_line.substr(0, tab_pos);
        std::string_view const hit_list =
            (tab_pos == std::string_view::npos) ? std::string_view{} : result_line.substr(tab_pos + 1);

        std::vector<std::string_view> const query_fields = split_fields(query, '|');
        if (query_fields.size() <= gene_field_index)
        {
            ++skipped_genes;
            std::cerr << "Warning: Could not extract gene from query '" << query << "'.\n";
            continue;
        }
        std::string const gene{query_fields[gene_field_index]};

        auto const truth_entry = truth_set.find(gene);
        if (truth_entry == truth_set.end())
        {
            ++skipped_genes;
            std::cerr << "Warning: Could not find gene '" << gene << "' in truth set.\n";
            continue;
        }
        std::vector<uint64_t> const & truth_hits = truth_entry->second;

        // Collect raptor's user bin ids; empty fields (no hits, trailing comma) carry no id.
        std::vector<uint64_t> raptor_hits{};
        for (std::string_view const field : split_fields(hit_list, ','))
        {
            if (field.empty())
                continue;

            if (std::optional<uint64_t> const hit = parse_id(field); hit.has_value())
                raptor_hits.push_back(*hit);
            else
                std::cerr << "Warning: Ignoring invalid user bin id '" << field << "' for gene '" << gene << "'.\n";
        }
        std::sort(raptor_hits.begin(), raptor_hits.end());
        all_raptor += raptor_hits.size();

        // Walk both sorted lists simultaneously: ids only in the truth set are false negatives, ids only reported by
        // raptor are false positives, ids in both are true positives.
        auto truth_it = truth_hits.begin();
        auto raptor_it = raptor_hits.begin();

        while (truth_it != truth_hits.end() && raptor_it != raptor_hits.end())
        {
            if (*truth_it < *raptor_it)
            {
                false_negatives_file << gene << ":" << *truth_it << '\n';
                ++false_negatives;
                ++truth_it;
            }
            else if (*raptor_it < *truth_it)
            {
                false_positives_file << gene << ":" << *raptor_it << '\n';
                ++false_positives;
                ++raptor_it;
            }
            else
            {
                ++true_positives;
                ++truth_it;
                ++raptor_it;
            }
        }

        for (; truth_it != truth_hits.end(); ++truth_it) // truth entries that raptor did not report
        {
            false_negatives_file << gene << ":" << *truth_it << '\n';
            ++false_negatives;
        }

        for (; raptor_it != raptor_hits.end(); ++raptor_it) // raptor hits beyond the last truth entry
        {
            false_positives_file << gene << ":" << *raptor_it << '\n';
            ++false_positives;
        }
    }

    std::cout << '\n';
    std::cout << "#Skipped genes: " << skipped_genes << '\n';
    std::cout << "Raptor total #hits:" << all_raptor << '\n';
    std::cout << "#True positives raptor: " << true_positives << '\n';
    std::cout << "#False positives raptor: " << false_positives << '\n';
    std::cout << "#False negatives raptor: " << false_negatives << std::endl;
}
Loading