Skip to content

Commit

Permalink
Minor changes
Browse files Browse the repository at this point in the history
incl. k -> K
  • Loading branch information
anton083 committed Aug 11, 2023
1 parent c831a5f commit f5fad73
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
11 changes: 6 additions & 5 deletions ext/BioSequencesExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@ module BioSequencesExt
using VectorizedKmers, BioSequences
import VectorizedKmers: KmerCount, count_kmers!, KmerCountColumns, KmerCountRows

function count_kmers!(kmer_count::KmerCount{4, k, T}, seq::LongDNA{4}; reset::Bool=true) where {k, T}
reset && fill!(kmer_count.data, zero(T))
function count_kmers!(kmer_count::KmerCount{4, K, T}, seq::LongDNA{4}; reset::Bool=true) where {K, T}
counts = kmer_count.counts
reset && fill!(kmer_count, zero(T))
len = length(seq)
mask = UInt(1) << 2k - 1
mask = UInt(1) << 2K - 1
kmer = UInt(0)
i = 0
@inbounds for data_int in seq.data
for j in 0:4:63 # could maybe do some SIMD shit on middle k-mers
for j in 0:4:63 # could maybe do some SIMD shit on middle K-mers
i += 1
i > len && break # only necessary for the last element in seq.data
kmer = kmer << 2 & mask + trailing_zeros(data_int >> j) & 0b11
kmer_count[kmer + 1] += k <= i
counts[kmer + 1] += K <= i # maybe counting ALL k-mers is actually useful though? even incomplete ones that start with some A's
end
end
kmer_count
Expand Down
20 changes: 10 additions & 10 deletions ext/CUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,31 @@ import VectorizedKmers: KmerCount, count_kmers!, KmerCountColumns, KmerCountRows
# store sequences in columns

"""
count_kmers!(kcc::KmerCountColumns{4, k, T, CuMatrix{T}}, sequences::CuMatrix{UInt8})
count_kmers!(kcc::KmerCountColumns{4, K, T, CuMatrix{T}}, sequences::CuMatrix{UInt8})
Turbocharged k-mer counting using CUDA. DNA sequences (all with same length) are stored in the columns of `sequences`.
Turbocharged K-mer counting powered by CUDA. DNA sequences (all of the same length) are stored in the columns of `sequences`.
Values in `sequences` must be between 0 and 3.
Chars 'A', 'C', 'G', and 'T' can be converted to 0, 1, 3, and 2 respectively using the function:
`char -> UInt8(char) >> 1 & 0x03`
"""
function count_kmers!(kmer_count_columns::KmerCountColumns{4, k, T, M}, sequences::CuMatrix{UInt8}) where {k, T, M <: CuMatrix{T}}
data = kmer_count_columns.data
function count_kmers!(kmer_count_columns::KmerCountColumns{4, K, T, M}, sequences::CuMatrix{UInt8}) where {K, T, M <: CuMatrix{T}}
counts = kmer_count_columns.counts
seq_len, num_sequences = size(sequences)
CUDA.fill!(data, zero(T))
CUDA.fill!(counts, zero(T))

function kernel(data, sequences, k, seq_len, num_sequences, mask)
function kernel(counts, sequences, K, seq_len, num_sequences, masK)
seq_idx = (blockIdx().x - 1) * blockDim().x + threadIdx().x
if seq_idx <= num_sequences
kmer = unsigned(0)
for i in 1:k-1
for i in 1:K-1
base = sequences[i, seq_idx]
kmer = kmer << 2 + base
end
for i in k:seq_len
for i in K:seq_len
base = sequences[i, seq_idx]
kmer = kmer << 2 & mask + base
CUDA.@atomic data[kmer + 1, seq_idx] += one(T)
CUDA.@atomic counts[kmer + 1, seq_idx] += one(T)
end
end
return
Expand All @@ -45,7 +45,7 @@ function count_kmers!(kmer_count_columns::KmerCountColumns{4, k, T, M}, sequence
threads = 256
blocks = ceil(Int, num_sequences / threads)

@cuda threads=threads blocks=blocks kernel(data, sequences, k, seq_len, num_sequences, mask)
@cuda threads=threads blocks=blocks kernel(counts, sequences, K, seq_len, num_sequences, masK)

kmer_count_columns
end
Expand Down

0 comments on commit f5fad73

Please sign in to comment.