Skip to content

Commit

Permalink
Use OffsetArrays
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Mar 6, 2024
1 parent c7fc591 commit 087e935
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 33 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ authors = ["Anton Oresten <anton.oresten42@gmail.com>"]
version = "0.9.0"

[deps]
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"

[weakdeps]
Expand Down
8 changes: 4 additions & 4 deletions ext/BioSequencesExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function VectorizedKmers.count_kmers!(
i += 1
i > stop && break
kmer = ((kmer << 2) & mask) | ((data_int >> j) & 0b11)
kmer_array.values[kmer + 1] += first_count_index <= i
kmer_array.values.parent[kmer + 1] += first_count_index <= i
end
end
return kmer_array
Expand All @@ -47,7 +47,7 @@ function VectorizedKmers.count_kmers!(
i += 1
i > stop && break
kmer = ((kmer << 2) & mask) | (trailing_zeros(data_int >> j) & 0b11)
kmer_array.values[kmer + 1] += first_count_index <= i
kmer_array.values.parent[kmer + 1] += first_count_index <= i
end
end
return kmer_array
Expand All @@ -70,7 +70,7 @@ function VectorizedKmers.count_kmers!(
i += 1
i > stop && break
kmer = (kmer * 20 + ((data_int >> j) & 0xff) % 20) % mask
kmer_array.values[kmer + 1] += first_count_index <= i
kmer_array.values.parent[kmer + 1] += first_count_index <= i
end
end
return kmer_array
Expand All @@ -81,7 +81,7 @@ function VectorizedKmers.count_kmers!(
sequence::SeqOrView{<:NucleicAcidAlphabet{4}};
reset::Bool = true,
) where K
chunks = kmer_array.values.chunks
chunks = kmer_array.values.parent.chunks
reset && fill!(chunks, zero(UInt))
mask = one(UInt) << 2K - 1
kmer = zero(UInt)
Expand Down
2 changes: 1 addition & 1 deletion src/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ function count_kmers!(
kmer = zero(Int)
for (i, m) in enumerate(sequence)
kmer = kmer * N % mask + m
K <= i && (kmer_array.values[kmer + 1] += 1)
K <= i && (kmer_array[kmer] += 1)
end
return kmer_array
end
Expand Down
28 changes: 17 additions & 11 deletions src/kmer-array.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import StaticArraysCore: StaticArray
import OffsetArrays: OffsetArray

sizetuple(N::Int, K::Int) = NTuple{K, Int}(N for _ in 1:K)
hypercubify(A::AbstractArray, N::Int, K::Int) = reshape(A, sizetuple(N, K))
ktuple(N::T, K::Int) where T = NTuple{K, T}(N for _ in 1:K)
offset_axes(N::Int, K::Int, offset::Int=-1) = ktuple(1+offset:N+offset, K)
hypercubify(A::AbstractArray, N::Int, K::Int) = reshape(A, ktuple(N, K))

"""
KmerArray{N, K, T <: Real, A <: AbstractArray{T, K}} <: StaticArray{NTuple{K, N}, T, K}
Expand All @@ -12,29 +14,33 @@ hypercubify(A::AbstractArray, N::Int, K::Int) = reshape(A, sizetuple(N, K))
- `A` is the array type
"""
struct KmerArray{N, K, T <: Real, A <: AbstractArray{T, K}} <: StaticArray{NTuple{K, N}, T, K}
values::A
values::OffsetArray{T, K, A}

function KmerArray{N, K, T, A}(values::A) where {N, K, T <: Real, A <: AbstractArray{T, K}}
all(isequal(N), sizetuple(N, K)) || throw(ArgumentError("all dimensions of `values` must be equal to `N`"))
size(values) == sizetuple(N, K) || throw(ArgumentError("size(values) must be $(sizetuple(N, K))"))
function KmerArray{N, K, T, A}(values::OffsetArray{T, K, A}) where {N, K, T <: Real, A <: AbstractArray{T, K}}
all(isequal(N), ktuple(N, K)) || throw(ArgumentError("all dimensions of `values` must be equal to `N`"))
size(values) == ktuple(N, K) || throw(ArgumentError("size(values) must be $(ktuple(N, K))"))
return new{N, K, T, A}(values)
end
end

KmerArray{N, K, T, A}(values::A) where {N, K, T <: Real, A <: AbstractArray{T, K}} = KmerArray{N, K, T, A}(OffsetArray(values, offset_axes(N, K)))

KmerArray{N, K}(values::A) where {N, K, T <: Real, A <: AbstractArray{T, K}} = KmerArray{N, K, T, A}(values)
KmerArray{N, K}(values::AbstractArray) where {N, K} = KmerArray{N, K}(hypercubify(values, N, K))

KmerArray(values::AbstractArray) = KmerArray{size(values, 1), ndims(values)}(values)
KmerArray(N::Int, K::Int, T::Type{<:Real}=Int, zeros::Function=zeros) = KmerArray{N, K}(zeros(T, sizetuple(N, K)))
KmerArray(N::Int, K::Int, T::Type{<:Real}=Int, zeros::Function=zeros) = KmerArray{N, K}(zeros(T, ktuple(N, K)))

Base.size(ka::KmerArray) = size(ka.values)
Base.length(ka::KmerArray) = length(ka.values)

Base.getindex(ka::KmerArray, i::Vararg{Int, N}) where N = ka.values[i...]
Base.getindex(ka::KmerArray, i) = ka.values[i]
Base.axes(::KmerArray{N, K}) where {N, K} = offset_axes(N, K)

Base.getindex(ka::KmerArray, i::Vararg{<:Integer, N}) where N = ka.values[i...]
Base.getindex(ka::KmerArray, i::Integer) = ka.values.parent[i+1]

Base.setindex!(ka::KmerArray, v, i::Vararg{Int, N}) where N = (ka.values[i...] = v)
Base.setindex!(ka::KmerArray, v, i) = (ka.values[i] = v)
Base.setindex!(ka::KmerArray, v, i::Vararg{<:Integer, N}) where N = (ka.values[i...] = v)
Base.setindex!(ka::KmerArray, v, i::Integer) = (ka.values.parent[i+1] = v)

Base.show(io::IO, ka::KmerArray) = print(io, "$(typeof(ka)) with size $(size(ka.values))")
Base.show(io::IO, ::MIME"text/plain", ka::KmerArray) = show(io, ka)
Expand Down
4 changes: 2 additions & 2 deletions test/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
@testset "count_kmers!" begin
kv = KmerArray(4, 1)
@test count_kmers!(kv, [2, 0, 3, 3, 0, 1, 0]) == kv
@test kv.values == [3, 1, 1, 2]
@test kv == KmerArray([3, 1, 1, 2])
end

@testset "count_kmers" begin
@test count_kmers([2, 0, 3, 3, 0, 1, 0], 4, 1) == KmerArray{4, 1}([3, 1, 1, 2])
@test count_kmers([2, 0, 3, 3, 0, 1, 0], 4, 1) == KmerArray([3, 1, 1, 2])
end

@test_throws ErrorException VectorizedKmers.alphabet_size(String)
Expand Down
16 changes: 8 additions & 8 deletions test/ext/BioSequencesExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@ using BioSequences
@testset "count_kmers" begin

@testset "Single sequences" begin
@test count_kmers(dna"ACGT", 1).values == [1, 1, 1, 1]
@test count_kmers(rna"ACGU", 1).values == [1, 1, 1, 1]
@test count_kmers(view(dna"ACGT", 1:3), 1).values == [1, 1, 1, 0]
@test count_kmers(view(rna"ACGU", 2:4), 1).values == [0, 1, 1, 1]
@test count_kmers(dna"ACGT", 1) == KmerArray([1, 1, 1, 1])
@test count_kmers(rna"ACGU", 1) == KmerArray([1, 1, 1, 1])
@test count_kmers(view(dna"ACGT", 1:3), 1) == KmerArray([1, 1, 1, 0])
@test count_kmers(view(rna"ACGU", 2:4), 1) == KmerArray([0, 1, 1, 1])
@test count_kmers(dna"ACGT", 1) == count_kmers(dna"ACGT", 1, UInt)
@test count_kmers(dna"ACGT", 1) == count_kmers(LongDNA{2}(dna"ACGT"), 1)

@test count_kmers(view(LongDNA{2}(dna"A"^32*dna"T"), 33:33), 1) == [0, 0, 0, 1]
@test count_kmers(view(dna"A"^16*dna"T", 17:17), 1) == [0, 0, 0, 1]
@test count_kmers(view(LongDNA{2}(dna"A"^32*dna"T"), 33:33), 1) == KmerArray([0, 0, 0, 1])
@test count_kmers(view(dna"A"^16*dna"T", 17:17), 1) == KmerArray([0, 0, 0, 1])

kv = KmerArray(4, 1)
@test count_kmers!(kv, dna"ACGT").values == [1, 1, 1, 1]
@test count_kmers!(kv, dna"ACGT") == KmerArray([1, 1, 1, 1])

kv = KmerArray(4, 2)
@test count_kmers!(kv, dna"ACGT").values == [0; 1; 0; 0;; 0; 0; 1; 0;; 0; 0; 0; 1;; 0; 0; 0; 0]
@test count_kmers!(kv, dna"ACGT") == KmerArray([0; 1; 0; 0;; 0; 0; 1; 0;; 0; 0; 0; 1;; 0; 0; 0; 0])

seq = randdnaseq(50)
@test all([count_kmers(seq[i:j], 3) == count_kmers(view(seq, i:j), 3) for i in 1:50, j in 1:50])
Expand Down
14 changes: 7 additions & 7 deletions test/kmer-array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
@test ka == KmerArray([2, 1, 1, 2])
@test size(ka) == (4,)
@test length(ka) == 4
@test ka[1] == 2
ka[1] = 3
@test ka[1] == 3
@test ka[0] == 2
ka[0] = 3
@test ka[0] == 3
@test repr(ka) == "KmerArray{4, 1, Int64, Vector{Int64}} with size (4,)"

ka = KmerArray(4, 2)
ka[1] = 3
@test ka[1] == 3
ka[1, 2] = 4
@test ka[1, 2] == 4
ka[0] = 3
@test ka[0] == 3
ka[0, 1] = 4
@test ka[0, 1] == 4

@test repr(MIME("text/plain"), ka) == repr(ka)
end
Expand Down

0 comments on commit 087e935

Please sign in to comment.