Skip to content

Commit

Permalink
Merge pull request #5 from MurrellGroup/kdtree
Browse files Browse the repository at this point in the history
Improve time complexity with KDTree
  • Loading branch information
AntonOresten authored Nov 7, 2024
2 parents ed21b41 + a545ba0 commit a0bae1f
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 34 deletions.
8 changes: 7 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
name = "AssigningSecondaryStructure"
uuid = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
authors = ["Anton Oresten <anton.oresten42@gmail.com>"]
version = "0.5.0"
version = "0.5.1"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
LinearAlgebra = "1"
NearestNeighbors = "0.4.20"
SparseArrays = "1.11.0"
StaticArrays = "1.9.8"
julia = "1"

[extras]
Expand Down
62 changes: 43 additions & 19 deletions src/assign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,68 @@ using LinearAlgebra: diag
# 5-turn >>555<<
# MINIMAL X X X
# LONGER GGG HHHH IIIII
function get_helices(Hbonds::AbstractMatrix{Bool})
turn3 = [diag(Hbonds, 3) .> 0; falses(3)]
turn4 = [diag(Hbonds, 4) .> 0; falses(4)]
turn5 = [diag(Hbonds, 5) .> 0; falses(5)]
function get_helices(Hbonds::SparseMatrixCSC{Bool, Int})
n = size(Hbonds, 1)

# "Minimal" helices: the previous and current
# residue is bonding to a residue n steps ahead respectively
# Extract diagonals from sparse matrix
turn3 = [Hbonds[i, i+3] for i in 1:(n - 3)]
turn3 = vcat(turn3, falses(3))

turn4 = [Hbonds[i, i+4] for i in 1:(n - 4)]
turn4 = vcat(turn4, falses(4))

turn5 = [Hbonds[i, i+5] for i in 1:(n - 5)]
turn5 = vcat(turn5, falses(5))

# "Minimal" helices
h3 = [get(turn3, i-1, false) & turn3[i] for i in eachindex(turn3)]
h4 = [get(turn4, i-1, false) & turn4[i] for i in eachindex(turn4)]
h5 = [get(turn5, i-1, false) & turn5[i] for i in eachindex(turn5)]

# Longer helices: smearing out the minimal helix
# residues to the residues they bond to
helix4 = reduce(.|, circshift(h4, i) for i in 0:3)
# Longer helices
helix4 = reduce(.|, (circshift(h4, i) for i in 0:3))

mask = .!(helix4 .| circshift(helix4, 1))
h3 = h3 .& mask
h5 = h5 .& mask

helix3 = reduce(.|, circshift(h3, i) for i in 0:2)
helix5 = reduce(.|, circshift(h5, i) for i in 0:4)
helix3 = reduce(.|, (circshift(h3, i) for i in 0:2))
helix5 = reduce(.|, (circshift(h5, i) for i in 0:4))
helix = helix3 .| helix4 .| helix5

return helix |> collect
end

function get_bridges(Hbonds::AbstractMatrix{Bool})
Parallel_Bridge = falses(size(Hbonds))
Antiparallel_Bridge = falses(size(Hbonds))
for j in 2:size(Hbonds, 2)-1, i in 2:size(Hbonds, 1)-1
Parallel_Bridge[i,j] = (Hbonds[i-1,j] & Hbonds[j,i+1]) |
(Hbonds[j-1,i] & Hbonds[i,j+1])
Antiparallel_Bridge[i,j] = (Hbonds[i,j] & Hbonds[j,i]) |
(Hbonds[i-1,j+1] & Hbonds[j-1,i+1])
function get_bridges(Hbonds::SparseMatrixCSC{Bool, Int})
n = size(Hbonds, 1)
Parallel_Bridge = spzeros(Bool, n, n)
Antiparallel_Bridge = spzeros(Bool, n, n)

is, js, vals = findnz(Hbonds)

# Iterate over non-zero entries in Hbonds
for (i0, j0) in zip(is, js)

for (i, j) in Iterators.product(i0-1:i0+1, j0-1:j0+1)

1 < i < n && 1 < j < n || continue

# Parallel bridges
if (Hbonds[i-1, j] && Hbonds[j, i+1]) || (Hbonds[j-1, i] && Hbonds[i, j+1])
Parallel_Bridge[i, j] = true
end

# Antiparallel bridges
if (Hbonds[i, j] && Hbonds[j, i]) || (Hbonds[i-1, j+1] && Hbonds[j-1, i+1])
Antiparallel_Bridge[i, j] = true
end
end
end

return Parallel_Bridge, Antiparallel_Bridge
end


function get_strands(Hbonds::AbstractMatrix{Bool})
Parallel_Bridge, Antiparallel_Bridge = get_bridges(Hbonds)
return mapslices(any, Parallel_Bridge .| Antiparallel_Bridge, dims=1) |> vec |> collect
Expand Down
49 changes: 35 additions & 14 deletions src/hydrogen_bonds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,46 @@ function get_hydrogen_positions(coords::Array{T,3}) where T<:Real
return [fill(T(NaN), 3) H_pos]
end

# i:C=O, j:N-H
using NearestNeighbors
using SparseArrays
using StaticArrays

function get_hbond_map(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real
C_pos = coords[:, 3, :]
C_pos = @views coords[:, 3, :]
O_pos = get_oxygen_positions(coords)
N_pos = coords[:, 1, :]
N_pos = @views coords[:, 1, :]
H_pos = get_hydrogen_positions(coords)

ON_dist = col_norms(O_pos .- reshape(N_pos, 3, 1, :))
CH_dist = col_norms(C_pos .- reshape(H_pos, 3, 1, :))
OH_dist = col_norms(O_pos .- reshape(H_pos, 3, 1, :))
CN_dist = col_norms(C_pos .- reshape(N_pos, 3, 1, :))
num_residues = size(coords, 3)
Hbonds = spzeros(Bool, num_residues, num_residues)

# Build KD-tree for N positions
N_tree = KDTree(N_pos)

E = T(Q1Q2 * F) * (1 ./ ON_dist + 1 ./ CH_dist - 1 ./ OH_dist - 1 ./ CN_dist)
E = dropdims(E, dims=1)
C_pos_static = SVector{3, T}.(eachslice(C_pos, dims=2))
O_pos_static = SVector{3, T}.(eachslice(O_pos, dims=2))
N_pos_static = SVector{3, T}.(eachslice(N_pos, dims=2))
H_pos_static = SVector{3, T}.(eachslice(H_pos, dims=2))

Hbonds = E .< cutoff
Hbonds[diagind(Hbonds, 0)] .= false
Hbonds[diagind(Hbonds, 1)] .= false
Hbonds[diagind(Hbonds, 2)] .= false
# For each O_i, find N_j within cutoff
for i in 1:num_residues
O_i = O_pos_static[i]
idxs = inrange(N_tree, O_i, 5.0)
for j in idxs
# Exclude self and neighboring residues
if abs(i - j) > 2 # Exclude i == j, i+1, i+2
# Compute E[i,j] as before
ON_dist = norm(O_pos_static[i] - N_pos_static[j])
CH_dist = norm(C_pos_static[i] - H_pos_static[j])
OH_dist = norm(O_pos_static[i] - H_pos_static[j])
CN_dist = norm(C_pos_static[i] - N_pos_static[j])
E_ij = T(Q1Q2 * F) * (1 / ON_dist + 1 / CH_dist - 1 / OH_dist - 1 / CN_dist)
if E_ij < cutoff
Hbonds[i, j] = true
end
end
end
end

return Hbonds
end
end

0 comments on commit a0bae1f

Please sign in to comment.