diff --git a/Project.toml b/Project.toml index 5f2c8d4..50da567 100644 --- a/Project.toml +++ b/Project.toml @@ -1,25 +1,17 @@ name = "AssigningSecondaryStructure" uuid = "8ed43e74-60fb-4e11-99b9-91deed37aef7" authors = ["Anton Oresten "] -version = "0.4.2" +version = "0.5.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -[weakdeps] -Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7" - -[extensions] -BackbonerExt = "Backboner" - [compat] -Backboner = "0.11" LinearAlgebra = "1" julia = "1" [extras] -Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Backboner"] +test = ["Test"] diff --git a/ext/BackbonerExt.jl b/ext/BackbonerExt.jl deleted file mode 100644 index d3b1fe7..0000000 --- a/ext/BackbonerExt.jl +++ /dev/null @@ -1,13 +0,0 @@ -module BackbonerExt - -using AssigningSecondaryStructure -import Backboner - -get_coords(chain::Backboner.Protein.Chain) = reshape(chain.backbone.coords, 3, 3, :) - -AssigningSecondaryStructure.assign_secondary_structure(chains::Vector{Backboner.Protein.Chain}) = assign_secondary_structure(get_coords.(chains)) -AssigningSecondaryStructure.assign_secondary_structure(chain::Backboner.Protein.Chain) = assign_secondary_structure([chain])[1] -AssigningSecondaryStructure.assign_secondary_structure(path::AbstractString, format) = assign_secondary_structure(Backboner.Protein.readchains(path, format)) -AssigningSecondaryStructure.assign_secondary_structure(path::AbstractString) = assign_secondary_structure(Backboner.Protein.readchains(path)) - -end \ No newline at end of file diff --git a/kd.ipynb b/kd.ipynb new file mode 100644 index 0000000..9aa49b2 --- /dev/null +++ b/kd.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `c:\\Users\\anton\\.julia\\dev\\AssigningSecondaryStructure`\n" + ] + } + ], + "source": [ + "using Pkg; Pkg.activate(\".\")" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: using LinearAlgebra.I in module Main conflicts with an existing identifier.\n" + ] + } + ], + "source": [ + "using AssigningSecondaryStructure\n", + "import AssigningSecondaryStructure as ASS\n", + "\n", + "using Backboner, NearestNeighbors\n", + "using LinearAlgebra" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10-element Vector{Bool}:\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "coords = reshape(Protein.readpdb(\"test/data/1ASS.pdb\")[1].backbone.coords, 3, 3, :)\n", + "L = size(coords, 3)\n", + "\n", + "Ca_pos = coords[:, 2, :]\n", + "\n", + "pad_pos = fill(NaN, 3)\n", + "C_pos = coords[:, 3, :]\n", + "O_pos = [ASS.get_oxygen_positions(coords) pad_pos]\n", + "N_pos = coords[:, 1, :]\n", + "H_pos = [pad_pos ASS.get_hydrogen_positions(coords)]\n", + "\n", + "kdtree = KDTree(Ca_pos, Euclidean())\n", + "\n", + "for i in axes(coords, 3)[[4]]\n", + " js, dists = knn(kdtree, view(Ca_pos, :, 1), 10, true)\n", + " Hbonds = map(js) do j\n", + " i <= j <= i+2 && return false\n", + " r_ON = norm(O_pos[:, i] - N_pos[:, j])\n", + " r_CH = norm(C_pos[:, i] - H_pos[:, j])\n", + " r_OH = norm(O_pos[:, i] - H_pos[:, j])\n", + " r_CN = norm(C_pos[:, i] - N_pos[:, j])\n", + " E = ASS.Q1Q2 * ASS.F * (1/r_ON + 1/r_CH - 1/r_OH - 1/r_CN)\n", + " return E < ASS.CUTOFF\n", + " end\n", + " display(Hbonds)\n", + "end" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.0", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/AssigningSecondaryStructure.jl b/src/AssigningSecondaryStructure.jl index 5292596..3fa0e7a 100644 --- a/src/AssigningSecondaryStructure.jl +++ b/src/AssigningSecondaryStructure.jl @@ -1,7 +1,8 @@ module AssigningSecondaryStructure -include("hbonds.jl") -include("dssp.jl") +export assign_secondary_structure!, assign_secondary_structure + +include("hydrogen_bonds.jl") include("assign.jl") end diff --git a/src/assign.jl b/src/assign.jl index 92fda72..06d185e 100644 --- a/src/assign.jl +++ b/src/assign.jl @@ -1,23 +1,79 @@ -export assign_secondary_structure!, assign_secondary_structure +using LinearAlgebra: diag -function assign_secondary_structure! end +# 3-turn >>3<< +# 4-turn >>44<< +# 5-turn >>555<< +# MINIMAL X X X +# LONGER GGG HHHH IIIII +function get_helices(Hbonds::AbstractMatrix{Bool}) + turn3 = [diag(Hbonds, 3) .> 0; falses(3)] + turn4 = [diag(Hbonds, 4) .> 0; falses(4)] + turn5 = [diag(Hbonds, 5) .> 0; falses(5)] + + # "Minimal" helices: the previous and current + # residue is bonding to a residue n steps ahead respectively + h3 = [get(turn3, i-1, false) & turn3[i] for i in eachindex(turn3)] + h4 = [get(turn4, i-1, false) & turn4[i] for i in eachindex(turn4)] + h5 = [get(turn5, i-1, false) & turn5[i] for i in eachindex(turn5)] + + # Longer helices: smearing out the minimal helix + # residues to the residues they bond to + helix4 = reduce(.|, circshift(h4, i) for i in 0:3) + + mask = .!(helix4 .| circshift(helix4, 1)) + h3 = h3 .& mask + h5 = h5 .& mask + + helix3 = reduce(.|, circshift(h3, i) for i in 0:2) + helix5 = reduce(.|, circshift(h5, i) for i in 0:4) + helix = helix3 .| helix4 .| helix5 + + return helix |> collect +end + +function get_bridges(Hbonds::AbstractMatrix{Bool}) + Parallel_Bridge = falses(size(Hbonds)) + Antiparallel_Bridge = falses(size(Hbonds)) + for j in 2:size(Hbonds, 2)-1, i in 2:size(Hbonds, 1)-1 + Parallel_Bridge[i,j] = (Hbonds[i-1,j] & Hbonds[j,i+1]) | + (Hbonds[j-1,i] & Hbonds[i,j+1]) + Antiparallel_Bridge[i,j] = (Hbonds[i,j] & Hbonds[j,i]) | + (Hbonds[i-1,j+1] & Hbonds[j-1,i+1]) + end + return Parallel_Bridge, Antiparallel_Bridge +end + +function get_strands(Hbonds::AbstractMatrix{Bool}) + Parallel_Bridge, Antiparallel_Bridge = get_bridges(Hbonds) + return mapslices(any, Parallel_Bridge .| Antiparallel_Bridge, dims=1) |> vec |> collect +end """ + assign_secondary_structure(chain_backbone::Array{T,3}}) where T<:Real assign_secondary_structure(chain_backbones::Vector{Array{T,3}}) where T<:Real Given a vector of chain backbones, each represented as a 3-dimensional array of size 3x3xL, this function assigns the secondary structure to each residue. Returns a vector of vectors of integers, each of which is the secondary structure assignment -for the corresponding chain and their respective residues. The integers are encoded as follows: -- `1`: loop; neither helix nor strand. -- `2`: helix; α, 3_10, or π. -- `3`: strand; part of a β-sheet. +for the corresponding chain and their respective residues. The secondary structure encoded as follows: +- `1`: loop (neither helix nor strand) +- `2`: helix; α, 3_10, or π +- `3`: strand; part of a β-sheet """ -function assign_secondary_structure(chain_backbones::Vector{Array{T,3}}) where T<:Real - concatenated_backbone = cat(chain_backbones..., dims=3) - secondary_structure = dssp(convert(Array{Float64}, concatenated_backbone)) - lengths = size.(chain_backbones, 3) - chain_ends = [0; cumsum(lengths)] - secondary_structure_by_chain = [secondary_structure[i+1:j] for (i,j) in zip(chain_ends, chain_ends[2:end])] - return secondary_structure_by_chain +function assign_secondary_structure(coords::Array{T,3}) where T <: Real + size(coords)[1:2] == (3, 3) || throw(DimensionMismatch("Expected 3x3xL array, got $(size(coords))")) + size(coords, 3) < 5 && return ones(Int, size(coords, 3)) + Hbonds = get_hbond_map(coords) + + helix = get_helices(Hbonds) + strand = get_strands(Hbonds) + loop = .!(helix .| strand) + + # 1 for helix, 2 for strand, 3 for loop + secondary_structure = vec(mapslices(findfirst, [loop helix strand], dims=2)) + return secondary_structure end + +assign_secondary_structure(chain_backbones::Vector{<:Array{<:Real,3}}) = assign_secondary_structure.(chain_backbones) + +function assign_secondary_structure! end \ No newline at end of file diff --git a/src/dssp.jl b/src/dssp.jl deleted file mode 100644 index 96f6048..0000000 --- a/src/dssp.jl +++ /dev/null @@ -1,63 +0,0 @@ -using LinearAlgebra: diag - -# 3-turn >>3<< -# 4-turn >>44<< -# 5-turn >>555<< -# MINIMAL X X X -# LONGER GGG HHHH IIIII -function get_helices(Hbonds::AbstractMatrix{Bool}) - turn3 = [diag(Hbonds, 3) .> 0; falses(3)] - turn4 = [diag(Hbonds, 4) .> 0; falses(4)] - turn5 = [diag(Hbonds, 5) .> 0; falses(5)] - - # "Minimal" helices: the previous and current - # residue is bonding to a residue n steps ahead respectively - h3 = [get(turn3, i-1, false) & turn3[i] for i in eachindex(turn3)] - h4 = [get(turn4, i-1, false) & turn4[i] for i in eachindex(turn4)] - h5 = [get(turn5, i-1, false) & turn5[i] for i in eachindex(turn5)] - - # Longer helices: smearing out the minimal helix - # residues to the residues they bond to - helix4 = reduce(.|, circshift(h4, i) for i in 0:3) - - mask = .!(helix4 .| circshift(helix4, 1)) - h3 = h3 .& mask - h5 = h5 .& mask - - helix3 = reduce(.|, circshift(h3, i) for i in 0:2) - helix5 = reduce(.|, circshift(h5, i) for i in 0:4) - helix = helix3 .| helix4 .| helix5 - - return helix |> collect -end - -function get_bridges(Hbonds::AbstractMatrix{Bool}) - Parallel_Bridge = falses(size(Hbonds)) - Antiparallel_Bridge = falses(size(Hbonds)) - for j in 2:size(Hbonds, 2)-1, i in 2:size(Hbonds, 1)-1 - Parallel_Bridge[i,j] = (Hbonds[i-1,j] & Hbonds[j,i+1]) | - (Hbonds[j-1,i] & Hbonds[i,j+1]) - Antiparallel_Bridge[i,j] = (Hbonds[i,j] & Hbonds[j,i]) | - (Hbonds[i-1,j+1] & Hbonds[j-1,i+1]) - end - return Parallel_Bridge, Antiparallel_Bridge -end - -function get_strands(Hbonds::AbstractMatrix{Bool}) - Parallel_Bridge, Antiparallel_Bridge = get_bridges(Hbonds) - return mapslices(any, Parallel_Bridge .| Antiparallel_Bridge, dims=1) |> vec |> collect -end - -function dssp(coords::Array{T,3}) where T <: Real - size(coords)[1:2] == (3, 3) || throw(DimensionMismatch("Expected 3x3xL array, got $(size(coords))")) - size(coords, 3) < 5 && return ones(Int, size(coords, 3)) - Hbonds = get_Hbonds(coords) - - helix = get_helices(Hbonds) - strand = get_strands(Hbonds) - loop = .!(helix .| strand) - - # 1 for helix, 2 for strand, 3 for loop - secondary_structure = vec(mapslices(findfirst, [loop helix strand], dims=2)) - return secondary_structure -end \ No newline at end of file diff --git a/src/hbonds.jl b/src/hydrogen_bonds.jl similarity index 95% rename from src/hbonds.jl rename to src/hydrogen_bonds.jl index f8012ae..1325cea 100644 --- a/src/hbonds.jl +++ b/src/hydrogen_bonds.jl @@ -30,7 +30,7 @@ function get_hydrogen_positions(coords::Array{T,3}) where T<:Real end # i:C=O, j:N-H -function get_Hbonds(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real +function get_hbond_map(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real C_pos = coords[:, 3, :] O_pos = get_oxygen_positions(coords) N_pos = coords[:, 1, :]