diff --git a/Project.toml b/Project.toml index 8ee60f7..d84e5bd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,35 +1,25 @@ name = "Backboner" uuid = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7" authors = ["Anton Oresten "] -version = "0.11.4" +version = "0.12.0" [deps] -BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Rotations = "6038ab10-8711-5258-84ad-4b1120ba62dc" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" -[weakdeps] -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" - -[extensions] -ZygoteIdealizationExt = ["Zygote"] - [compat] -BioStructures = "4" LinearAlgebra = "1" NNlib = "0.9" PrecompileTools = "1" Rotations = "1" StaticArrays = "1" -Zygote = "0.6" julia = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "Zygote"] +test = ["Test"] diff --git a/README.md b/README.md index 0aa0057..39692a1 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,10 @@ This package offers types and functions for working with molecular *backbones*, Backbones can be represented with different types: - `Backbone`: a type containing a 3xN matrix of coordinates -- `ChainedBonds`: a type that holds vectors of bond lengths, bond angles, and dihedral angles +- `ChainedBonds`: a type that holds vectors of bond lengths, bond angles, and torsion angles - `Frames`: a collection of rotations and translations (e.g. for representing orientations and locations of protein residues) -The `Protein` submodule contains utilities for working specifically with proteins. A protein can be loaded from a PDB file using the `Backboner.Protein.readpdb` function, which returns a `Vector{Backboner.Protein.Chain}`. Conversely, a `Vector{Backboner.Protein.Chain}` instance can be written to a PDB file using the `writepdb` function. +Most functions are implemented especially with differentiability in mind, and can be used in combination with automatic differentiation packages like [Zygote.jl](https://github.com/FluxML/Zygote.jl). ## Installation @@ -26,48 +26,25 @@ add Backboner ## Example usage ```julia -julia> using Backboner, Backboner.Protein - -julia> chains = readpdb("test/data/1ZAK.pdb") -2-element Vector{Chain}: - Chain A with 220 residues - Chain B with 220 residues - -julia> backbone = chains[1].backbone -660-element Backbone{Float64, Matrix{Float64}}: - [22.346, 17.547, 23.294] - [22.901, 18.031, 21.993] - [23.227, 16.793, 21.163] - [24.115, 16.923, 20.175] - [24.478, 15.779, 19.336] - ⋮ - [21.480, 14.668, 4.974] - [22.041, 14.866, 3.569] - [21.808, 13.861, 2.734] - [22.263, 13.862, 1.355] - [21.085, 14.233, 0.446] +julia> using Backboner + +julia> backbone = Backbone(rand(Float32, 3, 9)) +10-element Backbone{Float32, Matrix{Float32}}: + [0.48967552, 0.91008425, 0.5339774] + [0.2951318, 0.38963223, 0.8952989] + [0.83763623, 0.5279301, 0.3407849] + [0.88848716, 0.643387, 0.76827604] + [0.697279, 0.63588345, 0.0889622] + [0.08590478, 0.6086006, 0.6478121] + [0.06308746, 0.6704904, 0.55852276] + [0.46147835, 0.56259614, 0.7884294] + [0.9694153, 0.052023113, 0.08127427] julia> ChainedBonds(backbone) -ChainedBonds{Float64, Vector{Float64}} with 659 bonds, 658 angles, and 657 dihedrals +ChainedBonds{Float32, Vector{Float32}} with 8 bond lengths, 7 bond angles, and 6 torsion angles julia> is_knotted(backbone) false - -julia> import Zygote # unlock the `idealize` method for backbones - -julia> idealize(backbone, Float64[1.46, 1.52, 1.33], Float64[1.94, 2.04, 2.13]) -660-element Backbone{Float64, Matrix{Float64}}: - [22.348574, 17.582397, 23.289886] - [22.90583, 17.977451, 21.999538] - [23.216103, 16.762234, 21.140835] - [24.204561, 16.88515, 20.259508] - [24.52946, 15.827013, 19.307465] - ⋮ - [21.501173, 14.705252, 4.9825864] - [22.007494, 14.864742, 3.5582967] - [21.822643, 13.836198, 2.7356021] - [22.24875, 13.874594, 1.3396943] - [21.091076, 14.233609, 0.42247167] ``` [^1]: In some contexts, the term *backbone* may be used more loosely, and allow for atoms that are not part of the main continuous chain of atoms. This package does not support storing e.g. oxygen and beta-carbon atoms in the matrix of coordinates, as they are not part of the continuous chain of atoms. diff --git a/docs/make.jl b/docs/make.jl index f4baae6..e6ea6f9 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,8 +10,7 @@ makedocs(; doctest = false, pages = [ "Overview" => "index.md", - "Backboner API" => "backboner.md", - "Protein API" => "protein.md", + "API Reference" => "API.md", ], authors = "Anton Oresten", checkdocs = :all, diff --git a/docs/src/backboner.md b/docs/src/API.md similarity index 100% rename from docs/src/backboner.md rename to docs/src/API.md diff --git a/docs/src/index.md b/docs/src/index.md index 3497a16..761af8b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -13,12 +13,14 @@ end [![Build Status](https://github.com/MurrellGroup/Backboner.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/MurrellGroup/Backboner.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/MurrellGroup/Backboner.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/MurrellGroup/Backboner.jl) -Backboner is a Julia package that offers a set of types and functions for working with molecular *backbones*: defined here as continuous chains of bonded atoms.[^1] The package provides a few different types for representing backbones: +This package offers types and functions for working with molecular *backbones*, defined here as continuous chains of bonded atoms.[^1] + +Backbones can be represented with different types: - `Backbone`: a type containing a 3xN matrix of coordinates -- `ChainedBonds`: a type that holds vectors of bond lengths, bond angles, and dihedral angles +- `ChainedBonds`: a type that holds vectors of bond lengths, bond angles, and torsion angles - `Frames`: a collection of rotations and translations (e.g. for representing orientations and locations of protein residues) -The `Protein` submodule contains functions and types for working specifically with proteins. A protein can be loaded from a PDB file using the `Backboner.Protein.readpdb` function, which returns a `Vector{Backboner.Protein.Chain}`. Conversely, a `Vector{Backboner.Protein.Chain}` instance can be written to a PDB file using the `writepdb` function. +Most functions are implemented especially with differentiability in mind, and can be used in combination with automatic differentiation packages like [Zygote.jl](https://github.com/FluxML/Zygote.jl). [View the source code on GitHub](https://github.com/MurrellGroup/Backboner.jl) (licensed under MIT). @@ -33,48 +35,25 @@ add Backboner ## Example usage ```julia -julia> using Backboner, Backboner.Protein - -julia> chains = readpdb("test/data/1ZAK.pdb") -2-element Vector{Chain}: - Chain A with 220 residues - Chain B with 220 residues - -julia> backbone = chains[1].backbone -660-element Backbone{Float64, Matrix{Float64}}: - [22.346, 17.547, 23.294] - [22.901, 18.031, 21.993] - [23.227, 16.793, 21.163] - [24.115, 16.923, 20.175] - [24.478, 15.779, 19.336] - ⋮ - [21.48, 14.668, 4.974] - [22.041, 14.866, 3.569] - [21.808, 13.861, 2.734] - [22.263, 13.862, 1.355] - [21.085, 14.233, 0.446] +julia> using Backboner + +julia> backbone = Backbone(rand(Float32, 3, 9)) +10-element Backbone{Float32, Matrix{Float32}}: + [0.48967552, 0.91008425, 0.5339774] + [0.2951318, 0.38963223, 0.8952989] + [0.83763623, 0.5279301, 0.3407849] + [0.88848716, 0.643387, 0.76827604] + [0.697279, 0.63588345, 0.0889622] + [0.08590478, 0.6086006, 0.6478121] + [0.06308746, 0.6704904, 0.55852276] + [0.46147835, 0.56259614, 0.7884294] + [0.9694153, 0.052023113, 0.08127427] julia> ChainedBonds(backbone) -ChainedBonds{Float64, Vector{Float64}} with 659 bonds, 658 angles, and 657 dihedrals +ChainedBonds{Float32, Vector{Float32}} with 8 bond lengths, 7 bond angles, and 6 torsion angles julia> is_knotted(backbone) false - -julia> import Zygote # unlock the `idealize` method for backbones - -julia> idealize(backbone, Float64[1.46, 1.52, 1.33], Float64[1.94, 2.04, 2.13]) -660-element Backbone{Float64, Matrix{Float64}}: - [22.348574, 17.582397, 23.289886] - [22.90583, 17.977451, 21.999538] - [23.216103, 16.762234, 21.140835] - [24.204561, 16.88515, 20.259508] - [24.52946, 15.827013, 19.307465] - ⋮ - [21.501173, 14.705252, 4.9825864] - [22.007494, 14.864742, 3.5582967] - [21.822643, 13.836198, 2.7356021] - [22.24875, 13.874594, 1.3396943] - [21.091076, 14.233609, 0.42247167] ``` [^1]: In some contexts, the term *backbone* may be used more loosely, and allow for atoms that are not part of the main continuous chain of atoms. This package does not support storing e.g. oxygen and beta-carbon atoms in the matrix of coordinates, as they are not part of the continuous chain of atoms. diff --git a/docs/src/protein.md b/docs/src/protein.md deleted file mode 100644 index 7a5b1be..0000000 --- a/docs/src/protein.md +++ /dev/null @@ -1,7 +0,0 @@ -# Protein API - -These following functions need to be imported explicitly with `using Backboner.Protein` - -```@autodocs -Modules = [Backboner.Protein] -``` \ No newline at end of file diff --git a/ext/ZygoteIdealizationExt.jl b/ext/ZygoteIdealizationExt.jl deleted file mode 100644 index 46797f5..0000000 --- a/ext/ZygoteIdealizationExt.jl +++ /dev/null @@ -1,95 +0,0 @@ -module ZygoteIdealizationExt - -using Backboner - -import Zygote: Params, withgradient - -col_lengths(m::Matrix{<:Real}) = vec(sqrt.(sum(abs2.(m), dims=1))) - -function length_loss( - m::Matrix{T}, ideal::Vector{T}; - mask::Vector{Bool} = ones(Bool, size(m, 2)-1), -) where T <: Real - lengths = col_lengths(m[:, 2:end] .- m[:, 1:end-1]) - return sum(((lengths .- ideal).^2) .* mask) -end - -function col_angles( - ab::Matrix{T}, cb::Matrix{T}, -) where T <: Real - dots = sum(ab .* cb, dims=1) - ab_norms = sqrt.(sum(ab.^2, dims=1)) - cb_norms = sqrt.(sum(cb.^2, dims=1)) - angles = acos.(clamp.(dots ./ (ab_norms .* cb_norms), T(-1.0), T(1.0))) - return vec(angles) -end - -function angle_loss( - m::Matrix{T}, ideal::Vector{T}; - mask::Vector{Bool} = ones(Bool, size(a, 2)-2), -) where T <: Real - a = m[:, 1:end-2] - b = m[:, 2:end-1] - c = m[:, 3:end] - angles = col_angles(a .- b, c .- b) - return sum(((sin.(angles) .- sin.(ideal)).^2 .+ (cos.(angles) .- cos.(ideal)).^2) .* mask) -end - -function ideal_loss( - coords::Matrix{T}, - offsets::Matrix{T}, - lengths::Vector{T}, - angles::Vector{T}, - length_mask::Vector{Bool} = ones(Bool, size(original_xyz, 2)-1), -) where T <: Real - angle_mask = Vector(length_mask[2:end] .& length_mask[1:end-1]) - offset_coords = coords .+ offsets - offset_loss = sum(abs2.(offsets)) - l_loss = length_loss(offset_coords, lengths, mask = length_mask) - a_loss = angle_loss(offset_coords, angles, mask = angle_mask) - return offset_loss, l_loss, a_loss -end - - -""" - idealize( - backbone::Backbone{T}, - ideal_lengths::Vector{T}, - ideal_angles::Vector{T}; - mask_tolerance = 0.5, - n_iterations = 300, - ) where T <: Real - -Idealizes a `Backbone` by adjusting the coordinates to match the ideal lengths and angles. -""" -function Backboner.idealize( - backbone::Backbone{T}, - ideal_lengths::Vector{T}, - ideal_angles::Vector{T}; - mask_tolerance = 0.5, - n_iterations = 300, -) where T <: Real - bonds = ChainedBonds(backbone) - ideal_lengths = [ideal for (_, ideal) in zip(get_bond_lengths(bonds), Iterators.cycle(ideal_lengths))] - ideal_angles = [ideal for (_, ideal) in zip(get_bond_angles(bonds), Iterators.cycle(ideal_angles))] - length_mask = Vector(abs.(get_bond_lengths(bonds) .- ideal_lengths) .< mask_tolerance) - - offsets = zeros(T, size(backbone.coords)) - - function total_loss(coords, offsets, w1, w2, w3) - offset_loss, l_loss, a_loss = ideal_loss(coords, offsets, ideal_lengths, ideal_angles, length_mask) - return w1*offset_loss^3 + w2*l_loss + w3*a_loss - end - - for i in 1:n_iterations - l, g = withgradient(Params([offsets])) do - total_loss(backbone.coords, offsets, 0, 1.4, 1) * 0.13 - end - - offsets .-= g.grads[offsets] - end - - return Backbone(backbone.coords .+ offsets) -end - -end \ No newline at end of file diff --git a/src/Backboner.jl b/src/Backboner.jl index b27d18b..0692064 100644 --- a/src/Backboner.jl +++ b/src/Backboner.jl @@ -8,22 +8,16 @@ export prepend_bonds!, prepend_bonds export get_bond_vectors export get_bond_lengths export get_bond_angles -export get_torsional_angles +export get_torsion_angles export Frames export is_knotted -export idealize - -export Protein - include("backbone.jl") include("bonds.jl") include("frames.jl") include("knots.jl") -include("idealization.jl") -include("protein/protein.jl") using PrecompileTools @@ -33,8 +27,9 @@ using PrecompileTools bonds = ChainedBonds(backbone) Backbone(bonds) - frames = Frames(backbone, Protein.STANDARD_TRIANGLE_ANGSTROM) - Backbone(frames, Protein.STANDARD_TRIANGLE_ANGSTROM) + standard = randn(3,3) + frames = Frames(backbone, standard) + Backbone(frames, standard) is_knotted(backbone) end diff --git a/src/bonds.jl b/src/bonds.jl index 9331123..585c564 100644 --- a/src/bonds.jl +++ b/src/bonds.jl @@ -28,29 +28,26 @@ function batched_cross(A::AbstractMatrix{T}, B::AbstractMatrix{T}) where T <: Re return [C1 C2 C3]' end -function _get_torsional_angles(bond_vectors::AbstractMatrix{<:Real}) +function _get_torsion_angles(bond_vectors::AbstractMatrix{<:Real}) crosses = batched_cross(bond_vectors[:, begin:end-1], bond_vectors[:, begin+1:end]) normalized_crosses = normalize_slices(crosses) cross_crosses = batched_cross(normalized_crosses[:, begin:end-1], normalized_crosses[:, begin+1:end]) normalized_bond_vectors = normalize_slices(bond_vectors) sin_values = dots(cross_crosses, normalized_bond_vectors[:, begin+1:end-1]) cos_values = dots(normalized_crosses[:, begin:end-1], normalized_crosses[:, begin+1:end]) - torsional_angles = atan.(sin_values, cos_values) - return torsional_angles + torsion_angles = atan.(sin_values, cos_values) + return torsion_angles end get_bond_vectors(backbone::Backbone) = get_atom_displacements(backbone, 1, 1, 1) get_bond_lengths(backbone::Backbone) = _get_bond_lengths(get_bond_vectors(backbone)) |> vec get_bond_angles(backbone::Backbone) = _get_bond_angles(get_bond_vectors(backbone)) |> vec -get_torsional_angles(backbone::Backbone) = _get_torsional_angles(get_bond_vectors(backbone)) |> vec - -# compat -const get_dihedrals = get_torsional_angles +get_torsion_angles(backbone::Backbone) = _get_torsion_angles(get_bond_vectors(backbone)) |> vec """ ChainedBonds{T <: Real, V <: AbstractVector{T}} -A lazy way to store a backbone as a series of bond lengths, bond angles, and torsional_angles. +A lazy way to store a backbone as a series of bond lengths, bond angles, and torsion_angles. # Examples @@ -70,32 +67,32 @@ julia> backbone = Protein.readpdb("test/data/1ZAK.pdb")["A"].backbone [21.085, 14.233, 0.446] julia> bonds = ChainedBonds(backbone) -ChainedBonds{Float64, Vector{Float64}} with 659 bond lengths, 658 bond angles, and 657 torsional angles +ChainedBonds{Float64, Vector{Float64}} with 659 bond lengths, 658 bond angles, and 657 torsion angles ``` """ struct ChainedBonds{T<:Real,V<:AbstractVector{T}} bond_lengths::V bond_angles::V - torsional_angles::V + torsion_angles::V - function ChainedBonds{T,V}(bond_lengths::V, bond_angles::V, torsional_angles::V) where {T<:Real,V<:AbstractVector{T}} - length(bond_lengths) == length(bond_angles) + 1 == length(torsional_angles) + 2 || 1 >= length(bond_lengths) >= length(bond_angles) == length(torsional_angles) == 0 || - throw(ArgumentError("bond_lengths, bond_angles, and torsional_angles must have compatible lengths")) - return new{T,V}(bond_lengths, bond_angles, torsional_angles) + function ChainedBonds{T,V}(bond_lengths::V, bond_angles::V, torsion_angles::V) where {T<:Real,V<:AbstractVector{T}} + length(bond_lengths) == length(bond_angles) + 1 == length(torsion_angles) + 2 || 1 >= length(bond_lengths) >= length(bond_angles) == length(torsion_angles) == 0 || + throw(ArgumentError("bond_lengths, bond_angles, and torsion_angles must have compatible lengths")) + return new{T,V}(bond_lengths, bond_angles, torsion_angles) end end -@inline ChainedBonds{T}(bond_lengths::V, bond_angles::V, torsional_angles::V) where {T<:Real,V<:AbstractVector{T}} = ChainedBonds{T,V}(bond_lengths, bond_angles, torsional_angles) -@inline ChainedBonds(bond_lengths::V, bond_angles::V, torsional_angles::V) where {T<:Real,V<:AbstractVector{T}} = ChainedBonds{T,V}(bond_lengths, bond_angles, torsional_angles) +@inline ChainedBonds{T}(bond_lengths::V, bond_angles::V, torsion_angles::V) where {T<:Real,V<:AbstractVector{T}} = ChainedBonds{T,V}(bond_lengths, bond_angles, torsion_angles) +@inline ChainedBonds(bond_lengths::V, bond_angles::V, torsion_angles::V) where {T<:Real,V<:AbstractVector{T}} = ChainedBonds{T,V}(bond_lengths, bond_angles, torsion_angles) get_bond_lengths(bonds::ChainedBonds) = bonds.bond_lengths get_bond_angles(bonds::ChainedBonds) = bonds.bond_angles -get_torsional_angles(bonds::ChainedBonds) = bonds.torsional_angles +get_torsion_angles(bonds::ChainedBonds) = bonds.torsion_angles function Base.reverse!(bonds::ChainedBonds) reverse!(bonds.bond_lengths) reverse!(bonds.bond_angles) - reverse!(bonds.torsional_angles) + reverse!(bonds.torsion_angles) return bonds end @@ -105,14 +102,14 @@ function ChainedBonds(backbone::Backbone) bond_vectors = get_bond_vectors(backbone) bond_lengths = _get_bond_lengths(bond_vectors) |> vec bond_angles = _get_bond_angles(bond_vectors) |> vec - torsional_angles = _get_torsional_angles(bond_vectors) |> vec - return ChainedBonds(bond_lengths, bond_angles, torsional_angles) + torsion_angles = _get_torsion_angles(bond_vectors) |> vec + return ChainedBonds(bond_lengths, bond_angles, torsion_angles) end Base.length(bonds::ChainedBonds) = length(bonds.bond_lengths) function Base.show(io::IO, bonds::ChainedBonds) - print(io, "$(summary(bonds)) with $(length(bonds.bond_lengths)) bond lengths, $(length(bonds.bond_angles)) bond angles, and $(length(bonds.torsional_angles)) torsional angles") + print(io, "$(summary(bonds)) with $(length(bonds.bond_lengths)) bond lengths, $(length(bonds.bond_angles)) bond angles, and $(length(bonds.torsion_angles)) torsion angles") end @@ -132,6 +129,22 @@ function get_backbone_start(bonds::ChainedBonds{T}) where T return Backbone(coords) end +function get_next_atom(bonds, i, A, B, C) + AB, BC = B - A, C - B + n_AB, n_BC = normalize(AB), normalize(BC) + CD_init = n_BC * bonds.bond_lengths[i-1] + + N = normalize(cross(n_AB, n_BC)) # wont work if AB == BC + bond_angle_rotation = AngleAxis(π - bonds.bond_angles[i-2], N...) + CD_rot1 = bond_angle_rotation * CD_init + + dihedral_rotation = AngleAxis(bonds.torsion_angles[i-3], n_BC...) + CD_rot2 = dihedral_rotation * CD_rot1 + + D = C + CD_rot2 + return D +end + # backbone_start don't get adjusted to fit the bonds function Backbone( bonds::ChainedBonds{T}; @@ -143,69 +156,56 @@ function Backbone( @assert 3 <= l <= L coords[:, 1:l] = backbone_start.coords for i in l+1:L - A, B, C = eachcol(coords[:, i-3:i-1]) - AB, BC = B - A, C - B - n_AB, n_BC = normalize(AB), normalize(BC) - CD_init = n_BC * bonds.bond_lengths[i-1] - - N = normalize(cross(n_AB, n_BC)) # wont work if AB == BC - bond_angle_rotation = AngleAxis(π - bonds.bond_angles[i-2], N...) - CD_rot1 = bond_angle_rotation * CD_init - - dihedral_rotation = AngleAxis(bonds.torsional_angles[i-3], n_BC...) - CD_rot2 = dihedral_rotation * CD_rot1 - - D = C + CD_rot2 - coords[:, i] = D + coords[:, i] = get_next_atom(bonds, i, eachcol(coords[:, i-3:i-1])...) end return Backbone(coords) end -function append_bonds!(bonds::ChainedBonds, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsional_angles::AbstractVector{<:Real}) +function append_bonds!(bonds::ChainedBonds, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsion_angles::AbstractVector{<:Real}) length(bonds) >= 2 || throw(ArgumentError("bonds must have at least 2 bonds")) - length(bond_lengths) == length(bond_angles) == length(torsional_angles) || throw(ArgumentError("bond_lengths, bond_angles, and torsional_angles must have the same length")) + length(bond_lengths) == length(bond_angles) == length(torsion_angles) || throw(ArgumentError("bond_lengths, bond_angles, and torsion_angles must have the same length")) append!(bonds.bond_lengths, bond_lengths) append!(bonds.bond_angles, bond_angles) - append!(bonds.torsional_angles, torsional_angles) + append!(bonds.torsion_angles, torsion_angles) return bonds end @inline append_bonds(bonds::ChainedBonds, args...) = append_bonds!(deepcopy(bonds), args...) """ - append_bonds(backbone, bond_lengths, bond_angles, torsional_angles) + append_bonds(backbone, bond_lengths, bond_angles, torsion_angles) """ -function append_bonds(backbone::Backbone, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsional_angles::AbstractVector{<:Real}) +function append_bonds(backbone::Backbone, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsion_angles::AbstractVector{<:Real}) length(backbone) >= 3 || throw(ArgumentError("backbone must have at least 3 atoms")) - length(bond_lengths) == length(bond_angles) == length(torsional_angles) || throw(ArgumentError("bond_lengths, bond_angles, and torsional_angles must have the same length")) + length(bond_lengths) == length(bond_angles) == length(torsion_angles) || throw(ArgumentError("bond_lengths, bond_angles, and torsion_angles must have the same length")) last_three_atoms_backbone = backbone[end-2:end] bonds_end = ChainedBonds(last_three_atoms_backbone) - append_bonds!(bonds_end, bond_lengths, bond_angles, torsional_angles) + append_bonds!(bonds_end, bond_lengths, bond_angles, torsion_angles) backbone_end = Backbone(bonds_end, backbone_start=last_three_atoms_backbone) new_backbone = Backbone(cat(backbone.coords, backbone_end[4:end].coords, dims=2)) return new_backbone end -function prepend_bonds!(bonds::ChainedBonds, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsional_angles::AbstractVector{<:Real}) +function prepend_bonds!(bonds::ChainedBonds, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsion_angles::AbstractVector{<:Real}) length(bonds) >= 2 || throw(ArgumentError("bonds must have at least 2 bonds")) - length(bond_lengths) == length(bond_angles) == length(torsional_angles) || throw(ArgumentError("bond_lengths, bond_angles, and torsional_angles must have the same length")) + length(bond_lengths) == length(bond_angles) == length(torsion_angles) || throw(ArgumentError("bond_lengths, bond_angles, and torsion_angles must have the same length")) prepend!(bonds.bond_lengths, bond_lengths) prepend!(bonds.bond_angles, bond_angles) - prepend!(bonds.torsional_angles, torsional_angles) + prepend!(bonds.torsion_angles, torsion_angles) return bonds end @inline prepend_bonds(bonds::ChainedBonds, args...) = prepend_bonds!(deepcopy(bonds), args...) """ - prepend_bonds(backbone, bond_lengths, bond_angles, torsional_angles) + prepend_bonds(backbone, bond_lengths, bond_angles, torsion_angles) """ -function prepend_bonds(backbone::Backbone, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsional_angles::AbstractVector{<:Real}) +function prepend_bonds(backbone::Backbone, bond_lengths::AbstractVector{<:Real}, bond_angles::AbstractVector{<:Real}, torsion_angles::AbstractVector{<:Real}) length(backbone) >= 3 || throw(ArgumentError("backbone must have at least 3 atoms")) first_three_atoms_backbone = backbone[1:3] bonds_start = ChainedBonds(first_three_atoms_backbone) - prepend_bonds!(bonds_start, bond_lengths, bond_angles, torsional_angles) + prepend_bonds!(bonds_start, bond_lengths, bond_angles, torsion_angles) reverse!(bonds_start) backbone_start = Backbone(bonds_start, backbone_start=first_three_atoms_backbone[3:-1:1]) new_backbone = Backbone(cat(backbone_start[end:-1:4].coords, backbone.coords, dims=2)) diff --git a/src/frames.jl b/src/frames.jl index 6937bc0..00f4c7f 100644 --- a/src/frames.jl +++ b/src/frames.jl @@ -1,8 +1,10 @@ using LinearAlgebra using NNlib -centroid(A::AbstractArray{<:Real}; dims=2) = sum(A; dims) ./ size(A, 2) +centroid(A::AbstractArray{<:Real}; dims=2) = sum(A; dims) ./ prod(size(A)[dims]) +#= FIXME: P currently gets aligned to Q, should be the other way around? +Fix in breaking release, and document the change. =# # TODO: batched version? possible? batched svd? function kabsch_algorithm(P::AbstractMatrix{T}, Q::AbstractMatrix{T}) where T <: Real size(P) == size(Q) || throw(ArgumentError("P and Q must have the same size")) diff --git a/src/idealization.jl b/src/idealization.jl deleted file mode 100644 index e899970..0000000 --- a/src/idealization.jl +++ /dev/null @@ -1,6 +0,0 @@ -""" -!!! note - Zygote must be imported in order to activate the `ZygoteIdealizationExt` extension, - which defines the `idealize(::Backbone)` method. -""" -function idealize end \ No newline at end of file diff --git a/src/protein/BioStructures-interface.jl b/src/protein/BioStructures-interface.jl deleted file mode 100644 index 1ef3cc9..0000000 --- a/src/protein/BioStructures-interface.jl +++ /dev/null @@ -1,185 +0,0 @@ -using BioStructures: PDBFormat, MMCIFFormat - -import BioStructures - -const ProteinFileFormat = Union{PDBFormat, MMCIFFormat} -const AMINOACIDS = Set("ACDEFGHIKLMNPQRSTVWY") -const BACKBONE_ATOM_NAMES = ["N", "CA", "C"] - -threeletter_to_oneletter(threeletter::AbstractString) = Char(get(BioStructures.threeletter_to_aa, threeletter, 'X')) -oneletter_resname(residue::BioStructures.AbstractResidue) = threeletter_to_oneletter(BioStructures.resname(residue)) - -backbone_atom_selector(atom::BioStructures.AbstractAtom) = BioStructures.atomnameselector(atom, BACKBONE_ATOM_NAMES) -backbone_residue_selector(residue::BioStructures.AbstractResidue) = - oneletter_resname(residue) in AMINOACIDS && - BioStructures.countatoms(residue, backbone_atom_selector) == 3 && - BioStructures.standardselector(residue) && - !BioStructures.disorderselector(residue) - -function get_atom(residue::BioStructures.AbstractResidue, atom_name::AbstractString) - selector = atom -> BioStructures.atomnameselector(atom, [atom_name]) - residue_atoms = BioStructures.collectatoms(residue, selector) - return only(residue_atoms) -end - -function Backboner.Backbone(residue::BioStructures.AbstractResidue) - atom_name_to_atom_coords = atom_name -> BioStructures.coords(get_atom(residue, atom_name)) - residue_coords = stack(atom_name_to_atom_coords, BACKBONE_ATOM_NAMES) - return Backbone(residue_coords) -end - -function Backboner.Backbone(residues::Vector{BioStructures.AbstractResidue}) - chain_coords = mapreduce(residue -> Backboner.Backbone(residue).coords, hcat, residues; init=Matrix{Float64}(undef, 3, 0)) - return Backbone(chain_coords) -end - -Backboner.Backbone(chain::BioStructures.Chain, selector) = Backboner.Backbone(BioStructures.collectresidues(chain, selector)) - -aminoacid_sequence(residues::Vector{BioStructures.AbstractResidue}) = oneletter_resname.(residues) -aminoacid_sequence(chain::BioStructures.Chain, selector) = aminoacid_sequence(BioStructures.collectresidues(chain, selector)) - -function get_residue_atoms(residues::Vector{BioStructures.AbstractResidue}) - residue_atoms = Vector{Atom}[] - for res in residues - push!(residue_atoms, Atom.(BioStructures.collectatoms(res, a -> BioStructures.standardselector(a) && !BioStructures.disorderselector(a)))) - end - return residue_atoms -end - -function Protein.Chain(residues::Vector{BioStructures.AbstractResidue}) - id = only(unique(BioStructures.chainid.(residues))) - backbone = Backbone(residues) - aavector = aminoacid_sequence(residues) - resnums = BioStructures.resnumber.(residues) - ins_codes = BioStructures.inscode.(residues) - modelnum = only(unique(BioStructures.modelnumber.(residues))) - residue_atoms = get_residue_atoms(residues) - return Protein.Chain(backbone; id, resnums, ins_codes, aavector, modelnum, residue_atoms) -end - -function Protein.Chain(chain::BioStructures.Chain, selector=backbone_residue_selector) - residues = BioStructures.collectresidues(chain, selector) - isempty(residues) && return Protein.Chain(Backbone(Matrix{Float64}(undef, 3, 0)); id=BioStructures.chainid(chain), modelnum=BioStructures.modelnumber(chain)) - return Protein.Chain(residues) -end - -function collectchains(struc::BioStructures.MolecularStructure, selector=backbone_residue_selector) - chains = Protein.Chain[] - for model in struc, chain in model - if !isempty(chain) - protein_chain = Protein.Chain(chain, selector) - !isempty(protein_chain) && push!(chains, protein_chain) - end - end - return chains -end - -const pdbextension_to_format = Dict(ext => format for (format, ext) in BioStructures.pdbextension) - -get_format(path::AbstractString) = get(pdbextension_to_format, lowercase(last(splitext(path))[2:end]), PDBFormat) - -""" - readchains(path, format) -> chains::Vector{Protein.Chain} - -Loads a protein structure from a PDB file. - -Exported formats: `PDBFormat`, `MMCIFFormat` - -## Examples - -```jldoctest -julia> readchains("example.pdb") # detects PDB format from extension - -julia> readchains("example.cif") # detects mmCIF format from extension - -julia> readchains("example.abc", PDBFormat) # force PDB format - -julia> readchains("example.xyz", MMCIFFormat) # force mmCIF format -``` -""" -readchains(path::AbstractString, format::Type{<:ProteinFileFormat}) = collectchains(read(path, format)) -readchains(path::AbstractString) = readchains(path, get_format(path)) - -""" - readpdb(path) -> chains::Vector{Protein.Chain} -""" -readpdb(path::AbstractString) = readchains(path, PDBFormat) - -""" - readcif(path) -> chains::Vector{Protein.Chain} -""" -readcif(path::AbstractString) = readchains(path, MMCIFFormat) - -""" - writepdb(path, chains::AbstractVector{Protein.Chain}) - writepdb(path, chain::Protein.Chain) -""" -function writepdb(path::AbstractString, chains::AbstractVector{Protein.Chain}) - atom_records = BioStructures.AtomRecord[] - index = 0 - residue_index = 0 - for chain in chains - residue_backbone_coords = reshape(chain.backbone.coords, 3, 3, :) - assign_missing_oxygens!(chain) - for (i, residue) in enumerate(chain) - resname = get(threeletter_aa_names, residue.aa, "XXX") # threletter_aa_names in residue.jl - residue_index += 1 - residue_atoms = [ - [Atom(name, coords) for (name, coords) in zip(BACKBONE_ATOM_NAMES, eachcol(view(residue_backbone_coords, :, :, i)))]; - filter(a -> !(a.name in BACKBONE_ATOM_NAMES), residue.atoms)] - for atom in residue_atoms - index += 1 - push!(atom_records, BioStructures.AtomRecord(false, index, atom.name, ' ', resname, chain.id, - residue.num, residue.ins_code, atom.coords, 1.0, 0.0, strip(atom.name)[1:1], "")) - end - end - end - pdblines = BioStructures.pdbline.(atom_records) - open(path, "w") do io - for line in pdblines - println(io, line) - end - end - return nothing -end - -writepdb(path::AbstractString, chain::Protein.Chain) = writepdb(path, [chain]) - -# compat -writepdb(chains::Union{Protein.Chain, AbstractVector{Protein.Chain}}, path::AbstractString) = writepdb(path, chains) - -""" - writechains(path, chains::AbstractVector{Protein.Chain}, format) - writechains(path, chain::Protein.Chain, format) - -Write a protein structure (represented as a `Vector{Protein.Chain}`s) to file with the specified format. - -Exported formats: `PDBFormat`, `MMCIFFormat` - -## Examples - -```jldoctest -julia> writechains("example.pdb", chains) # detects PDB format from extension - -julia> writechains("example.cif", chains) # detects mmCIF format from extension - -julia> writechains("example.abc", chains, PDBFormat) # force PDB format - -julia> writechains("example.xyz", chains, MMCIFFormat) # force mmCIF format -``` -""" -writechains(path::AbstractString, chains::AbstractVector{Protein.Chain}, ::Type{PDBFormat}) = writepdb(path, chains) - -function writechains(path::AbstractString, chains::AbstractVector{Protein.Chain}, format::Type{<:ProteinFileFormat}) - struc = mktempdir() do temp_dir - temp_path = joinpath(temp_dir, "temp.pdb") - writechains(temp_path, chains, PDBFormat) - read(temp_path, PDBFormat) # loads BioStructures.MolecularStructure - end - write_function = format == PDBFormat ? BioStructures.writepdb : BioStructures.writemmcif - write_function(path, struc) -end - -writechains(path::AbstractString, chains::AbstractVector{Protein.Chain}) = writechains(path, chains, get_format(path)) - -writechains(path, chain::Protein.Chain, args...) = writechains(path, [chain], args...) \ No newline at end of file diff --git a/src/protein/atom.jl b/src/protein/atom.jl deleted file mode 100644 index a2ecfc5..0000000 --- a/src/protein/atom.jl +++ /dev/null @@ -1,14 +0,0 @@ -struct Atom - name::String - coords::AbstractVector{<:Real} - - function Atom(name::AbstractString, coords::AbstractVector{<:Real}) - length(coords) == 3 || throw(ArgumentError("coords must be a 3-vector")) - return new(strip(name), coords) - end -end - -Atom(atom::BioStructures.Atom) = Atom(atom.name, atom.coords) - -Base.summary(atom::Atom) = "Atom $(atom.name) at $(atom.coords)" -Base.show(io::IO, atom::Atom) = print(io, summary(atom)) \ No newline at end of file diff --git a/src/protein/chain.jl b/src/protein/chain.jl deleted file mode 100644 index 993b300..0000000 --- a/src/protein/chain.jl +++ /dev/null @@ -1,232 +0,0 @@ -using Backboner: get_atom_distances - -""" - Chain <: AbstractVector{Residue} - -A `Chain` represents a chain of a protein, and is a vector of `Residue`s, which are instantiated from indexing the chain. - -## Fields -- `id::String`: A string identifier (usually a single letter). -- `backbone::Backbone`: A backbone with a length divisible by 3, to ensure 3 atoms per residue (N, Ca, C). -- `modelnum::Int`: The model number of the chain. -- `resnums::Vector{Int}`: storing the residue numbers. -- `aavector::Vector{Char}`: storing the amino acid sequence. -- `ssvector::Vector{Char}`: storing the secondary structure. -""" -mutable struct Chain <: AbstractVector{Residue} - id::String - backbone::Backbone - modelnum::Int - resnums::Vector{Int} - ins_codes::Vector{Char} - aavector::Vector{Char} - ssvector::Vector{Char} - residue_atoms::Vector{Vector{Atom}} - - function Chain( - backbone::Backbone; - id::AbstractString = "A", - modelnum::Int = 1, - resnums::Vector{Int} = collect(1:length(backbone) ÷ 3), - ins_codes::Vector{Char} = fill(' ', length(backbone) ÷ 3), - aavector::Vector{Char} = fill('G', length(backbone) ÷ 3), - ssvector::Union{Vector{Char}, Vector{<:Integer}} = fill(' ', length(backbone) ÷ 3), - residue_atoms::Vector{Vector{Atom}} = [Atom[] for i in resnums], - ) - L, r = divrem(length(backbone), 3) - iszero(r) || throw(ArgumentError("backbone must have a length divisible by 3")) - length(resnums) == L || throw(ArgumentError("length of resnums must be equal to length of backbone divided by 3")) - length(ins_codes) == L || throw(ArgumentError("length of ins_codes must be equal to length of backbone divided by 3")) - length(aavector) == L || throw(ArgumentError("length of aavector must be equal to length of backbone divided by 3")) - length(ssvector) == L || throw(ArgumentError("length of ssvector must be equal to length of backbone divided by 3")) - ssvector isa Vector{<:Integer} && (ssvector = get.(('-', 'H', 'E'), ssvector, ' ')) - chain = new(id, backbone, modelnum, resnums, ins_codes, aavector, ssvector, residue_atoms) - assign_backbone_atoms!(chain) - return chain - end - - Chain(id::AbstractString, backbone::Backbone; kwargs...) = Chain(backbone; id=id, kwargs...) -end - -@inline Base.:(==)(chain1::Chain, chain2::Chain) = chain1.id == chain2.id && chain1.backbone == chain2.backbone && chain1.ssvector == chain2.ssvector -@inline Base.length(chain::Chain) = length(chain.backbone) ÷ 3 -@inline Base.size(chain::Chain) = Tuple(length(chain)) -@inline Base.getindex(chain::Chain, i::Integer) = Residue(chain.resnums[i], chain.residue_atoms[i], chain.aavector[i], chain.ssvector[i], chain.ins_codes[i]) - -function Base.getindex(chain::Protein.Chain, I::AbstractVector{<:Integer}) - backbone = Backbone(reshape(chain.backbone.coords, 3, 3, :)[:, :, I]) - Protein.Chain(backbone; id=chain.id, modelnum=chain.modelnum, resnums=chain.resnums[I], ins_codes=chain.ins_codes[I], aavector=chain.aavector[I], ssvector=chain.ssvector[I], residue_atoms=chain.residue_atoms[I]) -end - -Base.summary(chain::Chain) = "Chain $(chain.id) with $(length(chain)) residue$(length(chain) == 1 ? "" : "s")" -Base.show(io::IO, chain::Chain) = print(io, summary(chain)) - -@inline Base.getindex(protein::AbstractVector{Chain}, id::AbstractString) = protein[findfirst(chain -> chain.id == id, protein)] -@inline Base.getindex(protein::AbstractVector{Chain}, id::Symbol) = protein[String(id)] - -has_assigned_ss(ssvector::Vector{Char}) = all(!=(' '), ssvector) -has_assigned_ss(chain::Chain) = has_assigned_ss(chain.ssvector) -has_assigned_ss(protein::AbstractVector{Chain}) = all(has_assigned_ss, protein) - -Backboner.is_knotted(chain::Chain) = is_knotted(@view(chain.backbone[2:3:end])) - -function offset!(chain::Chain, pos::AbstractVector{<:Real}) - for i in 1:length(chain) - for atom in chain.residue_atoms[i] - atom.coords .+= pos - end - end -end - -function assign_backbone_atoms!(chain::Chain) - reshaped_backbone = reshape(chain.backbone.coords, 3, 3, :) - for (residue_backbone_coords, residue) in zip(eachslice(reshaped_backbone, dims=3), chain) - atom_names = map(atom -> atom.name, residue.atoms) - for (name, coords) in Iterators.reverse(zip(BACKBONE_ATOM_NAMES, eachcol(residue_backbone_coords))) - j = findfirst(==(name), atom_names) - !isnothing(j) && deleteat!(residue.atoms, j) - isnothing(j) && (j = 1) - insert!(residue.atoms, j, Atom(name, coords)) - end - end -end - -""" - nitrogen_coords(chain::Chain) - nitrogen_coords(backbone::Backbone) - -Returns the coordinates of all nitrogen atoms in a chain, as a 3xN matrix. -""" -nitrogen_coords(chain::Chain) = nitrogen_coords(chain.backbone) -nitrogen_coords(backbone::Backbone) = @views backbone[1:3:end].coords - -""" - alphacarbon_coords(chain::Chain) - alphacarbon_coords(backbone::Backbone) - -Returns the coordinates of all alphacarbon atoms in a chain, as a 3xN matrix. -""" -alphacarbon_coords(chain::Chain) = alphacarbon_coords(chain.backbone) -alphacarbon_coords(backbone::Backbone) = @views backbone[2:3:end].coords - -""" - carbonyl_coords(chain::Chain) - carbonyl_coords(backbone::Backbone) - -Returns the coordinates of all carbonyl atoms in a chain, as a 3xN matrix. -""" -carbonyl_coords(chain::Chain) = carbonyl_coords(chain.backbone) -carbonyl_coords(backbone::Backbone) = @views backbone[3:3:end].coords - - -""" - nitrogen_alphacarbon_distances(chain::Chain) - nitrogen_alphacarbon_distances(backbone::Backbone) - nitrogen_alphacarbon_distances(bonds::ChainedBonds) - -Calculate the distances between all pairs of contiguous nitrogen and alpha-carbon atoms in a chain. -Returns a vector of distances of length `length(chain)`. -""" -nitrogen_alphacarbon_distances(chain::Chain) = nitrogen_alphacarbon_distances(chain.backbone) -nitrogen_alphacarbon_distances(backbone::Backbone) = get_atom_distances(backbone, 1, 1, 3) |> vec -nitrogen_alphacarbon_distances(bonds::ChainedBonds) = get_bond_lengths(bonds)[1:3:end] - -""" - alphacarbon_carbonyl_distances(chain::Chain) - alphacarbon_carbonyl_distances(backbone::Backbone) - alphacarbon_carbonyl_distances(bonds::ChainedBonds) - -Calculate the distances between all pairs of contiguous alpha-carbon and carbonyl atoms in a chain. -Returns a vector of distances of length `length(chain)`. -""" -alphacarbon_carbonyl_distances(chain::Chain) = alphacarbon_carbonyl_distances(chain.backbone) -alphacarbon_carbonyl_distances(backbone::Backbone) = get_atom_distances(backbone, 2, 1, 3) |> vec -alphacarbon_carbonyl_distances(bonds::ChainedBonds) = get_bond_lengths(bonds)[2:3:end] - -""" - carbonyl_nitrogen_distances(chain::Chain) - carbonyl_nitrogen_distances(backbone::Backbone) - carbonyl_nitrogen_distances(bonds::ChainedBonds) - -Calculate the distances between all pairs of contiguous carbonyl and nitrogen atoms in a chain. -Returns a vector of distances of length `length(chain) - 1`. -""" -carbonyl_nitrogen_distances(chain::Chain) = carbonyl_nitrogen_distances(chain.backbone) -carbonyl_nitrogen_distances(backbone::Backbone) = get_atom_distances(backbone, 3, 1, 3) |> vec -carbonyl_nitrogen_distances(bonds::ChainedBonds) = get_bond_lengths(bonds)[3:3:end] - - -""" - nitrogen_angles(chain::Chain) - nitrogen_angles(backbone::Backbone) - nitrogen_angles(bonds::ChainedBonds) - -Calculate the angles at the nitrogen atoms (C-N-Ca angles) of a chains backbone, or take directly from a precalculated ChainedBonds instance. -""" -nitrogen_angles(chain::Chain) = nitrogen_angles(chain.backbone) -nitrogen_angles(backbone::Backbone) = nitrogen_angles(ChainedBonds(backbone)) -nitrogen_angles(bonds::ChainedBonds) = @view get_bond_angles(bonds)[3:3:end] - -""" - alphacarbon_angles(chain::Chain) - alphacarbon_angles(bonds::ChainedBonds) - -Calculate the angles at the alphacarbon atoms (N-Ca-C angles) of a chains backbone, or take directly from a precalculated ChainedBonds instance. -""" -alphacarbon_angles(chain::Chain) = alphacarbon_angles(chain.backbone) -alphacarbon_angles(backbone::Backbone) = alphacarbon_angles(ChainedBonds(backbone)) -alphacarbon_angles(bonds::ChainedBonds) = @view get_bond_angles(bonds)[1:3:end] - -""" - carbonyl_angles(chain::Chain) - carbonyl_angles(bonds::ChainedBonds) - -Calculate the angles at the carbonyl atoms (Ca-C-N angles) of a chain's backbone, or take directly from a precalculated ChainedBonds instance. -""" -carbonyl_angles(chain::Chain) = carbonyl_angles(chain.backbone) -carbonyl_angles(backbone::Backbone) = carbonyl_angles(ChainedBonds(backbone)) -carbonyl_angles(bonds::ChainedBonds) = @view get_bond_angles(bonds)[2:3:end] - - -""" - phi_angles(chain::Chain) - phi_angles(backbone::Backbone) - phi_angles(bonds::ChainedBonds) - -Calculate the phi (φ) angles of a chain's backbone, or take directly from a precalculated ChainedBonds instance. -""" -phi_angles(chain::Chain) = phi_angles(chain.backbone) -phi_angles(backbone::Backbone) = phi_angles(ChainedBonds(backbone)) -phi_angles(bonds::ChainedBonds) = @view get_torsional_angles(bonds)[3:3:end] - -""" - psi_angles(chain::Chain) - psi_angles(backbone::Backbone) - psi_angles(bonds::ChainedBonds) - -Calculate the psi (ψ) angles of a chain's backbone, or take directly from a precalculated ChainedBonds instance. -""" -psi_angles(chain::Chain) = psi_angles(chain.backbone) -psi_angles(backbone::Backbone) = psi_angles(ChainedBonds(backbone)) -psi_angles(bonds::ChainedBonds) = @view get_torsional_angles(bonds)[1:3:end] - -""" - omega_angles(chain::Chain) - omega_angles(backbone::Backbone) - omega_angles(bonds::ChainedBonds) - -Calculate the omega (Ω) angles of a chain's backbone, or take directly from a precalculated ChainedBonds instance. -""" -omega_angles(chain::Chain) = omega_angles(chain.backbone) -omega_angles(backbone::Backbone) = omega_angles(ChainedBonds(backbone)) -omega_angles(bonds::ChainedBonds) = @view get_torsional_angles(bonds)[2:3:end] - - -# TODO: append_residues! function. also get the Residue type sorted out. user shouldn't need to create it manually. -#=function append_residues!( - chain::Chain, - dihedral_angles::AbstractVector{<:Real}, - bond_lengths::AbstractVector{<:Real} = IDEALIZED_BOND_LENGTHS, - bond_angles::AbstractVector{<:Real} = IDEALIZED_BOND_ANGLES, -) -end=# \ No newline at end of file diff --git a/src/protein/idealization.jl b/src/protein/idealization.jl deleted file mode 100644 index f40199b..0000000 --- a/src/protein/idealization.jl +++ /dev/null @@ -1,27 +0,0 @@ -### Frame idealization - -const STANDARD_TRIANGLE_ANGSTROM = [ - -1.066 -0.200 1.266; - 0.645 -0.527 -0.118; - 0.000 0.000 0.000; -] # N Ca C - -# alias for backwards compatibility -const STANDARD_RESIDUE_ANGSTROM = STANDARD_TRIANGLE_ANGSTROM - -### Bond idealization - -const BACKBONE_BOND_LENGTHS = Float64[1.45775, 1.52307, 1.33208] -const BACKBONE_BOND_ANGLES = Float64[1.93731, 2.03926, 2.12710] - -function Backboner.idealize(chain::Protein.Chain, ideal_lengths=BACKBONE_BOND_LENGTHS, ideal_angles=BACKBONE_BOND_ANGLES; kwargs...) - return Protein.Chain( - chain.id, - Backboner.idealize(chain.backbone, ideal_lengths, ideal_angles; kwargs...), - modelnum=chain.modelnum, - resnums=chain.resnums, - ins_codes=chain.ins_codes, - aavector=chain.aavector, - ssvector=chain.ssvector, - ) -end diff --git a/src/protein/oxygen.jl b/src/protein/oxygen.jl deleted file mode 100644 index 9cfda91..0000000 --- a/src/protein/oxygen.jl +++ /dev/null @@ -1,107 +0,0 @@ -## Warning: wonky code ahead - -using LinearAlgebra - -function get_rotation_matrix( - point1::AbstractVector{T}, center::AbstractVector{T}, point3::AbstractVector{T} -) where T <: Real - direction1 = point3 - center - direction2 = point1 - center - basis1 = normalize(direction1) - orthogonal_component = direction2 - basis1 * (basis1' * direction2) - basis2 = normalize(orthogonal_component) - basis3 = cross(basis1, basis2) - rotation_matrix = [basis1 basis2 basis3] - return rotation_matrix -end - -const IDEALIZED_OXYGEN_VECTOR = [-0.672, -1.034, 0.003] - -function _estimate_oxygen_position( - CA::AbstractVector{T}, C::AbstractVector{T}, next_N::AbstractVector{T}, -) where T <: Real - R = get_rotation_matrix(CA, C, next_N) - return R * IDEALIZED_OXYGEN_VECTOR + C # inverse of R' * (oxygen_pos - C) -end - -# last oxygen is put on the same plane as the last residue triangle -# 1.2 angstroms away from the C atom -# with a 120 degree angle between the C-Ca and C-O bonds -function _estimate_oxygen_position_last( - N_pos::V, Ca_pos::V, C_pos::V, -) where V <: AbstractVector{T} where T <: Real - angle = 2π/3 - bond_length = 1.2 - v = Ca_pos - C_pos - w = cross(v, Ca_pos - N_pos) - u = cross(w, v) - O_pos = C_pos + normalize(u)*bond_length*cos(angle - 0.5π) - normalize(v)*bond_length*sin(angle - 0.5π) - return O_pos -end - -function estimate_oxygen_position(chain::Chain, i::Int) - 1 <= i <= length(chain) || throw(ArgumentError("Index $i out of bounds for chain of length $(length(chain))")) - if i == length(chain) - N = chain.backbone[3*(i-1)+1] - CA = chain.backbone[3*(i-1)+2] - C = chain.backbone[3*(i-1)+3] - return _estimate_oxygen_position_last(N, CA, C) - else - CA = chain.backbone[3*(i-1)+2] - C = chain.backbone[3*(i-1)+3] - next_N = chain.backbone[3*(i-1)+4] - return _estimate_oxygen_position(CA, C, next_N) - end -end - -function get_oxygen(chain::Chain, i::Int) - residue = chain[i] - k = findfirst(==("O") ∘ (atom -> atom.name), residue.atoms) - isnothing(k) && return estimate_oxygen_position(chain, i) - return residue.atoms[k].coords -end - -""" - oxygen_coords(chain::Chain) - -Add oxygen atoms to the backbone of a protein, turning the coordinate array from size 3x3xL to 3x4xL-1, -where L is the length of the backbone. - -# Example -```jldoctest -julia> chains = readpdb("test/data/1ZAK.pdb") -2-element Vector{Chain}: - Chain A with 220 residues - Chain B with 220 residues - -julia> oxygen_coords(chains["A"]) # returns the estimated position of oxygen atoms in chain A (~0.05 Å mean deviation) -3×220 Matrix{Float64}: - 22.6697 25.1719 24.7761 25.8559 … 24.7911 22.7649 22.6578 21.24 - 15.7257 13.505 13.5151 11.478 15.0888 12.2361 15.8825 14.2933 - 21.4295 19.5663 22.8638 25.3283 7.95346 4.81901 3.24164 -0.742424 -``` - -!!! note - The `oxygen_coords` function finds the oxygen atoms to the backbone using idealized geometry, and oxygens atom will on average deviate [0.05 Å](https://github.com/MurrellGroup/Backboner.jl/blob/main/test/protein/oxygen.jl) from original PDB positions. - Moreover, the last oxygen atom is essentially given a random (although deterministic) orientation, as that information is lost when the backbone is reduced to 3 atoms, and there's no next nitrogen atom to compare with. -""" -function oxygen_coords(chain::Chain) - return stack(get_oxygen(chain, i) for i in 1:length(chain)) -end - -function assign_oxygens!(chain::Chain) - for (i, residue) in enumerate(chain) - j = findfirst(==("O") ∘ (atom -> atom.name), residue.atoms) - isnothing(j) || deleteat!(residue.atoms, j) - insert!(residue.atoms, 4, Atom("O", estimate_oxygen_position(chain, i))) - end - return chain -end - -function assign_missing_oxygens!(chain::Chain) - for (i, residue) in enumerate(chain) - !any(==("O") ∘ (atom -> atom.name), residue.atoms) && insert!(residue.atoms, 4, Atom("O", estimate_oxygen_position(chain, i))) - end -end - -ncaco_coords(chain::Chain) = cat(reshape(chain.backbone.coords, 3, 3, :), reshape(oxygen_coords(chain), 3, 1, :), dims=2) diff --git a/src/protein/protein.jl b/src/protein/protein.jl deleted file mode 100644 index 69b573e..0000000 --- a/src/protein/protein.jl +++ /dev/null @@ -1,44 +0,0 @@ -module Protein - -# TODO: un-export Atom and Residue as they shouldn't really be public (breaking) - -using ..Backboner - -import BioStructures - -include("atom.jl") - -include("residue.jl") - -include("chain.jl") -export Chain -export has_assigned_ss - -include("oxygen.jl") -export oxygen_coords - -include("BioStructures-interface.jl") -export nitrogen_coords, alphacarbon_coords, carbonyl_coords -export nitrogen_alphacarbon_distances, alphacarbon_carbonyl_distances, carbonyl_nitrogen_distances -export phi_angles, psi_angles, omega_angles - -include("idealization.jl") -export readchains, writechains -export readpdb, writepdb -export readcif, writecif -export PDBFormat, MMCIFFormat - -PDBEntry(pdbid::AbstractString; format=BioStructures.PDBFormat, kwargs...) = mktempdir() do dir - path = BioStructures.downloadpdb(pdbid; dir, format, kwargs...) - Backboner.Protein.readchains(path, format) -end - -macro pdb_str(pdbid) - quote - PDBEntry($(esc(pdbid))) - end -end - -export PDBEntry, @pdb_str - -end \ No newline at end of file diff --git a/src/protein/residue.jl b/src/protein/residue.jl deleted file mode 100644 index 605037a..0000000 --- a/src/protein/residue.jl +++ /dev/null @@ -1,23 +0,0 @@ -const threeletter_aa_names = Dict{Char, String}([Char(v) => k for (k, v) in BioStructures.threeletter_to_aa]) - -# not user-facing - -struct Residue - num::Integer - atoms::Vector{Atom} - aa::Char - ss::Char - ins_code::Char - - function Residue(num::Integer, atoms::Vector{Atom}, aa::Char='G', ss::Char=' ', ins_code::Char=' ') - return new(num, atoms, aa, ss, ins_code) - end -end - -function Base.show(io::IO, residue::Residue) - num = residue.num - aa = get(threeletter_aa_names, residue.aa, "XXX") - ss = residue.ss == ' ' ? "" : "($(residue.ss))" - ins_code = residue.ins_code == ' ' ? "" : residue.ins_code - print(io, "Residue $num$ins_code $aa with $(length(residue.atoms)) atom$(length(residue.atoms) == 1 ? "" : "s") $ss") -end \ No newline at end of file diff --git a/test/bonds.jl b/test/bonds.jl index bf5a7a0..138fc45 100644 --- a/test/bonds.jl +++ b/test/bonds.jl @@ -46,16 +46,14 @@ @test get_bond_angles(backbone) ≈ [π/2, π/2, π/2, π/2, π/2] end - @testset "torsional angles" begin - @test get_torsional_angles(backbone) ≈ [π/2, π, -π/2, π/4] + @testset "torsion angles" begin + @test get_torsion_angles(backbone) ≈ [π/2, π, -π/2, π/4] end end @testset "ChainedBonds" begin - protein = Backboner.Protein.readpdb("data/1ASS.pdb") - chain = protein["A"] - backbone = chain.backbone + backbone = Backbone(rand(3,100)) bonds = ChainedBonds(backbone) @testset "invertibility" begin @@ -63,12 +61,10 @@ bonds2 = ChainedBonds(Backbone(ChainedBonds(backbone))) @test get_bond_lengths(bonds1) ≈ get_bond_lengths(bonds2) @test get_bond_angles(bonds1) ≈ get_bond_angles(bonds2) - @test get_torsional_angles(bonds1) ≈ get_torsional_angles(bonds2) + @test get_torsion_angles(bonds1) ≈ get_torsion_angles(bonds2) end - io = IOBuffer() - show(io, bonds) - @test String(take!(io)) == "ChainedBonds{Float64, Vector{Float64}} with 455 bond lengths, 454 bond angles, and 453 torsional angles" + @test sprint(show, bonds) == "ChainedBonds{Float64, Vector{Float64}} with 99 bond lengths, 98 bond angles, and 97 torsion angles" end @testset "append_bonds" begin @@ -83,7 +79,7 @@ subbackbone = backbone[1:n-k] bonds = ChainedBonds(backbone) - new_backbone = append_bonds(subbackbone, get_bond_lengths(bonds)[end-k+1:end], get_bond_angles(bonds)[end-k+1:end], get_torsional_angles(bonds)[end-k+1:end]) + new_backbone = append_bonds(subbackbone, get_bond_lengths(bonds)[end-k+1:end], get_bond_angles(bonds)[end-k+1:end], get_torsion_angles(bonds)[end-k+1:end]) @test coords(new_backbone) ≈ coords(backbone) end @@ -99,7 +95,7 @@ subbackbone = backbone[end-k+1:end] bonds = ChainedBonds(backbone) - new_backbone = prepend_bonds(subbackbone, get_bond_lengths(bonds)[1:n-k], get_bond_angles(bonds)[1:n-k], get_torsional_angles(bonds)[1:n-k]) + new_backbone = prepend_bonds(subbackbone, get_bond_lengths(bonds)[1:n-k], get_bond_angles(bonds)[1:n-k], get_torsion_angles(bonds)[1:n-k]) @test new_backbone.coords ≈ backbone.coords end diff --git a/test/ext/ZygoteIdealizationExt.jl b/test/ext/ZygoteIdealizationExt.jl deleted file mode 100644 index 89a8aa5..0000000 --- a/test/ext/ZygoteIdealizationExt.jl +++ /dev/null @@ -1,14 +0,0 @@ -import Zygote - -@testset "ZygoteIdealizationExt" begin - - @testset "idealize" begin - backbone = Protein.readpdb("data/1ZAK.pdb")["A"].backbone - idealized_backbone = idealize(backbone, Float64[1.46, 1.52, 1.33], Float64[1.94, 2.04, 2.13]) - idealized_bonds = ChainedBonds(idealized_backbone) - @test all(isapprox.(get_bond_lengths(idealized_bonds), [ideal for (_, ideal) in zip(1:length(backbone)-1, Iterators.cycle(Float64[1.46, 1.52, 1.33]))], atol=1e-3)) - @test all(isapprox.(get_bond_angles(idealized_bonds), [ideal for (_, ideal) in zip(1:length(backbone)-2, Iterators.cycle(Float64[1.94, 2.04, 2.13]))], atol=1e-3)) - @test backbone == idealize(backbone, Float64[0], Float64[0], mask_tolerance=0.5) - end - -end \ No newline at end of file diff --git a/test/knots.jl b/test/knots.jl index ace1b9c..11e54ee 100644 --- a/test/knots.jl +++ b/test/knots.jl @@ -11,8 +11,4 @@ @test is_knotted(backbone) @test !is_knotted(backbone[1:end-1]) - @test is_knotted(Protein.readpdb("data/1YVE.pdb")["I"].backbone) - @test !is_knotted(Protein.readpdb("data/1ASS.pdb")["A"].backbone) - @test !is_knotted(Protein.readpdb("data/1ZAK.pdb")["A"].backbone) - end \ No newline at end of file diff --git a/test/protein/BioStructures-interface.jl b/test/protein/BioStructures-interface.jl deleted file mode 100644 index 2e9d4d0..0000000 --- a/test/protein/BioStructures-interface.jl +++ /dev/null @@ -1,24 +0,0 @@ -@testset "BioStructures-interface.jl" begin - - @testset "PDB" begin - - @testset "read" begin - chains_pdb = readpdb("data/1ASS.pdb") - @test length.(chains_pdb) == [152] - chains_cif = readchains("data/1ASS.cif", MMCIFFormat) - @test chains_pdb[1].backbone.coords == chains_cif[1].backbone.coords - end - - @testset "write" begin - chains = readpdb("data/1ASS.pdb") - new_chains = mktempdir() do temp_dir - temp_path = joinpath(temp_dir, "temp.pdb") - writepdb(chains, temp_path) - readpdb(temp_path) - end - @test chains[1].backbone.coords == new_chains[1].backbone.coords - end - - end - -end \ No newline at end of file diff --git a/test/protein/chain.jl b/test/protein/chain.jl deleted file mode 100644 index 2cac3dc..0000000 --- a/test/protein/chain.jl +++ /dev/null @@ -1,84 +0,0 @@ -@testset "chain" begin - - @testset "chain.jl" begin - - coords = randn(3, 15) - backbone = Backbone(coords) - chain = Chain("A", backbone) - @test chain.id == "A" - @test chain.backbone.coords == coords - @test chain.aavector == fill('G', length(chain)) - @test chain.ssvector == fill(' ', length(chain)) - @test !has_assigned_ss(chain) - @test length(chain) == 5 - @test size(chain) == (5,) - - protein = [chain] - @test protein["A"] == protein[:A] == protein[1] - - @test chain[1] == Protein.Residue(1, chain.residue_atoms[1], 'G', ' ', ' ') - - @test summary(chain) == "Chain A with 5 residues" - - io = IOBuffer() - show(io, chain) - @test String(take!(io)) == summary(chain) - - end - - @testset "is_knotted" begin - - protein = readpdb("data/1ZAK.pdb") - chain = protein["A"] - @test !is_knotted(chain) - - end - - @testset "atom coords" begin - - protein = readpdb("data/1ASS.pdb") - chain = protein["A"] - L = length(chain) - @test size(nitrogen_coords(chain), 2) == L - @test size(alphacarbon_coords(chain), 2) == L - @test size(carbonyl_coords(chain), 2) == L - - end - - @testset "dihedrals" begin - - protein = readpdb("data/1ASS.pdb") - chain = protein["A"] - L = length(chain) - @test length(psi_angles(chain)) == L - 1 - @test length(phi_angles(chain)) == L - 1 - @test length(omega_angles(chain)) == L - 1 - - end - - @testset "atom_distances" begin - - protein = readpdb("data/1ASS.pdb") - chain = protein["A"] - - @testset "nitrogen_alphacarbon_distances" begin - NCa_distances = nitrogen_alphacarbon_distances(chain) - @test length(NCa_distances) == length(chain) - @test 1.44 <= sum(NCa_distances) / length(NCa_distances) <= 1.48 - end - - @testset "alphacarbon_carbonyl_distances" begin - CaC_distances = alphacarbon_carbonyl_distances(chain) - @test length(CaC_distances) == length(chain) - @test 1.50 <= sum(CaC_distances) / length(CaC_distances) <= 1.54 - end - - @testset "carbonyl_nitrogen_distances" begin - CN_distances = carbonyl_nitrogen_distances(chain) - @test length(CN_distances) == length(chain) - 1 - @test 1.31 <= sum(CN_distances) / length(CN_distances) <= 1.35 - end - - end - -end \ No newline at end of file diff --git a/test/protein/protein.jl b/test/protein/protein.jl deleted file mode 100644 index 2d2bd71..0000000 --- a/test/protein/protein.jl +++ /dev/null @@ -1,5 +0,0 @@ -using Backboner.Protein - -include("residue.jl") -include("chain.jl") -include("BioStructures-interface.jl") \ No newline at end of file diff --git a/test/protein/residue.jl b/test/protein/residue.jl deleted file mode 100644 index 5673138..0000000 --- a/test/protein/residue.jl +++ /dev/null @@ -1,7 +0,0 @@ -@testset "residue.jl" begin - residue = Protein.Residue(1, Protein.Atom[], 'A', 'H', 'A') - - io = IOBuffer() - show(io, residue) - @test String(take!(io)) == "Residue 1A ALA with 0 atoms (H)" -end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 4f7b8c4..01bce99 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,5 @@ using LinearAlgebra include("bonds.jl") include("frames.jl") include("knots.jl") - include("protein/protein.jl") - include("ext/ZygoteIdealizationExt.jl") end