Skip to content

Commit

Permalink
Update docstrings, length of backbone definition, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Dec 18, 2023
1 parent 83f64e1 commit 7007c52
Show file tree
Hide file tree
Showing 12 changed files with 88 additions and 145 deletions.
2 changes: 0 additions & 2 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ makedocs(;
doctest = false,
pages = [
"Overview" => "index.md",
"Types" => "types.md",
"Oxygen atoms" => "oxygen.md",
],
authors = "Anton Oresten",
checkdocs = :all,
Expand Down
61 changes: 0 additions & 61 deletions docs/src/oxygen.md

This file was deleted.

22 changes: 0 additions & 22 deletions docs/src/types.md

This file was deleted.

9 changes: 6 additions & 3 deletions src/backbone/backbone.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
export Backbone, atom_coords

"""
Backbone{A, T <: Real} <: AbstractBackbone{3, A, T}
Backbone{A, T <: Real} <: AbstractArray{T, 3}
The `Backbone{A}` type is designed to efficiently store and manipulate the three-dimensional coordinates of backbone atoms.
The type parameter `A` stands for the number of atoms per residue, and is used in the size of the coordinate array, 3xAxL,
where L is the number of residues.
A wrapper for a 3xAxL array of coordinates of atoms.
Backbone{3} is used to store 3-dimensional coordinates of the contiguous backbone atoms (N, Cα, C) of a protein chain.
"""
struct Backbone{A, T <: Real} <: AbstractArray{T, 3}
Expand All @@ -21,7 +24,7 @@ struct Backbone{A, T <: Real} <: AbstractArray{T, 3}
end

@inline Base.size(backbone::Backbone) = size(backbone.coords)
@inline Base.length(backbone::Backbone) = size(backbone, 3)
@inline Base.length(backbone::Backbone) = size(backbone, 2) * size(backbone, 3)
@inline Base.getindex(backbone::Backbone, i, j, k) = backbone.coords[i, j, k]
@inline Base.getindex(backbone::Backbone, i::Integer) = view(backbone.coords, :, :, i)
@inline Base.getindex(backbone::Backbone, r::UnitRange{Int}) = Backbone(view(backbone.coords, :, :, r))
Expand Down
31 changes: 24 additions & 7 deletions src/backbone/bonds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,29 @@ get_dihedrals(backbone::Backbone) = get_dihedrals(get_bond_vectors(backbone))
ChainedBonds{T <: Real}
A lazy way to store a backbone as a series of bond lengths, angles, and dihedrals.
Can be instantiated from a Backbone or a matrix of bond vectors.
Can be used to instantiate a Backbone using the `Backbone{A}(bonds::ChainedBonds)` constructor.
It can be instantiated from a Backbone or a matrix of bond vectors.
It can also be used to instantiate a Backbone using the `Backbone{A}(bonds::ChainedBonds)` constructor.
# Example
```jldoctest
julia> protein = pdb_to_protein("test/data/1ZAK.pdb")
2-element Vector{ProteinChain}:
ProteinChain A with 220 residues
ProteinChain B with 220 residues
julia> chain = protein["A"]
ProteinChain A with 220 residues
julia> get_oxygens(chain) # returns the estimated position of oxygen atoms (~0.05 Å mean deviation)
3×220 Matrix{Float32}:
22.6697 25.1719 24.7761 25.8559 … 24.7911 22.7649 22.6578 21.24
15.7257 13.505 13.5151 11.478 15.0888 12.2361 15.8825 14.2933
21.4295 19.5663 22.8638 25.3283 7.95346 4.81901 3.24164 -0.742424
```
!!! note
The `get_oxygens` function adds oxygen atoms to the backbone using idealized geometry, and oxygens atom will on average deviate [0.05 Å](https://github.com/MurrellGroup/Backboner.jl/blob/main/test/protein/oxygen.jl) from the original positions.
Moreover, the last oxygen atom is essentially given a random (although deterministic) orientation, as that information is lost when the backbone is reduced to 3 atoms, and there's no next nitrogen atom to compare with.
"""
struct ChainedBonds{T <: Real}
lengths::AbstractVector{T}
Expand All @@ -91,9 +110,7 @@ struct ChainedBonds{T <: Real}
return new{T}(lengths, angles, dihedrals)
end

function ChainedBonds(backbone::Backbone{A, T}) where {A, T}
return ChainedBonds(get_bond_vectors(backbone))
end
ChainedBonds(backbone::Backbone) = ChainedBonds(get_bond_vectors(backbone))
end

Base.:(==)(b1::ChainedBonds, b2::ChainedBonds) = b1.lengths == b2.lengths && b1.angles == b2.angles && b1.dihedrals == b2.dihedrals
Expand Down
2 changes: 1 addition & 1 deletion src/backbone/rotations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ according to a defined standard triangle (`Backboner.STANDARD_TRIANGLE_ANGSTROM`
"""
function backbone_to_locs_and_rots(backbone::Backbone{A, T}, unit::Symbol=:angstrom) where {A, T}
@assert 3 <= A <= 4 "backbone must have 3 or 4 atoms per residue"
L = length(backbone)
L = size(backbone, 3)
locations = Array{T}(undef, 3, L)
rot_matrices = Array{T}(undef, 3, 3, L)
for i in 1:L
Expand Down
46 changes: 28 additions & 18 deletions src/protein/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@ export ProteinChain
"""
ProteinChain <: AbstractVector{Residue}
A chain has an identifier (usually a single letter) and holds the backbone atom coordinates, amino acid sequence, and secondary structures of a protein chain.
A `ProteinChain` represents a chain of a protein, and is a vector of `Residue`s, which are instantiated from indexing the chain.
## Fields
- `id::AbstractString`: A string identifier (usually a single letter).
- `backbone::Backbone{3}`: An backbone with 3 atoms per residue (N, Ca, C), storing the coordinates of backbone atoms in a 3x3xL array.
- `aavector::Vector{Char}`: storing the amino acid sequence.
- `ssvector::Vector{Char}`: storing the secondary structure.
"""
struct ProteinChain <: AbstractVector{Residue}
id::AbstractString
Expand All @@ -14,10 +20,10 @@ struct ProteinChain <: AbstractVector{Residue}
function ProteinChain(
id::AbstractString,
backbone::Backbone{3};
aavector::Vector{Char} = fill('G', length(backbone)),
ssvector::Union{Vector{Char}, Vector{<:Integer}} = fill(' ', length(backbone)),
aavector::Vector{Char} = fill('G', size(backbone, 3)),
ssvector::Union{Vector{Char}, Vector{<:Integer}} = fill(' ', size(backbone, 3)),
)
@assert length(backbone) == length(aavector) == length(ssvector) "backbone, aavector, and ssvector must have the same length"
@assert size(backbone, 3) == length(aavector) == length(ssvector) "backbone, aavector, and ssvector must have the same length"
ssvector isa Vector{<:Integer} && (ssvector = get.(('-', 'H', 'E'), ssvector, ' '))

return new(id, backbone, aavector, ssvector)
Expand All @@ -27,8 +33,8 @@ struct ProteinChain <: AbstractVector{Residue}
end

@inline Base.:(==)(chain1::ProteinChain, chain2::ProteinChain) = chain1.id == chain2.id && chain1.backbone == chain2.backbone && chain1.ssvector == chain2.ssvector
@inline Base.length(chain::ProteinChain) = length(chain.backbone)
@inline Base.size(chain::ProteinChain) = (length(chain),)
@inline Base.length(chain::ProteinChain) = size(chain.backbone, 3)
@inline Base.size(chain::ProteinChain) = Tuple(length(chain))
@inline Base.getindex(chain::ProteinChain, i::Integer) = Residue(i, chain.backbone, chain.aavector[i], chain.ssvector[i])

Base.summary(chain::ProteinChain) = "ProteinChain $(chain.id) with $(length(chain)) residue$(length(chain) == 1 ? "" : "s")"
Expand All @@ -39,26 +45,30 @@ Base.show(io::IO, chain::ProteinChain) = print(io, summary(chain))

export nitrogen_alphacarbon_distances, alphacarbon_carbonyl_distances, carbonyl_nitrogen_distances

nitrogen_alphacarbon_distances(backbone::Backbone{3}) = get_atom_distances(backbone, 1, 2, 0)
alphacarbon_carbonyl_distances(backbone::Backbone{3}) = get_atom_distances(backbone, 2, 3, 0)
carbonyl_nitrogen_distances(backbone::Backbone{3}) = get_atom_distances(backbone, 3, 1, 1)

"""
nitrogen_alphacarbon_distances(backbone::Backbone)
nitrogen_alphacarbon_distances(chain::ProteinChain)
Calculate the distances between all pairs of contiguous nitrogen and alpha-carbon atoms in a backbone.
Returns a vector of distances of length `length(backbone)`.
Calculate the distances between all pairs of contiguous nitrogen and alpha-carbon atoms in a chain.
Returns a vector of distances of length `length(chain)`.
"""
nitrogen_alphacarbon_distances(backbone::Backbone{3}) = get_atom_distances(backbone, 1, 2, 0)
nitrogen_alphacarbon_distances(chain::ProteinChain) = nitrogen_alphacarbon_distances(chain.backbone)

"""
alphacarbon_carbonyl_distances(backbone::Backbone)
alphacarbon_carbonyl_distances(chain::ProteinChain)
Calculate the distances between all pairs of contiguous alpha-carbon and carbonyl atoms in a backbone.
Returns a vector of distances of length `length(backbone)`.
Calculate the distances between all pairs of contiguous alpha-carbon and carbonyl atoms in a chain.
Returns a vector of distances of length `length(chain)`.
"""
alphacarbon_carbonyl_distances(backbone::Backbone{3}) = get_atom_distances(backbone, 2, 3, 0)
alphacarbon_carbonyl_distances(chain::ProteinChain) = alphacarbon_carbonyl_distances(chain.backbone)

"""
carbonyl_nitrogen_distances(backbone::Backbone)
carbonyl_nitrogen_distances(chain::ProteinChain)
Calculate the distances between all pairs of contiguous carbonyl and nitrogen atoms in a backbone.
Returns a vector of distances of length `length(backbone) - 1`.
Calculate the distances between all pairs of contiguous carbonyl and nitrogen atoms in a chain.
Returns a vector of distances of length `length(chain) - 1`.
"""
carbonyl_nitrogen_distances(backbone::Backbone{3}) = get_atom_distances(backbone, 3, 1, 1)
carbonyl_nitrogen_distances(chain::ProteinChain) = carbonyl_nitrogen_distances(chain.backbone)
21 changes: 0 additions & 21 deletions src/protein/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ function protein_to_pdb(protein::Vector{ProteinChain}, filename, header=:auto, f
residue_index = 0
for chain in protein
coords = NCaCO_coords(chain)
L = length(chain)
for (resnum, (residue_coords, aa)) in enumerate(zip(eachslice(coords, dims=3), chain.aavector))
resname = get(THREE_LETTER_AA_CODES, aa, "XXX")
residue_index += 1
Expand All @@ -81,26 +80,6 @@ function protein_to_pdb(protein::Vector{ProteinChain}, filename, header=:auto, f
push!(atoms, atom)
end
end

# reuses the magic vector from src/backbone/oxygen.jl
# note: the magic vector is meant to be used to calculate the O atom position,
# but this is basically using it to get the next N position,
# so it's a hacky way to get a "random" OXT atom position
# not actually random -- it has the same orientation as the next-to-last residue
index += 1
last_N, last_CA, last_C, last_O = eachcol(coords[:, :, end])
rot_matrix = get_rotation_matrix(last_CA, last_C, last_O)
OXT_pos = rot_matrix' \ magic_vector + last_C
OXT_atom = PDBTools.Atom(
index = index,
name = "OXT",
resname = get(THREE_LETTER_AA_CODES, chain.aavector[end], "XXX"),
chain = chain.id,
resnum = L,
residue = residue_index,
x = OXT_pos[1], y = OXT_pos[2], z = OXT_pos[3],
)
push!(atoms, OXT_atom)
end
PDBTools.writePDB(atoms, filename, header=header, footer=footer)
end
8 changes: 4 additions & 4 deletions src/protein/oxygen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,19 @@ function get_oxygens(
)
backbone = chain.backbone
T = eltype(backbone)
L = length(backbone)
L = size(backbone, 3)

CAs = eachcol(atom_coords(backbone, 2))
Cs = eachcol(atom_coords(backbone, 3))
next_Ns = eachcol(atom_coords(backbone[2:end], 1))
next_Ns = eachcol(@view(atom_coords(backbone, 1)[:, 2:end]))

oxygen_coords = zeros(T, 3, L)
for (i, (CA, C, next_N)) in enumerate(zip(CAs, Cs, next_Ns))
oxygen_coords[:, i] = estimate_oxygen_position(CA, C, next_N)
end
oxygen_coords[:, end] = get_last_oxygen(eachcol(backbone[end])...)
oxygen_coords[:, end] = get_last_oxygen(eachcol(@view(backbone.coords[:, :, end]))...)

return oxygen_coords
end

NCaCO_coords(chain::ProteinChain) = cat(chain.backbone, reshape(get_oxygens(chain), 3, 1, :), dims=2)
NCaCO_coords(chain::ProteinChain) = cat(chain.backbone.coords, reshape(get_oxygens(chain), 3, 1, :), dims=2)
2 changes: 1 addition & 1 deletion test/backbone/backbone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
backbone = Backbone(coords)
@test backbone isa Backbone{4}
@test size(backbone) == (3, 4, 5)
@test length(backbone) == 5
@test length(backbone) == 20
@test backbone[1] == coords[:, :, 1]

end
Expand Down
6 changes: 5 additions & 1 deletion test/backbone/bonds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,12 @@

@testset "ChainedBonds" begin

backbone = pdb_to_protein("data/1ASS.pdb")["A"].backbone
bonds = ChainedBonds(backbone)
@test bonds == bonds
@test size(bonds) == (length(backbone) - 1,)

@testset "invertibility" begin
backbone = pdb_to_protein("data/1ASS.pdb")["A"].backbone
@test ChainedBonds(Backbone{3}(ChainedBonds(backbone))) ChainedBonds(backbone)
@test ChainedBonds(Backbone(ChainedBonds(backbone))) ChainedBonds(backbone)
end
Expand Down
23 changes: 19 additions & 4 deletions test/protein/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,25 @@
@testset "atom_distances" begin

protein = pdb_to_protein("data/1ASS.pdb")
backbone = protein["A"].backbone
cn_distances = carbonyl_nitrogen_distances(backbone)
@test length(cn_distances) == length(backbone) - 1
@test 1.32 <= sum(cn_distances) / length(cn_distances) <= 1.34
chain = protein["A"]

@testset "nitrogen_alphacarbon_distances" begin
NCa_distances = nitrogen_alphacarbon_distances(chain)
@test length(NCa_distances) == length(chain)
@test 1.45 <= sum(NCa_distances) / length(NCa_distances) <= 1.47
end

@testset "alphacarbon_carbonyl_distances" begin
CaC_distances = alphacarbon_carbonyl_distances(chain)
@test length(CaC_distances) == length(chain)
@test 1.51 <= sum(CaC_distances) / length(CaC_distances) <= 1.53
end

@testset "carbonyl_nitrogen_distances" begin
CN_distances = carbonyl_nitrogen_distances(chain)
@test length(CN_distances) == length(chain) - 1
@test 1.32 <= sum(CN_distances) / length(CN_distances) <= 1.34
end

end

Expand Down

0 comments on commit 7007c52

Please sign in to comment.