Skip to content

Commit

Permalink
Store non-backbone atoms in Chain
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Apr 13, 2024
1 parent 1f2da0b commit 9475f93
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 67 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Backboner"
uuid = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
authors = ["Anton Oresten <anton.oresten42@gmail.com>"]
version = "0.9.5"
version = "0.9.6"

[deps]
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
Expand Down
16 changes: 16 additions & 0 deletions src/protein/atom.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
export Atom

struct Atom
name::String
coords::AbstractVector{<:Real}

function Atom(name::AbstractString, coords::AbstractVector{<:Real})
length(coords) == 3 || throw(ArgumentError("coords must be a 3-vector"))
return new(strip(name), coords)
end
end

Atom(atom::BioStructures.Atom) = Atom(atom.name, atom.coords)

Base.summary(atom::Atom) = "Atom $(atom.name) at $(atom.coords)"
Base.show(io::IO, atom::Atom) = print(io, summary(atom))
24 changes: 19 additions & 5 deletions src/protein/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export phi_angles, psi_angles, omega_angles
A `Chain` represents a chain of a protein, and is a vector of `Residue`s, which are instantiated from indexing the chain.
## Fields
- `id::AbstractString`: A string identifier (usually a single letter).
- `id::String`: A string identifier (usually a single letter).
- `backbone::Backbone`: A backbone with a length divisible by 3, to ensure 3 atoms per residue (N, Ca, C).
- `modelnum::Int`: The model number of the chain.
- `resnums::Vector{Int}`: storing the residue numbers.
Expand All @@ -22,8 +22,9 @@ struct Chain <: AbstractVector{Residue}
backbone::Backbone
modelnum::Int
resnums::Vector{Int}
aavector::Vector{Char} # TODO: switch to BioStructures.LongAA (from BioSequences)
aavector::Vector{Char}
ssvector::Vector{Char}
residue_atoms_dict::Dict{Int, Vector{Atom}}

function Chain(
backbone::Backbone;
Expand All @@ -32,14 +33,17 @@ struct Chain <: AbstractVector{Residue}
resnums::Vector{Int} = collect(1:length(backbone) ÷ 3),
aavector::Vector{Char} = fill('G', length(backbone) ÷ 3),
ssvector::Union{Vector{Char}, Vector{<:Integer}} = fill(' ', length(backbone) ÷ 3),
residue_atoms_dict::Dict{Int, Vector{Atom}} = Dict{Int, Vector{Atom}}(i => Atom[] for i in resnums),
)
L, r = divrem(length(backbone), 3)
iszero(r) || throw(ArgumentError("backbone must have a length divisible by 3"))
length(resnums) == L || throw(ArgumentError("length of resnums must be equal to length of backbone divided by 3"))
length(aavector) == L || throw(ArgumentError("length of aavector must be equal to length of backbone divided by 3"))
length(ssvector) == L || throw(ArgumentError("length of ssvector must be equal to length of backbone divided by 3"))
ssvector isa Vector{<:Integer} && (ssvector = get.(('-', 'H', 'E'), ssvector, ' '))
return new(id, backbone, modelnum, resnums, aavector, ssvector)
chain = new(id, backbone, modelnum, resnums, aavector, ssvector, residue_atoms_dict)
assign_missing_backbone_atoms!(chain)
return chain
end

Chain(id::AbstractString, backbone::Backbone; kwargs...) = Chain(backbone; id=id, kwargs...)
Expand All @@ -48,7 +52,7 @@ end
@inline Base.:(==)(chain1::Chain, chain2::Chain) = chain1.id == chain2.id && chain1.backbone == chain2.backbone && chain1.ssvector == chain2.ssvector
@inline Base.length(chain::Chain) = length(chain.backbone) ÷ 3
@inline Base.size(chain::Chain) = Tuple(length(chain))
@inline Base.getindex(chain::Chain, i::Integer) = Residue(chain.resnums[i], chain.aavector[i], chain.ssvector[i])
@inline Base.getindex(chain::Chain, i::Integer) = Residue(chain.resnums[i], chain.residue_atoms_dict[chain.resnums[i]], chain.aavector[i], chain.ssvector[i])

Base.summary(chain::Chain) = "Chain $(chain.id) with $(length(chain)) residue$(length(chain) == 1 ? "" : "s")"
Base.show(io::IO, chain::Chain) = print(io, summary(chain))
Expand All @@ -60,7 +64,17 @@ has_assigned_ss(ssvector::Vector{Char}) = all(!=(' '), ssvector)
has_assigned_ss(chain::Chain) = has_assigned_ss(chain.ssvector)
has_assigned_ss(protein::AbstractVector{Chain}) = all(has_assigned_ss, protein)

Backboner.is_knotted(chain::Chain) = is_knotted(chain.backbone[2:3:end])
Backboner.is_knotted(chain::Chain) = is_knotted(@view(chain.backbone[2:3:end]))

function assign_missing_backbone_atoms!(chain::Chain)
reshaped_backbone = reshape(chain.backbone.coords, 3, 3, :)
for (residue_backbone_coords, residue) in zip(eachslice(reshaped_backbone, dims=3), chain)
atom_names = map(atom -> atom.name, residue.atoms)
for (name, coords) in Iterators.reverse(zip(BACKBONE_ATOM_NAMES, eachcol(residue_backbone_coords)))
!any(==(name), atom_names) && insert!(residue.atoms, 1, Atom(name, coords))
end
end
end

# oxygen_coords function in src/protein/oxygen.jl
# TODO: hydrogen_coords
Expand Down
49 changes: 31 additions & 18 deletions src/protein/oxygen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ end

const IDEALIZED_OXYGEN_VECTOR = [-0.672, -1.034, 0.003]

function estimate_oxygen_position(
function _estimate_oxygen_position(
CA::AbstractVector{T}, C::AbstractVector{T}, next_N::AbstractVector{T},
) where T <: Real
R = get_rotation_matrix(CA, C, next_N)
return R' \ IDEALIZED_OXYGEN_VECTOR + C # inverse of R' * (oxygen_pos - C)
return R * IDEALIZED_OXYGEN_VECTOR + C # inverse of R' * (oxygen_pos - C)
end

# last oxygen is put on the same plane as the last residue triangle
# 1.2 angstroms away from the C atom
# with a 120 degree angle between the C-Ca and C-O bonds
function get_last_oxygen(
function _estimate_oxygen_position_last(
N_pos::V, Ca_pos::V, C_pos::V,
) where V <: AbstractVector{T} where T <: Real
angle = 2π/3
Expand All @@ -38,29 +38,34 @@ function get_last_oxygen(
w = cross(v, Ca_pos - N_pos)
u = cross(w, v)
O_pos = C_pos + normalize(u)*bond_length*cos(angle - 0.5π) - normalize(v)*bond_length*sin(angle - 0.5π)

return O_pos
end

function oxygen_coords(backbone::Backbone)
T = eltype(eltype(backbone))
L = length(backbone) ÷ 3

CAs = eachcol(alphacarbon_coords(backbone))
Cs = eachcol(carbonyl_coords(backbone))
next_Ns = eachcol(@view nitrogen_coords(backbone)[:, 2:end])
oxygen_coords = zeros(T, 3, L)
for (i, (CA, C, next_N)) in enumerate(zip(CAs, Cs, next_Ns))
oxygen_coords[:, i] = estimate_oxygen_position(CA, C, next_N)
function estimate_oxygen_position(chain::Chain, i::Int)
1 <= i <= length(chain) || throw(ArgumentError("Index $i out of bounds for chain of length $(length(chain))"))
chain[i].atoms
if i == length(chain)
N = chain.backbone[3*(i-1)+1]
CA = chain.backbone[3*(i-1)+2]
C = chain.backbone[3*(i-1)+3]
return _estimate_oxygen_position_last(N, CA, C)
else
CA = chain.backbone[3*(i-1)+2]
C = chain.backbone[3*(i-1)+3]
next_N = chain.backbone[3*(i-1)+4]
return _estimate_oxygen_position(CA, C, next_N)
end
oxygen_coords[:, end] = get_last_oxygen(eachcol(backbone[end-2:end])...)
end

return oxygen_coords
function get_oxygen(chain::Chain, i::Int)
residue = chain[i]
k = findfirst(==("O") (atom -> atom.name), residue.atoms)
isnothing(k) && return estimate_oxygen_position(chain, i)
return residue.atoms[k].coords
end

"""
oxygen_coords(chain::Chain)
oxygen_coords(backbone::Backbone)
Add oxygen atoms to the backbone of a protein, turning the coordinate array from size 3x3xL to 3x4xL-1,
where L is the length of the backbone.
Expand All @@ -83,6 +88,14 @@ julia> oxygen_coords(chains["A"]) # returns the estimated position of oxygen ato
The `oxygen_coords` function finds the oxygen atoms to the backbone using idealized geometry, and oxygens atom will on average deviate [0.05 Å](https://github.com/MurrellGroup/Backboner.jl/blob/main/test/protein/oxygen.jl) from original PDB positions.
Moreover, the last oxygen atom is essentially given a random (although deterministic) orientation, as that information is lost when the backbone is reduced to 3 atoms, and there's no next nitrogen atom to compare with.
"""
oxygen_coords(chain::Chain) = oxygen_coords(chain.backbone)
function oxygen_coords(chain::Chain)
return stack(get_oxygen(chain, i) for i in 1:length(chain))
end

function assign_missing_oxygens!(chain::Chain)
for (i, residue) in enumerate(chain)
!any(==("O") (atom -> atom.name), residue.atoms) && insert!(residue.atoms, 4, Atom("O", estimate_oxygen_position(chain, i)))
end
end

ncaco_coords(chain::Chain) = cat(reshape(chain.backbone.coords, 3, 3, :), reshape(oxygen_coords(chain), 3, 1, :), dims=2)
42 changes: 27 additions & 15 deletions src/protein/pdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ backboneselector(res::BioStructures.AbstractResidue) =

getresidues(chain::BioStructures.Chain, res_selector) = BioStructures.collectresidues(chain, res_selector)

atomcoords(res, atom_name)::Vector = BioStructures.collectatoms(res, atom -> BioStructures.atomnameselector(atom, [atom_name])) |> only |> BioStructures.coords
atomcoords(res, atom_name) = BioStructures.collectatoms(res, atom -> BioStructures.atomnameselector(atom, [atom_name])) |> only |> BioStructures.coords

backbonecoords(res::BioStructures.AbstractResidue)::Matrix = stack(AT -> atomcoords(res, AT), BACKBONE_ATOM_NAMES)

Expand All @@ -27,13 +27,22 @@ protein_backbone(chain::BioStructures.Chain, res_selector) = Backboner.Backbone(
aminoacid_sequence(residues::Vector{<:BioStructures.AbstractResidue}) = map(oneletter_resname, residues)
aminoacid_sequence(chain::BioStructures.Chain, res_selector) = aminoacid_sequence(getresidues(chain, res_selector))

function get_residue_atoms_dict(residues::Vector{<:BioStructures.AbstractResidue})
atoms = Dict{Int, Vector{Atom}}()
for res in residues
append!(get!(atoms, res.number, Atom[]), Atom.(BioStructures.collectatoms(res, BioStructures.standardselector)))
end
return atoms
end

function Protein.Chain(residues::Vector{<:BioStructures.AbstractResidue})
id = only(unique(BioStructures.chainid.(residues)))
backbone = protein_backbone(residues)
aavector = aminoacid_sequence(residues)
resnums = BioStructures.resnumber.(residues)
modelnum = only(unique(BioStructures.modelnumber.(residues)))
return Protein.Chain(id, backbone; resnums, aavector, modelnum)
residue_atoms_dict = get_residue_atoms_dict(residues)
return Protein.Chain(id, backbone; resnums, aavector, modelnum, residue_atoms_dict)
end

function Protein.Chain(chain::BioStructures.Chain; res_selector=backboneselector)
Expand Down Expand Up @@ -62,7 +71,6 @@ function readpdb(pdbfile::String)
return chains(struc)
end


import PDBTools

"""
Expand All @@ -71,30 +79,34 @@ import PDBTools
Write a protein (represented as a `Vector{Protein.Chain}`s) to a PDB file.
"""
function writepdb(protein::Vector{Chain}, filename, header=:auto, footer=:auto)
atoms = PDBTools.Atom[]
all_atoms = PDBTools.Atom[]
index = 0
residue_index = 0
for chain in protein
coords = ncaco_coords(chain)
for (residue, res_coords) in zip(chain, eachslice(coords, dims=3))
residue_backbone_coords = reshape(chain.backbone.coords, 3, 3, :)
assign_missing_oxygens!(chain)
for (i, residue) in enumerate(chain)
resname = get(threeletter_aa_names, residue.aa, "XXX")
residue_index += 1
for (name, atomcoords) in zip(["N", "CA", "C", "O"], eachcol(res_coords))
residue_atoms = [
[Atom(name, coords) for (name, coords) in zip(BACKBONE_ATOM_NAMES, eachcol(view(residue_backbone_coords, :, :, i)))];
filter(a -> !(a.name in BACKBONE_ATOM_NAMES), residue.atoms)
]
for atom in residue_atoms
index += 1
atom = PDBTools.Atom(
push!(all_atoms, PDBTools.Atom(
index = index,
name = name,
name = atom.name,
resname = resname,
chain = chain.id,
resnum = residue.num,
residue = residue_index,
x = atomcoords[1],
y = atomcoords[2],
z = atomcoords[3],
)
push!(atoms, atom)
x = atom.coords[1],
y = atom.coords[2],
z = atom.coords[3],
))
end
end
end
PDBTools.writePDB(atoms, filename, header=header, footer=footer)
PDBTools.writePDB(all_atoms, filename, header=header, footer=footer)
end
1 change: 1 addition & 0 deletions src/protein/protein.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Protein
using ..Backboner
import BioStructures

include("atom.jl")
include("residue.jl")
include("chain.jl")
include("oxygen.jl")
Expand Down
11 changes: 6 additions & 5 deletions src/protein/residue.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@ export Residue

const threeletter_aa_names = Dict{Char, String}([Char(v) => k for (k, v) in BioStructures.threeletter_to_aa])

struct Residue
struct Residue #<: AbstractVector{Atom}
num::Integer
atoms::Vector{Atom}
aa::Char
ss::Char

function Residue(num::Integer, aa::Char='G', ss::Char=' ')
return new(num, aa, ss)
function Residue(num::Integer, atoms::Vector{Atom}, aa::Char='G', ss::Char=' ')
return new(num, atoms, aa, ss)
end
end

function Base.show(io::IO, residue::Residue)
num = residue.num
aa = get(threeletter_aa_names, residue.aa, "XXX")
ss = residue.ss
print(io, "Residue $num $aa $ss")
ss = residue.ss == ' ' ? "" : "($(residue.ss))"
print(io, "Residue $num $aa with $(length(residue.atoms)) atom$(length(residue.atoms) == 1 ? "" : "s") $ss")
end
2 changes: 1 addition & 1 deletion test/protein/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
protein = [chain]
@test protein["A"] == protein[:A] == protein[1]

@test chain[1] == Residue(1, 'G', ' ')
@test chain[1] == Residue(1, chain.residue_atoms_dict[1], 'G', ' ')

@test summary(chain) == "Chain A with 5 residues"

Expand Down
1 change: 0 additions & 1 deletion test/protein/protein.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ include("residue.jl")
include("chain.jl")
include("oxygen.jl")
include("pdb.jl")
include("rotations.jl")
include("idealization.jl")
4 changes: 2 additions & 2 deletions test/protein/residue.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@testset "residue.jl" begin
residue = Residue(1, 'A', 'H')
residue = Residue(1, Atom[], 'A', 'H')

io = IOBuffer()
show(io, residue)
@test String(take!(io)) == "Residue 1 ALA H"
@test String(take!(io)) == "Residue 1 ALA with 0 atoms (H)"
end
19 changes: 0 additions & 19 deletions test/protein/rotations.jl

This file was deleted.

2 comments on commit 9475f93

@AntonOresten
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

  • Store non-backbone atoms in chains

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/104833

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.9.6 -m "<description of version>" 9475f9336438cbd802387ff7a004bb5fb04a67d9
git push origin v0.9.6

Please sign in to comment.