Skip to content

Commit

Permalink
Replace SecondaryStructure with Char
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Nov 26, 2023
1 parent ee61b5c commit db35691
Show file tree
Hide file tree
Showing 22 changed files with 78 additions and 107 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ PDBTools = "e29189f1-7114-4dbd-93d0-c5673a921a58"
Rotations = "6038ab10-8711-5258-84ad-4b1120ba62dc"

[compat]
AssigningSecondaryStructure = "0.1"
AssigningSecondaryStructure = "0.2"
LinearAlgebra = "1"
PDBTools = "^0.15"
Rotations = "1"
Expand Down
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ makedocs(;
doctest = false,
pages = [
"Overview" => "index.md",
"Backbone" => "backbone.md",
"Oxygen atoms" => "oxygen.md",
],
authors = "Anton Oresten",
Expand Down
3 changes: 3 additions & 0 deletions docs/src/backbone.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Backbone

The `Backbone{N}` type wraps a three-dimensional array of size (3, N, L) representing atom coordinates of a backbone with `N` backbone atoms per residue and `L` residues.
10 changes: 7 additions & 3 deletions docs/src/oxygen.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ The reason for having `N` as a type parameter is to make `Backbone` more flexibl
julia> using Backboner
julia> protein = pdb_to_protein("test/data/1ZAK.pdb")
2-element Protein{Float32}:
Chain A with 220 residues
Chain B with 220 residues
julia> chain = protein["A"]
Chain A with 220 residues
julia> backbone4 = chain.backbone
3×4×220 Backbone{4, Float32}:
Expand Down Expand Up @@ -40,18 +44,18 @@ julia> backbone3 = remove_column(backbone4, 4) # remove oxygen column
julia> backbone4_approx = add_oxygens(backbone3) # add oxygen column
3×4×220 Backbone{4, Float32}:
[:, :, 1] =
22.346 22.901 23.227 22.6697 # decent approximation
22.346 22.901 23.227 22.6697
17.547 18.031 16.793 15.7257
23.294 21.993 21.163 21.4295
;;; …
[:, :, 220] =
21.808 22.263 21.085 20.2198 # last oxygen position won't match original
21.808 22.263 21.085 20.2198
13.861 13.862 14.233 13.42
2.734 1.355 0.446 0.112399
```

!!! note
The `add_oxygens` function adds oxygen atoms to the backbone using idealized geometry, and oxygens atom will on average deviate [0.05 Å](https://github.com/MurrellGroup/Backboner.jl/blob/main/test/backbone/oxygen.jl) from the original positions.
Moreover, the last oxygen atom is also given a random orientation, as that information is lost when the backbone is reduced to 3 atoms.
Moreover, the last oxygen atom is also given a random orientation, as that information is lost when the backbone is reduced to 3 atoms, and there's no next nitrogen atom to compare to.
6 changes: 4 additions & 2 deletions src/Backboner.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ using LinearAlgebra
import Rotations
import PDBTools

include("secondarystructure.jl")
export has_assigned_ss

has_assigned_ss(ssvector::Vector{Char}) = all(!=(' '), ssvector)

include("backbone/backbone.jl")
include("residue.jl")
include("chain.jl")
include("protein.jl")
include("assign.jl")
include("io.jl")
include("coordmatrix.jl")

end
11 changes: 5 additions & 6 deletions src/assign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@ import AssigningSecondaryStructure: assign_secondary_structure!, assign_secondar
"""
assign_secondary_structure!(protein)
Uses a simplified version of DSSP to fill the secondary structure vector of each chain with Loop, Helix, and Strand.
Uses a simplified version of DSSP to fill the secondary structure vector of each chain with '-' (coil/loop), 'H' (helix), and 'E' (strand).
"""
function assign_secondary_structure!(protein::Protein)
ss_num_vectors = assign_secondary_structure([chain.backbone.coords for chain in protein])
for (chain, ss_num_vector) in zip(protein, ss_num_vectors)
ssvec = SecondaryStructure.(ss_num_vector)
@assert length(chain.ssvec) == length(ssvec)
chain.ssvec .= ssvec
ss_vectors = assign_secondary_structure([chain.backbone.coords for chain in protein])
for (chain, ssvector) in zip(protein, ss_vectors)
@assert length(chain.ssvector) == length(ssvector)
chain.ssvector .= ssvector
end
return protein
end
Expand Down
23 changes: 16 additions & 7 deletions src/backbone/backbone.jl
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
export Backbone, remove_column

"""
Backbone{N,T}
Backbone{N, T <: Real} <: AbstractArray{T, 3}
A wrapper for a 3xNxL array of coordinates of atoms.
Backbone{4} is used to store 3-dimensional coordinates of the backbone atoms (N, CA, C, O) of a protein chain.
"""
struct Backbone{N, T <: Real} <: AbstractArray{T,3}
coords::AbstractArray{T,3}
struct Backbone{N, T <: Real} <: AbstractArray{T, 3}
coords::AbstractArray{T, 3}

function Backbone{N}(coords::AbstractArray{T,3}) where {N,T}
function Backbone{N}(coords::AbstractArray{T, 3}) where {N, T}
@assert size(coords, 1) == 3 "coords must have 3 coordinates per atom"
@assert size(coords, 2) == N "The second dimension of coords must be $N"
return new{N,T}(coords)
return new{N, T}(coords)
end

Backbone(coords::AbstractArray{T,3}) where T = Backbone{size(coords, 2)}(coords)
Backbone(coords::AbstractArray{T, 3}) where T = Backbone{size(coords, 2)}(coords)
end

@inline Base.:(==)(backbone1::Backbone, backbone2::Backbone) = backbone1.coords == backbone2.coords
Expand All @@ -25,10 +25,19 @@ end
@inline Base.getindex(backbone::Backbone, i::Integer) = view(backbone.coords, :, :, i)
@inline Base.getindex(backbone::Backbone, r::UnitRange{Int}) = Backbone(view(backbone.coords, :, :, r))

function remove_column(backbone::Backbone{N,T}, i::Integer) where {N,T}
function remove_column(backbone::Backbone{N, T}, i::Integer) where {N, T}
@assert 1 <= i <= N "i must be between 1 and $N"
Backbone(view(backbone.coords, :, [1:i-1;i+1:N], :))
end

"""
atom_coord_matrix(backbone, i)
Returns the coordinates of specific columns of atoms in a backbone.
"""
function atom_coord_matrix(backbone::Backbone, i)
return view(backbone.coords, :, i, :)
end

include("rotations.jl")
include("oxygen.jl")
8 changes: 4 additions & 4 deletions src/backbone/oxygen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ where L is the length of the backbone.
randomized orientation to preserve the length of the backbone.
"""
function add_oxygens(
backbone::Backbone{3,T},
backbone::Backbone{3, T},
) where T <: Real
L = length(backbone)

CAs = eachcol(alphacarbon_coord_matrix(backbone))
Cs = eachcol(carbon_coord_matrix(backbone))
next_Ns = eachcol(nitrogen_coord_matrix(backbone[2:end]))
CAs = eachcol(atom_coord_matrix(backbone, 2))
Cs = eachcol(atom_coord_matrix(backbone, 3))
next_Ns = eachcol(atom_coord_matrix(backbone[2:end], 1))

oxygen_coords = zeros(T, 3, L-1)
for (i, (CA, C, next_N)) in enumerate(zip(CAs, Cs, next_Ns))
Expand Down
2 changes: 1 addition & 1 deletion src/backbone/rotations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ end
Returns the locations and rotation matrices of residues in a backbone,
according to a defined standard triangle (`Backboner.STANDARD_TRIANGLE_ANGSTROM`).
"""
function backbone_to_locs_and_rots(backbone::Backbone{N,T}, unit::Symbol=:angstrom) where {N,T}
function backbone_to_locs_and_rots(backbone::Backbone{N, T}, unit::Symbol=:angstrom) where {N, T}
@assert 3 <= N <= 4 "backbone must have 3 or 4 atoms per residue"
L = length(backbone)
locations = Array{T}(undef, 3, L)
Expand Down
20 changes: 10 additions & 10 deletions src/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,33 @@ A chain has an identifier (usually a single letter) and holds the backbone atom
struct Chain <: AbstractVector{Residue}
id::AbstractString
backbone::Backbone{4}
aaseq::Vector{Char}
ssvec::Vector{SecondaryStructure}
aavector::Vector{Char}
ssvector::Vector{Char}

function Chain(
id::AbstractString,
backbone::Backbone{N};
aaseq::Vector{Char} = fill('G', length(backbone)),
ssvec::Union{Vector{SecondaryStructure}, Vector{<:Integer}} = fill(Unassigned, length(backbone)),
aavector::Vector{Char} = fill('G', length(backbone)),
ssvector::Union{Vector{Char}, Vector{<:Integer}} = fill(' ', length(backbone)),
) where N
@assert N == 3 || N == 4 "backbone must have 3 or 4 atoms per residue"
N == 3 && (backbone = add_oxygens(backbone))

@assert length(backbone) == length(aaseq) == length(ssvec) "backbone, aaseq, and ssvec must have the same length"
ssvec isa Vector{<:Integer} && (ssvec = SecondaryStructure.(ssvec))
@assert length(backbone) == length(aavector) == length(ssvector) "backbone, aavector, and ssvector must have the same length"
ssvector isa Vector{<:Integer} && (ssvector = get.("-HE", ssvector, ' '))

return new(id, backbone, aaseq, ssvec)
return new(id, backbone, aavector, ssvector)
end

Chain(backbone::Backbone; kwargs...) = Chain("_", backbone; kwargs...)
end

@inline Base.:(==)(chain1::Chain, chain2::Chain) = chain1.id == chain2.id && chain1.backbone == chain2.backbone && chain1.ssvec == chain2.ssvec
@inline Base.:(==)(chain1::Chain, chain2::Chain) = chain1.id == chain2.id && chain1.backbone == chain2.backbone && chain1.ssvector == chain2.ssvector
@inline Base.length(chain::Chain) = length(chain.backbone)
@inline Base.size(chain::Chain) = (length(chain),)
@inline Base.getindex(chain::Chain, i::Integer) = Residue(i, chain.backbone, chain.aaseq[i], chain.ssvec[i])
@inline Base.getindex(chain::Chain, i::Integer) = Residue(i, chain.backbone, chain.aavector[i], chain.ssvector[i])

Base.summary(chain::Chain) = "Chain $(chain.id) with $(length(chain)) residue$(length(chain) == 1 ? "" : "s")"
Base.show(io::IO, chain::Chain) = print(io, summary(chain))

has_missing_ss(chain::Chain) = has_missing_ss(chain.ssvec)
has_assigned_ss(chain::Chain) = has_assigned_ss(chain.ssvector)
20 changes: 0 additions & 20 deletions src/coordmatrix.jl

This file was deleted.

8 changes: 4 additions & 4 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ function Chain(atoms::Vector{PDBTools.Atom})
id = PDBTools.chain(atoms[1])
@assert allequal(PDBTools.chain.(atoms)) "atoms must be from the same chain"
backbone = Backbone(atoms)
aaseq = [get(ONE_LETTER_AA_CODES, atom.resname, 'X') for atom in atoms if atom.name == "CA"]
return Chain(id, backbone, aaseq=aaseq)
aavector = [get(ONE_LETTER_AA_CODES, atom.resname, 'X') for atom in atoms if atom.name == "CA"]
return Chain(id, backbone, aavector=aavector)
end

function Protein(atoms::Vector{PDBTools.Atom})
Expand All @@ -65,7 +65,7 @@ function protein_to_pdb(protein::Protein, filename, header=:auto, footer=:auto)
residue_index = 0
for chain in protein
L = length(chain)
for (resnum, (residue_coords, aa)) in enumerate(zip(eachslice(chain.backbone.coords, dims=3), chain.aaseq))
for (resnum, (residue_coords, aa)) in enumerate(zip(eachslice(chain.backbone.coords, dims=3), chain.aavector))
resname = get(THREE_LETTER_AA_CODES, aa, "XXX")
residue_index += 1
for (name, atom_coords) in zip(["N", "CA", "C", "O"], eachcol(residue_coords))
Expand Down Expand Up @@ -95,7 +95,7 @@ function protein_to_pdb(protein::Protein, filename, header=:auto, footer=:auto)
OXT_atom = PDBTools.Atom(
index = index,
name = "OXT",
resname = get(THREE_LETTER_AA_CODES, chain.aaseq[end], "XXX"),
resname = get(THREE_LETTER_AA_CODES, chain.aavector[end], "XXX"),
chain = chain.id,
resnum = L,
residue = residue_index,
Expand Down
2 changes: 1 addition & 1 deletion src/protein.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ end

Base.summary(protein::Protein) = "Protein with $(length(protein)) chain$(length(protein) == 1 ? "" : "s")"

has_missing_ss(protein::Protein) = any(has_missing_ss, protein.chains)
has_assigned_ss(protein::Protein) = all(has_assigned_ss, protein.chains)
16 changes: 8 additions & 8 deletions src/residue.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@ export Residue
struct Residue
index::Integer
backbone::Backbone{4}
aminoacid::Char
secondarystructure::SecondaryStructure
aa::Char
ss::Char

function Residue(
index::Integer,
backbone::Backbone{4},
aminoacid::Char = 'G',
secondarystructure::SecondaryStructure = Unassigned
aa::Char = 'G',
ss::Char = ' ',
)
return new(index, backbone, aminoacid, secondarystructure)
return new(index, backbone, aa, ss)
end
end

function Base.summary(residue::Residue)
index = lpad(string(residue.index), length(string(length(residue.backbone))))
code = get(THREE_LETTER_AA_CODES, residue.aminoacid, "XXX")
ss = lpad(string(residue.secondarystructure), 6)
return "Residue $index $code $ss"
aa3 = get(THREE_LETTER_AA_CODES, residue.aa, "XXX")
ss = residue.ss
return "Residue $index $aa3 $ss"
end
Base.show(io::IO, residue::Residue) = print(io, summary(residue))
5 changes: 0 additions & 5 deletions src/secondarystructure.jl

This file was deleted.

10 changes: 5 additions & 5 deletions test/assign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

@testset "assign_secondary_structure!" begin
protein = pdb_to_protein("data/1ASS.pdb")
@test has_missing_ss(protein)
@test !has_assigned_ss(protein)
assign_secondary_structure!(protein)
@test !has_missing_ss(protein)
@test has_assigned_ss(protein)
end

@testset "assign_secondary_structure" begin
protein = pdb_to_protein("data/1ASS.pdb")
@test has_missing_ss(protein)
@test !has_assigned_ss(protein)
new_protein = assign_secondary_structure(protein)
@test has_missing_ss(protein)
@test !has_missing_ss(new_protein)
@test !has_assigned_ss(protein)
@test has_assigned_ss(new_protein)
end

end
8 changes: 4 additions & 4 deletions test/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
chain = Chain("A", backbone)
@test chain.id == "A"
@test chain.backbone.coords == coords
@test chain.aaseq == fill('G', length(chain))
@test chain.ssvec == fill(Unassigned, length(chain))
@test has_missing_ss(chain)
@test chain.aavector == fill('G', length(chain))
@test chain.ssvector == fill(' ', length(chain))
@test !has_assigned_ss(chain)
@test length(chain) == 5
@test size(chain) == (5,)
@test Chain(remove_column(backbone, 4)).backbone == add_oxygens(remove_column(backbone, 4))
@test Chain(backbone).id == "_"

@test chain[1] == Residue(1, backbone, 'G', Unassigned)
@test chain[1] == Residue(1, backbone, 'G', ' ')

@test summary(chain) == "Chain A with 5 residues"

Expand Down
10 changes: 0 additions & 10 deletions test/coordmatrix.jl

This file was deleted.

2 changes: 1 addition & 1 deletion test/protein.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@test length(protein) == 2
@test length.(protein) == [5, 6]
@test summary(protein) == "Protein with 2 chains"
@test has_missing_ss(protein)
@test !has_assigned_ss(protein)
end

end
7 changes: 3 additions & 4 deletions test/residue.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
@testset "residue.jl" begin
backbone = Backbone(randn(3, 4, 1))
residue = Residue(1, backbone, 'A', Helix)
residue = Residue(1, backbone, 'A', 'H')

# index field get padded to digits in length of backbone, ss gets padded to 6 cause Strand is 6 chars long.
# Unassigned is longer but that's fine.
@test summary(residue) == "Residue 1 ALA Helix"
# index field get padded to digits in length of backbone
@test summary(residue) == "Residue 1 ALA H"

io = IOBuffer()
show(io, residue)
Expand Down
Loading

0 comments on commit db35691

Please sign in to comment.