Skip to content

Commit

Permalink
Reorg and removed SSClass type
Browse files Browse the repository at this point in the history
  • Loading branch information
anton083 committed Nov 1, 2023
1 parent da793b6 commit aa6b415
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 5,408 deletions.
22 changes: 8 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,18 @@

This package provides a quick way to assign secondary structure using the [DSSP](https://swift.cmbi.umcn.nl/gv/dssp/) algorithm. The code was ported from the [PyDSSP](https://github.com/ShintaroMinami/PyDSSP) package.

This is not a complete implementation of DSSP, as it only assigns '-' for loops, 'H' for alpha helices, and 'E' for beta strands. In spite of that, it matches the original DSSP to a large extent, with the added advantage of being more than 10x faster. For the full DSSP algorithm, check out [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) or [ProteinSecondaryStructures.jl](https://github.com/m3g/ProteinSecondaryStructures.jl), which both use the [DSSP_jll.jl](https://docs.juliahub.com/General/DSSP_jll/stable/) package that was auto-generated using [BinaryBuilder.jl](https://github.com/JuliaPackaging/BinaryBuilder.jl).
This is not a complete implementation of DSSP, as it only assigns loops (1), helices (2), and strands (3). It is not as accurate as the original, but is significantly faster. For the full DSSP algorithm, check out [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) or [ProteinSecondaryStructures.jl](https://github.com/m3g/ProteinSecondaryStructures.jl), which both use the [DSSP_jll.jl](https://docs.juliahub.com/General/DSSP_jll/stable/) package that was auto-generated using [BinaryBuilder.jl](https://github.com/JuliaPackaging/BinaryBuilder.jl).

```julia
julia> dssp("test/data/1ASS.pdb") # 1 chain
1-element Vector{Vector{SSClass}}:
[Loop, Loop, Loop, Strand, Strand, Strand Strand, Strand, Strand, Loop, Loop, Loop]
1-element Vector{Vector{Int64}}:
[1, 1, 1, 3, 3, 3, 1, 1, 1, 1 3, 3, 3, 3, 3, 3, 3, 1, 1, 1]

julia> string.(dssp("test/data/1ASS.pdb")) # 1 chain, Vector{SSClass} converted to string
1-element Vector{String}:
"---EEE-----------EEE-EEEEEE---E" 90 bytes "--------EEE-EEEEEEE--EEEEEEE---"

julia> string.(dssp("test/data/3GOU.pdb")) # 4 chains
4-element Vector{String}:
"---HHHHHHHHHHHHHH---HHHHHHHHHHH" 79 bytes "HH------HHHHHHHHHHHHHHHHHH-----"
"---HHHHHHHHHHHHHH---HHHHHHHHHHH" 84 bytes "---HHHHHHHHHHHHHHHHHH---------H"
"---HHHHHHHHHHHHHH---HHHHHHHHHHH" 79 bytes "HH------HHHHHHHHHHHHHHHHHH-----"
"---HHHHHHHHHHHHHH---HHHHHHHHHHH" 84 bytes "---HHHHHHHHHHHHHHHHHH---------H"
julia> dssp("test/data/3GOU.pdb") # 2 chains
2-element Vector{Vector{Int64}}:
[1, 1, 1, 1, 3, 3, 3, 3, 3, 3 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
[1, 1, 1, 1, 3, 3, 3, 3, 3, 3 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
```

## References
- [Kabsch W, Sander C (1983). Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features. Biopolymers. 22 (12): 2577–2637.](https://doi.org/10.1002/bip.360221211)
- [Kabsch W, Sander C (1983). Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features. Biopolymers. 22 (12): 2577–2637.](https://doi.org/10.1002/bip.360221211)
3 changes: 1 addition & 2 deletions src/AssigningSecondaryStructure.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
module AssigningSecondaryStructure

include("ssclass.jl")
include("utils.jl")
include("dssp.jl")
include("pdb.jl")
include("io.jl")

end
47 changes: 25 additions & 22 deletions src/dssp.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
using LinearAlgebra
using PaddedViews
# Ported from https://github.com/ShintaroMinami/PyDSSP

export dssp

using LinearAlgebra
using PaddedViews

const Q1Q2_F = 0.084 * 332
const DEFAULT_CUTOFF = -0.5
const DEFAULT_MARGIN = 1.0
Expand Down Expand Up @@ -74,14 +76,18 @@ function _get_hbond_map(
return hbond_map
end

# currently not differentiable cause we use bitwise operators
"""
dssp(coords_chains::Vararg{AbstractArray{T, 3}, N})
dssp(coords)
Takes a variable number of chains, each of which is a 3D array of shape `(3, 4, residue_count)`.
Returns a Vector{Vector{SSClass}}, where the outer vector is the number of chains,
and the inner vector is the secondary structure class of each residue.
"""
function dssp(coords::AbstractArray{T, 3}) where T
@assert size(coords, 1) == 3
@assert size(coords, 2) == 4

coords = permutedims(coords, (3, 2, 1))

hbmap = _get_hbond_map(coords)
Expand All @@ -96,48 +102,45 @@ function dssp(coords::AbstractArray{T, 3}) where T
h3 = collect(_pad(false, @view(turn3[1:end-1]) .& @view(turn3[2:end]), (1, 3)))
h4 = collect(_pad(false, @view(turn4[1:end-1]) .& @view(turn4[2:end]), (1, 4)))
h5 = collect(_pad(false, @view(turn5[1:end-1]) .& @view(turn5[2:end]), (1, 5)))

@assert length(h3) == length(h4) == length(h5)

# Helix4 first
helix4 = h4 .| circshift(h4, 1) .| circshift(h4, 2) .| circshift(h4, 3)
h3 .&= .!circshift(helix4, 1) .& .!helix4
h5 .&= .!circshift(helix4, 1) .& .!helix4

helix3 = h3 .| circshift(h3, 1) .| circshift(h3, 2)
helix5 = h5 .| circshift(h5, 1) .| circshift(h5, 2) .| circshift(h5, 3) .| circshift(h5, 4)

# Identify bridge
unfoldmap = _unfold(_unfold(hbmap, 3, -2), 3, -2) .> 0
unfoldmap_rev = permutedims(unfoldmap, (2, 1, 3, 4))

p_bridge = (unfoldmap[:, :, 1, 2] .& unfoldmap_rev[:, :, 2, 3]) .| (unfoldmap_rev[:, :, 1, 2] .& unfoldmap[:, :, 2, 3])
p_bridge = _pad(false, p_bridge, (1,1), (1,1))

a_bridge = (unfoldmap[:, :, 2, 2] .& unfoldmap_rev[:, :, 2, 2]) .| (unfoldmap[:, :, 1, 3] .& unfoldmap_rev[:, :, 1, 3])
a_bridge = _pad(false, a_bridge, (1,1), (1,1))

# Ladder
ladder = dropdims(reduce(|, p_bridge .| a_bridge, dims=2), dims=2)
# H, E, L of C3
helix = helix3 .| helix4 .| helix5
strand = ladder
loop = .!helix .& .!strand
classes = SSClass.(findfirst.(eachrow(cat(loop, helix, strand, dims=2))))
return classes

num_vector = findfirst.(eachrow(hcat(loop, helix, strand)))

return num_vector
end

function dssp(coords_chains::Vector{<:AbstractArray{T, 3}}) where T
chain_lengths = size.(coords_chains, 3)
lengths = size.(coords_chains, 3)

coords = cat(coords_chains..., dims=3)
classes = dssp(coords)
num_vector = dssp(coords)

classes_chains = Vector{SSClass}[]
i = 0
for l in chain_lengths
push!(classes_chains, classes[i+1:i+l])
end

return classes_chains
cum_indices = cumsum(lengths)
num_vectors_by_chain = [num_vector[get(cum_indices, n-1, 0)+1:cum_indices[n]] for n in 1:length(lengths)]

return num_vectors_by_chain
end
49 changes: 49 additions & 0 deletions src/io.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import PDBTools

export load_pdb_backbone_coords

function collect_ncaco(atoms::Vector{PDBTools.Atom})
residues = Vector{PDBTools.Atom}[]
i = 1
while i <= length(atoms) - 3 # Ensure there are at least four atoms left to process
# Check if the next four atoms are N, CA, C, O in order
if atoms[i].name == "N" && atoms[i+1].name == "CA" && atoms[i+2].name == "C" && atoms[i+3].name == "O" &&
all(==(PDBTools.resnum(atoms[i])), PDBTools.resnum.(atoms[i+1:i+3]))
push!(residues, atoms[i:i+3])
i += 4
else
i += 1
end
end
return residues
end

function chain_coords(id::AbstractString, atoms::Vector{PDBTools.Atom})
chain_atoms = filter(a -> PDBTools.chain(a) == id, atoms)
residues = collect_ncaco(chain_atoms)
coords = zeros(Float32, (3, 4, length(residues)))
for (i, residue) in enumerate(residues)
for (j, atom) in enumerate(residue)
coords[:, j, i] = [atom.x, atom.y, atom.z]
end
end
return coords
end

"""
load_pdb_backbones(filename::String)
Assumes that each residue starts with four atoms: N, CA, C, O.
"""
function load_pdb_backbone_coords(filename::String)
atoms = PDBTools.readPDB(filename)
filter!(a -> a.name in ["N", "CA", "C", "O"], atoms)
ids = unique(PDBTools.chain.(atoms))
chains = [chain_coords(id, atoms) for id in ids]
return chains
end

function dssp(filename::String)
chains = load_pdb_backbone_coords(filename)
return dssp(chains)
end
57 changes: 0 additions & 57 deletions src/pdb.jl

This file was deleted.

20 changes: 0 additions & 20 deletions src/ssclass.jl

This file was deleted.

Loading

0 comments on commit aa6b415

Please sign in to comment.