Skip to content

Commit

Permalink
Merge pull request #1 from MurrellGroup/dev
Browse files Browse the repository at this point in the history
assign_secondary_structure
  • Loading branch information
AntonOresten authored Nov 5, 2023
2 parents a064ab8 + 39814fd commit 739b785
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 43 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ PDBTools = "e29189f1-7114-4dbd-93d0-c5673a921a58"
PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"

[compat]
LinearAlgebra = ">=0"
PDBTools = "^0.15"
PaddedViews = "^0.5"
julia = "1"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ This package provides a quick way to assign secondary structure using a simplifi
This is not a complete implementation of DSSP, as it only assigns loops (1), helices (2), and strands (3). It is not as accurate as the original, but is significantly faster. For the full DSSP algorithm, check out [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) or [ProteinSecondaryStructures.jl](https://github.com/m3g/ProteinSecondaryStructures.jl), which both use the [DSSP_jll.jl](https://docs.juliahub.com/General/DSSP_jll/stable/) package that was auto-generated using [BinaryBuilder.jl](https://github.com/JuliaPackaging/BinaryBuilder.jl).

```julia
julia> dssp("test/data/1ASS.pdb") # 1 chain
julia> assign_secondary_structure("test/data/1ASS.pdb") # 1 chain
1-element Vector{Vector{Int64}}:
[1, 1, 1, 3, 3, 3, 1, 1, 1, 1  …  3, 3, 3, 3, 3, 3, 3, 1, 1, 1]

julia> dssp("test/data/1ZAK.pdb") # 2 chains
julia> assign_secondary_structure("test/data/1ZAK.pdb") # 2 chains
2-element Vector{Vector{Int64}}:
[1, 1, 1, 1, 3, 3, 3, 3, 3, 3  …  2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
[1, 1, 1, 1, 3, 3, 3, 3, 3, 3  …  2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
Expand Down
1 change: 1 addition & 0 deletions src/AssigningSecondaryStructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ module AssigningSecondaryStructure
include("utils.jl")
include("dssp.jl")
include("io.jl")
include("assign.jl")

end
39 changes: 39 additions & 0 deletions src/assign.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
export assign_secondary_structure!, assign_secondary_structure

function assign_secondary_structure! end

"""
    assign_secondary_structure(coords_chains)

Assign a secondary structure code to every residue of every chain.

`coords_chains` is a vector of chains, each a 3-dimensional array of size 3x4xL:
- The first dimension holds the x, y, and z coordinates of an atom.
- The second dimension is the atom type, ordered as N, CA, C, and O.
- The third dimension indexes the residues of the chain.

All chains are concatenated along the residue dimension and passed to `dssp` in a
single call; the resulting vector is then split back into one vector per chain, in
the same order as the input. An empty `coords_chains` returns an empty vector.
"""
function assign_secondary_structure(coords_chains::Vector{<:AbstractArray{T, 3}}) where T
    # With no chains there is nothing to concatenate, and `cat` without arrays
    # cannot produce the 3x4xL input that `dssp` requires.
    isempty(coords_chains) && return Vector{Int}[]

    lengths = size.(coords_chains, 3)

    coords = cat(coords_chains..., dims=3)
    num_vector = dssp(coords)

    # Chain n occupies residues starts[n]:stops[n] of the concatenated result.
    stops = cumsum(lengths)
    starts = stops .- lengths .+ 1
    num_vectors_by_chain = [num_vector[a:b] for (a, b) in zip(starts, stops)]

    return num_vectors_by_chain
end

"""
    assign_secondary_structure(filename)

Load the backbone coordinates of each chain from the PDB file `filename` and return
a vector of vectors of integers: one inner vector per chain, one integer per residue.

The integers are assigned as follows:
- 1: loop
- 2: helix
- 3: strand
"""
assign_secondary_structure(filename::String) =
    assign_secondary_structure(load_pdb_backbone_coords(filename))
25 changes: 2 additions & 23 deletions src/dssp.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Ported from https://github.com/ShintaroMinami/PyDSSP

export dssp

using LinearAlgebra
using PaddedViews

Expand Down Expand Up @@ -71,7 +69,7 @@ function _get_hbond_map(
return hbond_map
end

# currently not differentiable cause we use bitwise operators
# Unlike the PyDSSP version, this is not differentiable, because we use bitwise operators
function dssp(coords::AbstractArray{T, 3}) where T
@assert size(coords, 1) == 3
@assert size(coords, 2) == 4
Expand Down Expand Up @@ -119,23 +117,4 @@ function dssp(coords::AbstractArray{T, 3}) where T
num_vector = findfirst.(eachrow(hcat(loop, helix, strand)))

return num_vector
end

"""
    dssp(coords_chains)

Takes a vector of chains, each of which is a 3D array of shape `(3, 4, residue_count)`
where the first dimension is the x, y, z coordinates, the second dimension is the atom
type, in the order N, CA, C, O, and the third dimension is the residue number.

The chains are concatenated along the residue dimension, assigned in a single `dssp`
call, and the result is split back into one vector per chain, in input order.
An empty `coords_chains` returns an empty vector.
"""
function dssp(coords_chains::Vector{<:AbstractArray{T, 3}}) where T
    # With no chains there is nothing to concatenate, and `cat` without arrays
    # cannot produce the `(3, 4, residue_count)` input the array method requires.
    isempty(coords_chains) && return Vector{Int}[]

    lengths = size.(coords_chains, 3)

    coords = cat(coords_chains..., dims=3)
    num_vector = dssp(coords)

    # Chain n occupies residues starts[n]:stops[n] of the concatenated result.
    stops = cumsum(lengths)
    starts = stops .- lengths .+ 1
    num_vectors_by_chain = [num_vector[a:b] for (a, b) in zip(starts, stops)]

    return num_vectors_by_chain
end
end
16 changes: 0 additions & 16 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,4 @@ function load_pdb_backbone_coords(filename::String)
ids = unique(PDBTools.chain.(atoms))
chains = [chain_coords(id, atoms) for id in ids]
return chains
end

"""
    dssp(filename)

Load the backbone coordinates of each chain from the PDB file `filename` and return
a vector of vectors of integers: one inner vector per chain, one integer per residue.

The integers are assigned as follows:
- 1: loop
- 2: helix
- 3: strand
"""
dssp(filename::String) = dssp(load_pdb_backbone_coords(filename))
6 changes: 4 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,19 @@ ss_composition(ss::Vector{Int}) = [count(==(i), ss) for i in 1:3]
end

@testset "DSSP" begin

@testset "1ASS" begin
ss = dssp("data/1ASS.pdb")
ss = assign_secondary_structure("data/1ASS.pdb")
@test length(ss) == 1
@test ss_composition.(ss) == [[60, 53, 39]]
end

@testset "1ZAK" begin
ss = dssp("data/1ZAK.pdb")
ss = assign_secondary_structure("data/1ZAK.pdb")
@test length(ss) == 2
@test ss_composition.(ss) == [[72, 116, 32], [72, 116, 32]]
end

end

end

2 comments on commit 739b785

@AntonOresten
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request updated: JuliaRegistries/General/94760

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.0 -m "<description of version>" 739b7855d56ee77ef63fb875d9f097052c6cd8c2
git push origin v0.1.0

Please sign in to comment.