diff --git a/Project.toml b/Project.toml index 4c720b3..01cc05d 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ PDBTools = "e29189f1-7114-4dbd-93d0-c5673a921a58" PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663" [compat] +LinearAlgebra = ">=0" PDBTools = "^0.15" PaddedViews = "^0.5" julia = "1" diff --git a/README.md b/README.md index d9a3adb..87d19d8 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ This package provides a quick way to assign secondary structure using a simplifi This is not a complete implementation of DSSP, as it only assigns loops (1), helices (2), and strands (3). It is not as accurate as the original, but is significantly faster. For the full DSSP algorithm, check out [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) or [ProteinSecondaryStructures.jl](https://github.com/m3g/ProteinSecondaryStructures.jl), which both use the [DSSP_jll.jl](https://docs.juliahub.com/General/DSSP_jll/stable/) package that was auto-generated using [BinaryBuilder.jl](https://github.com/JuliaPackaging/BinaryBuilder.jl). 
```julia -julia> dssp("test/data/1ASS.pdb") # 1 chain +julia> assign_secondary_structure("test/data/1ASS.pdb") # 1 chain 1-element Vector{Vector{Int64}}: [1, 1, 1, 3, 3, 3, 1, 1, 1, 1 … 3, 3, 3, 3, 3, 3, 3, 1, 1, 1] -julia> dssp("test/data/1ZAK.pdb") # 2 chains +julia> assign_secondary_structure("test/data/1ZAK.pdb") # 2 chains 2-element Vector{Vector{Int64}}: [1, 1, 1, 1, 3, 3, 3, 3, 3, 3 … 2, 2, 2, 2, 2, 2, 2, 1, 1, 1] [1, 1, 1, 1, 3, 3, 3, 3, 3, 3 … 2, 2, 2, 2, 2, 2, 2, 1, 1, 1] diff --git a/src/AssigningSecondaryStructure.jl b/src/AssigningSecondaryStructure.jl index 0395bf8..cd8d7e3 100644 --- a/src/AssigningSecondaryStructure.jl +++ b/src/AssigningSecondaryStructure.jl @@ -3,5 +3,6 @@ module AssigningSecondaryStructure include("utils.jl") include("dssp.jl") include("io.jl") +include("assign.jl") end diff --git a/src/assign.jl b/src/assign.jl new file mode 100644 index 0000000..b7bfb53 --- /dev/null +++ b/src/assign.jl @@ -0,0 +1,39 @@ +export assign_secondary_structure!, assign_secondary_structure + +function assign_secondary_structure! end # exported placeholder: no methods are defined for it in this file + +""" + assign_secondary_structure(coords_chains) + +Given a vector of chains, each represented as a 3-dimensional array of size 3x4xL, this function assigns the secondary structure to each residue and returns one vector of integer codes (1: loop, 2: helix, 3: strand) per chain. In these arrays: +- The first dimension corresponds to the x, y, and z coordinates of the atoms. +- The second dimension represents the atom type, ordered as N, CA, C, and O. +- The third dimension specifies the residue number in the chain. 
+""" +function assign_secondary_structure(coords_chains::Vector{<:AbstractArray{T, 3}}) where T + # An empty chain list has nothing to concatenate, so return an empty result up front. + isempty(coords_chains) && return Vector{Int}[] + + lengths = size.(coords_chains, 3) + # Run dssp once over all chains concatenated along the residue axis (dims=3). + num_vector = dssp(cat(coords_chains..., dims=3)) + + # Slice the flat result back into one vector per chain, using the running chain ends. + return [num_vector[(hi - n + 1):hi] for (n, hi) in zip(lengths, cumsum(lengths))] +end + +""" + assign_secondary_structure(filename) + +Load the backbone coordinates of each chain from the PDB file `filename` and return one +vector of integer secondary structure codes per chain, with one code per residue. + +The integers are assigned as follows: +- 1: loop +- 2: helix +- 3: strand +""" +function assign_secondary_structure(filename::String) + chains = load_pdb_backbone_coords(filename) + return assign_secondary_structure(chains) +end \ No newline at end of file diff --git a/src/dssp.jl b/src/dssp.jl index 3a2818c..7890a19 100644 --- a/src/dssp.jl +++ b/src/dssp.jl @@ -1,7 +1,5 @@ # Ported from https://github.com/ShintaroMinami/PyDSSP -export dssp - using LinearAlgebra using PaddedViews @@ -71,7 +69,7 @@ function _get_hbond_map( return hbond_map end -# currently not differentiable cause we use bitwise operators +# not differentiable like the PyDSSP version cause we use bitwise operators function dssp(coords::AbstractArray{T, 3}) where T @assert size(coords, 1) == 3 @assert size(coords, 2) == 4 @@ -119,23 +117,4 @@ function dssp(coords::AbstractArray{T, 3}) where T num_vector = findfirst.(eachrow(hcat(loop, helix, strand))) return num_vector -end - -""" - dssp(coords_chains) - -Takes a vector of chains, each of which is a 3D array of shape `(3, 4, residue_count)` -where the first dimension is the x, y, z coordinates, the second dimension is the atom type, -in the order N, CA, C, O, and the third dimension is the residue number. 
-""" -function dssp(coords_chains::Vector{<:AbstractArray{T, 3}}) where T - lengths = size.(coords_chains, 3) - - coords = cat(coords_chains..., dims=3) - num_vector = dssp(coords) - - cum_indices = cumsum(lengths) - num_vectors_by_chain = [num_vector[get(cum_indices, n-1, 0)+1:cum_indices[n]] for n in 1:length(lengths)] - - return num_vectors_by_chain -end +end \ No newline at end of file diff --git a/src/io.jl b/src/io.jl index 4dbbc9b..b85fc5e 100644 --- a/src/io.jl +++ b/src/io.jl @@ -41,20 +41,4 @@ function load_pdb_backbone_coords(filename::String) ids = unique(PDBTools.chain.(atoms)) chains = [chain_coords(id, atoms) for id in ids] return chains -end - -""" - dssp(filename) - -Returns a vector of vectors of integers, each of which is the secondary structure assignment -for the corresponding chain and their respective residues. - -The integers are assigned as follows: -- 1: loop -- 2: helix -- 3: strand -""" -function dssp(filename::String) - chains = load_pdb_backbone_coords(filename) - return dssp(chains) end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 28c7c5e..c1303a9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,17 +22,19 @@ ss_composition(ss::Vector{Int}) = [count(==(i), ss) for i in 1:3] end @testset "DSSP" begin + @testset "1ASS" begin - ss = dssp("data/1ASS.pdb") + ss = assign_secondary_structure("data/1ASS.pdb") @test length(ss) == 1 @test ss_composition.(ss) == [[60, 53, 39]] end @testset "1ZAK" begin - ss = dssp("data/1ZAK.pdb") + ss = assign_secondary_structure("data/1ZAK.pdb") @test length(ss) == 2 @test ss_composition.(ss) == [[72, 116, 32], [72, 116, 32]] end + end end