diff --git a/src/ExaModels.jl b/src/ExaModels.jl index 4d6b3623..d8ee3399 100644 --- a/src/ExaModels.jl +++ b/src/ExaModels.jl @@ -47,6 +47,8 @@ export ExaModel, multipliers, multipliers_L, multipliers_U, - WrapperNLPModel + WrapperNLPModel, + @register_univariate, + @register_bivariate end # module ExaModels diff --git a/src/gradient.jl b/src/gradient.jl index d254cb71..7b80706b 100644 --- a/src/gradient.jl +++ b/src/gradient.jl @@ -1,44 +1,78 @@ -@inbounds @inline function drpass(d::D, y, adj) where {D<:AdjointNode1} +""" + drpass(d::D, y, adj) + +Performs dense gradient evaluation via the reverse pass on the computation (sub)graph formed by the forward pass + +# Arguments: +- `d`: first-order computation (sub)graph +- `y`: result vector +- `adj`: adjoint propagated up to the current node +""" +@inline function drpass(d::D, y, adj) where {D<:AdjointNode1} offset = drpass(d.inner, y, adj * d.y) nothing end -@inbounds @inline function drpass(d::D, y, adj) where {D<:AdjointNode2} +@inline function drpass(d::D, y, adj) where {D<:AdjointNode2} offset = drpass(d.inner1, y, adj * d.y1) offset = drpass(d.inner2, y, adj * d.y2) nothing end -@inbounds @inline function drpass(d::D, y, adj) where {D<:AdjointNodeVar} - y[d.i] += adj +@inline function drpass(d::D, y, adj) where {D<:AdjointNodeVar} + @inbounds y[d.i] += adj nothing end -@inbounds @inline function drpass(f::F, x, y, adj) where {F<:SIMDFunction} end +@inline function drpass(f::F, x, y, adj) where {F<:SIMDFunction} end + +""" + gradient!(y, f, x, adj) + +Performs dense gradient evaluation + +# Arguments: +- `y`: result vector +- `f`: the function to be differentiated in `SIMDFunction` format +- `x`: variable vector +- `adj`: initial adjoint +""" function gradient!(y, f, x, adj) @simd for k in eachindex(f.itr) - drpass(f.f.f(f.itr[k], AdjointNodeSource(x)), y, adj) + @inbounds drpass(f.f.f(f.itr[k], AdjointNodeSource(x)), y, adj) end return y end +""" + grpass(d::D, comp, y, o1, cnt, adj) + +Performs sparse gradient evaluation via the reverse pass on the computation (sub)graph formed by the forward pass -@inbounds @inline function grpass(d::D, comp, y, o1, cnt, adj) where {D<:AdjointNode1} +# Arguments: +- `d`: first-order computation (sub)graph +- `comp`: a `Compressor`, which helps map the counter to the sparse vector index +- `y`: result vector +- `o1`: index offset +- `cnt`: counter +- `adj`: adjoint propagated up to the current node +""" +@inline function grpass(d::D, comp, y, o1, cnt, adj) where {D<:AdjointNode1} cnt = grpass(d.inner, comp, y, o1, cnt, adj * d.y) return cnt end -@inbounds @inline function grpass(d::D, comp, y, o1, cnt, adj) where {D<:AdjointNode2} +@inline function grpass(d::D, comp, y, o1, cnt, adj) where {D<:AdjointNode2} cnt = grpass(d.inner1, comp, y, o1, cnt, adj * d.y1) cnt = grpass(d.inner2, comp, y, o1, cnt, adj * d.y2) return cnt end -@inbounds @inline function grpass(d::D, comp, y, o1, cnt, adj) where {D<:AdjointNodeVar} - y[o1+comp(cnt += 1)] += adj +@inline function grpass(d::D, comp, y, o1, cnt, adj) where {D<:AdjointNodeVar} + @inbounds y[o1+comp(cnt += 1)] += adj return cnt end -@inbounds @inline function grpass(d::AdjointNodeVar, comp::Nothing, y, o1, cnt, adj) # despecialization +@inline function grpass(d::AdjointNodeVar, comp::Nothing, y, o1, cnt, adj) # despecialization push!(y, d.i) return (cnt += 1) end -@inbounds @inline function grpass( +@inline function grpass( d::D, comp, y::V, @@ -47,13 +81,24 @@ end adj, ) where {D<:AdjointNodeVar,V<:AbstractVector{Tuple{Int,Int}}} ind = o1 + comp(cnt += 1) - y[ind] = 
(d.i, ind) + @inbounds y[ind] = (d.i, ind) return cnt end +""" + sgradient!(y, f, x, adj) + +Performs sparse gradient evaluation + +# Arguments: +- `y`: result vector +- `f`: the function to be differentiated in `SIMDFunction` format +- `x`: variable vector +- `adj`: initial adjoint +""" function sgradient!(y, f, x, adj) @simd for k in eachindex(f.itr) - grpass(f.f.f(f.itr[k], AdjointNodeSource(x)), f.itr.comp1, y, offset1(f, k), 0, adj) + @inbounds grpass(f.f.f(f.itr[k], AdjointNodeSource(x)), f.itr.comp1, y, offset1(f, k), 0, adj) end return y end diff --git a/src/graph.jl b/src/graph.jl index 8e45e998..001dc098 100644 --- a/src/graph.jl +++ b/src/graph.jl @@ -1,20 +1,66 @@ -abstract type AbstractIndex end +# Abstract node type for the computation graph for symbolic expressions abstract type AbstractNode end -abstract type AbstractPar <: AbstractNode end + +# Abstract node type for the first-order forward pass tree abstract type AbstractAdjointNode end + +# Abstract node type for the computation graph for the second-order forward pass abstract type AbstractSecondAdjointNode end +""" + Var{I} + +A variable node for the symbolic expression tree + +# Fields: +- `i::I`: (parameterized) index +""" struct Var{I} <: AbstractNode i::I end -struct Par <: AbstractPar end -struct ParIndexed{I,J} <: AbstractPar + +""" + ParSource + +A source of parameterized data + +""" +struct ParSource <: AbstractNode end + +""" + ParIndexed{I, J} + +A parameterized data node + +# Fields: +- `inner::I`: parameter for the data +""" +struct ParIndexed{I,J} <: AbstractNode inner::I end + @inline ParIndexed(inner::I, n) where {I} = ParIndexed{I,n}(inner) +""" + Node1{F, I} + +A node with one child for the symbolic expression tree + +# Fields: +- `inner::I`: child +""" struct Node1{F,I} <: AbstractNode inner::I end + +""" + Node2{F, I1, I2} + +A node with two children for the symbolic expression tree + +# Fields: +- `inner1::I1`: child #1 +- `inner2::I2`: child #2 +""" struct Node2{F,I1,I2} <: AbstractNode inner1::I1 inner2::I2 end @@ -27,9 +73,9 @@ struct SecondFixed{F} inner::F end -@inline Base.getindex(n::Par, i) = ParIndexed(n, i) +@inline Base.getindex(n::ParSource, i) = ParIndexed(n, i) -Par(iter::DataType) = Par() +Par(iter::DataType) = ParSource() Par(iter, idx...) = ParIndexed(Par(iter, idx[2:end]...), idx[1]) Par(iter::Type{T}, idx...) where {T<:Tuple} = Tuple(Par(p, i, idx...) for (i, p) in enumerate(T.parameters)) @@ -44,24 +90,42 @@ Par(iter::Type{T}, idx...) 
where {T<:NamedTuple} = NamedTuple{T.parameters[1]}( struct Identity end -struct NaNSource{T} <: AbstractVector{T} end -@inline Base.getindex(::NaNSource{T}, i) where {T} = T(NaN) +@inline (v::Var{I})(i, x) where {I} = @inbounds x[v.i(i, x)] +@inline (v::ParSource)(i, x) = i +@inline (v::ParIndexed{I,n})(i, x) where {I,n} = @inbounds v.inner(i, x)[n] -@inbounds @inline (v::Var{I})(i, x) where {I} = x[v.i(i, x)] -@inbounds @inline (v::Par)(i, x) = i -@inbounds @inline (v::ParIndexed{I,n})(i, x) where {I,n} = v.inner(i, x)[n] +(v::ParIndexed)(i::Identity, x) = NaN16 # despecialized +(v::ParSource)(i::Identity, x) = NaN16 # despecialized +(v::Var)(i::Identity, x) = @inbounds x[v.i] # despecialized -@inbounds (v::ParIndexed)(i::Identity, x) = NaN16 # despecialized -@inbounds (v::Par)(i::Identity, x) = NaN16 # despecialized -@inbounds (v::Var)(i::Identity, x) = x[v.i] # despecialized +""" + AdjointNode1{F, T, I} +A node with one child for the first-order forward pass tree +# Fields: +- `x::T`: function value +- `y::T`: first-order sensitivity +- `inner::I`: child +""" struct AdjointNode1{F,T,I} <: AbstractAdjointNode x::T y::T inner::I end +""" + AdjointNode2{F, T, I1, I2} + +A node with two children for the first-order forward pass tree + +# Fields: +- `x::T`: function value +- `y1::T`: first-order sensitivity w.r.t. first argument +- `y2::T`: first-order sensitivity w.r.t. second argument +- `inner1::I1`: child #1 +- `inner2::I2`: child #2 +""" struct AdjointNode2{F,T,I1,I2} <: AbstractAdjointNode x::T y1::T @@ -69,35 +133,75 @@ struct AdjointNode2{F,T,I1,I2} <: AbstractAdjointNode inner1::I1 inner2::I2 end +""" + AdjointNodeVar{I, T} + +A variable node for the first-order forward pass tree + +# Fields: +- `i::I`: index +- `x::T`: value +""" struct AdjointNodeVar{I,T} <: AbstractAdjointNode i::I x::T end -struct AdjointNodeSource{T,VT<:AbstractVector{T}} + +""" + AdjointNodeSource{VT} + +A source of `AdjointNode`. `adjoint_node_source[i]` returns an `AdjointNodeVar` at index `i`. + +# Fields: +- `inner::VT`: variable vector +""" +struct AdjointNodeSource{VT} inner::VT end -struct AdjointNodeNullSource end @inline AdjointNode1(f::F, x::T, y, inner::I) where {F,T,I} = AdjointNode1{F,T,I}(x, y, inner) @inline AdjointNode2(f::F, x::T, y1, y2, inner1::I1, inner2::I2) where {F,T,I1,I2} = AdjointNode2{F,T,I1,I2}(x, y1, y2, inner1, inner2) +@inline Base.getindex(x::I, i) where {I<:AdjointNodeSource{Nothing}} = + AdjointNodeVar(i, NaN16) +@inline Base.getindex(x::I, i) where {I<:AdjointNodeSource} = + @inbounds AdjointNodeVar(i, x.inner[i]) -AdjointNodeSource(::Nothing) = AdjointNodeNullSource() -@inbounds @inline Base.getindex(x::I, i) where {I<:AdjointNodeNullSource} = - AdjointNodeVar(i, NaN16) -@inbounds @inline Base.getindex(x::I, i) where {I<:AdjointNodeSource} = - AdjointNodeVar(i, x.inner[i]) +""" + SecondAdjointNode1{F, T, I} +A node with one child for the second-order forward pass tree +# Fields: +- `x::T`: function value +- `y::T`: first-order sensitivity +- `h::T`: second-order sensitivity +- `inner::I`: child +""" struct SecondAdjointNode1{F,T,I} <: AbstractSecondAdjointNode x::T y::T h::T inner::I end +""" + SecondAdjointNode2{F, T, I1, I2} + +A node with two children for the second-order forward pass tree + +# Fields: +- `x::T`: function value +- `y1::T`: first-order sensitivity w.r.t. first argument +- `y2::T`: first-order sensitivity w.r.t. second argument +- `h11::T`: second-order sensitivity w.r.t. first argument +- `h12::T`: second-order sensitivity w.r.t. 
first and second argument +- `h22::T`: second-order sensitivity w.r.t. second argument +- `inner1::I1`: child #1 +- `inner2::I2`: child #2 +""" struct SecondAdjointNode2{F,T,I1,I2} <: AbstractSecondAdjointNode x::T y1::T @@ -109,11 +213,29 @@ struct SecondAdjointNode2{F,T,I1,I2} <: AbstractSecondAdjointNode inner2::I2 end +""" + SecondAdjointNodeVar{I, T} + +A variable node for the second-order forward pass tree + +# Fields: +- `i::I`: index +- `x::T`: value +""" struct SecondAdjointNodeVar{I,T} <: AbstractSecondAdjointNode i::I x::T end -struct SecondAdjointNodeSource{T,VT<:AbstractVector{T}} + +""" + SecondAdjointNodeSource{VT} + +A source of `SecondAdjointNode`. `second_adjoint_node_source[i]` returns a `SecondAdjointNodeVar` at index `i`. + +# Fields: +- `inner::VT`: variable vector +""" +struct SecondAdjointNodeSource{VT} inner::VT end @@ -132,10 +254,7 @@ end ) where {F,T,I1,I2} = SecondAdjointNode2{F,T,I1,I2}(x, y1, y2, h11, h12, h22, inner1, inner2) -struct SecondAdjointNodeNullSource end -SecondAdjointNodeSource(::Nothing) = SecondAdjointNodeNullSource() - -@inbounds @inline Base.getindex(x::I, i) where {I<:SecondAdjointNodeNullSource} = - SecondAdjointNodeVar(i, NaN) -@inbounds @inline Base.getindex(x::I, i) where {I<:SecondAdjointNodeSource} = - SecondAdjointNodeVar(i, x.inner[i]) +@inline Base.getindex(x::I, i) where {I<:SecondAdjointNodeSource{Nothing}} = + SecondAdjointNodeVar(i, NaN16) +@inline Base.getindex(x::I, i) where {I<:SecondAdjointNodeSource} = + @inbounds SecondAdjointNodeVar(i, x.inner[i]) diff --git a/src/hessian.jl b/src/hessian.jl index ab1a5ae4..3ece715b 100644 --- a/src/hessian.jl +++ b/src/hessian.jl @@ -1,4 +1,19 @@ -@inbounds @inline function hdrpass( +""" + hdrpass(t1::T1, t2::T2, comp, y1, y2, o2, cnt, adj) + +Performs sparse hessian evaluation (`(df1/dx)(df2/dx)'` portion) via the reverse pass on the computation (sub)graph formed by the second-order forward pass + +# Arguments: +- `t1`: second-order computation (sub)graph for `f1` +- `t2`: second-order computation (sub)graph for `f2` +- `comp`: a `Compressor`, which helps map the counter to the sparse vector index +- `y1`: result vector #1 +- `y2`: result vector #2 (only used when evaluating sparsity) +- `o2`: index offset +- `cnt`: counter +- `adj`: second adjoint propagated up to the current node +""" +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -11,7 +26,7 @@ cnt = hdrpass(t1.inner, t2.inner, comp, y1, y2, o2, cnt, adj * t1.y * t2.y) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNode1, t2::SecondAdjointNode1, comp::Nothing, @@ -26,7 +41,7 @@ end end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -39,7 +54,7 @@ end cnt = hdrpass(t1, t2.inner, comp, y1, y2, o2, cnt, adj * t2.y) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNodeVar, t2::SecondAdjointNode1, comp::Nothing, @@ -54,7 +69,7 @@ end end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -67,7 +82,7 @@ end cnt = hdrpass(t1.inner, t2, comp, y1, y2, o2, cnt, adj * t1.y) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNode1, t2::SecondAdjointNodeVar, comp::Nothing, @@ -82,7 +97,7 @@ end end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -98,7 +113,7 @@ end cnt = hdrpass(t1.inner2, t2.inner2, comp, y1, y2, o2, cnt, adj * t1.y2 * t2.y2) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNode2, t2::SecondAdjointNode2, comp::Nothing, @@ -116,7 
+131,7 @@ end end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -130,7 +145,7 @@ end cnt = hdrpass(t1.inner, t2.inner2, comp, y1, y2, o2, cnt, adj * t1.y * t2.y2) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNode1, t2::SecondAdjointNode2, comp::Nothing, @@ -145,7 +160,7 @@ end cnt end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -159,7 +174,7 @@ end cnt = hdrpass(t1.inner2, t2.inner, comp, y1, y2, o2, cnt, adj * t1.y2 * t2.y) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNode2, t2::SecondAdjointNode1, comp::Nothing, @@ -174,7 +189,7 @@ end cnt end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -188,7 +203,7 @@ end cnt = hdrpass(t1, t2.inner2, comp, y1, y2, o2, cnt, adj * t2.y2) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNodeVar, t2::SecondAdjointNode2, comp::Nothing, @@ -203,7 +218,7 @@ end cnt end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -217,7 +232,7 @@ end cnt = hdrpass(t1.inner2, t2, comp, y1, y2, o2, cnt, adj * t1.y2) cnt end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNode2, t2::SecondAdjointNodeVar, comp::Nothing, @@ -233,7 +248,7 @@ end end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -244,7 +259,7 @@ end adj, ) where {T1<:SecondAdjointNodeVar,T2<:SecondAdjointNodeVar} i, j = t1.i, t2.i - if i == j + @inbounds if i == j y1[o2+comp(cnt += 1)] += 2 * adj else y1[o2+comp(cnt += 1)] += adj @@ -253,7 +268,7 @@ end end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -270,7 +285,7 @@ end } i, j = t1.i, t2.i y, v = y1 - if i == j + @inbounds if i == j y[i] += 2 * adj * v[i] else y[i] += adj * v[j] @@ -279,7 +294,23 @@ end return (cnt += 1) end -@inbounds @inline function hrpass( +""" + hrpass(t::D, comp, y1, y2, o2, cnt, adj, adj2) + +Performs sparse hessian evaluation (`d²f/dx²` portion) via the reverse pass on the computation (sub)graph formed by the second-order forward pass + +# Arguments: +- `t`: second-order computation (sub)graph +- `comp`: a `Compressor`, which helps map the counter to the sparse vector index +- `y1`: result vector #1 +- `y2`: result vector #2 (only used when evaluating sparsity) +- `o2`: index offset +- `cnt`: counter +- `adj`: first adjoint propagated up to the current node +- `adj2`: second adjoint propagated up to the current node +""" + +@inline function hrpass( t::D, comp, y1, @@ -292,7 +323,7 @@ end cnt = hrpass(t.inner, comp, y1, y2, o2, cnt, adj * t.y, adj2 * (t.y)^2 + adj * t.h) cnt end -@inbounds @inline function hrpass( +@inline function hrpass( t::D, comp, y1, @@ -310,10 +341,10 @@ end cnt end -@inbounds @inline hrpass0(args...) = hrpass(args...) +@inline hrpass0(args...) = hrpass(args...) 
-@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -326,7 +357,7 @@ end cnt = hrpass0(t.inner, comp, y1, y2, o2, cnt, adj * t.y, adj2 * (t.y)^2) cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -339,7 +370,7 @@ end cnt = hrpass0(t.inner, comp, y1, y2, o2, cnt, adj, adj2) cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -352,7 +383,7 @@ end cnt = hrpass0(t.inner, comp, y1, y2, o2, cnt, -adj, adj2) cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -366,7 +397,7 @@ end cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -379,7 +410,7 @@ end cnt = hrpass0(t.inner, comp, y1, y2, o2, cnt, adj, adj2) cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -393,7 +424,7 @@ end cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -408,7 +439,7 @@ end cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::D, comp, y1, @@ -422,7 +453,7 @@ end cnt = hrpass0(t.inner2, comp, y1, y2, o2, cnt, -adj, adj2) cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::T, comp, y1, @@ -434,7 +465,7 @@ end ) where {T<:SecondAdjointNodeVar} cnt end -@inbounds @inline function hrpass0( +@inline function hrpass0( t::T, comp::Nothing, y1, @@ -448,7 +479,7 @@ end end -@inbounds function hdrpass( +function hdrpass( t1::SecondAdjointNodeVar, t2::SecondAdjointNodeVar, comp::Nothing, @@ -462,7 +493,7 @@ end push!(y1, (t1.i, t2.i)) cnt end -@inbounds function hrpass( +function hrpass( t::SecondAdjointNodeVar, comp::Nothing, y1, @@ -477,7 +508,7 @@ end cnt end -@inbounds @inline function hrpass( +@inline function hrpass( t::T, comp, y1::Tuple{V1,V2}, @@ -488,10 +519,10 @@ end adj2, ) where {T<:SecondAdjointNodeVar,V1<:AbstractVector,V2<:AbstractVector} y, v = y1 - y[t.i] += adj2 * v[t.i] + @inbounds y[t.i] += adj2 * v[t.i] return (cnt += 1) end -@inbounds @inline function hrpass( +@inline function hrpass( t::T, comp, y1, @@ -501,10 +532,10 @@ end adj, adj2, ) where {T<:SecondAdjointNodeVar} - y1[o2+comp(cnt += 1)] += adj2 + @inbounds y1[o2+comp(cnt += 1)] += adj2 cnt end -@inbounds @inline function hrpass( +@inline function hrpass( t::T, comp, y1::V, @@ -515,11 +546,11 @@ end adj2, ) where {T<:SecondAdjointNodeVar,I<:Integer,V<:AbstractVector{I}} ind = o2 + comp(cnt += 1) - y1[ind] = t.i - y2[ind] = t.i + @inbounds y1[ind] = t.i + @inbounds y2[ind] = t.i cnt end -@inbounds @inline function hrpass( +@inline function hrpass( t::T, comp, y1::V, @@ -530,10 +561,10 @@ end adj2, ) where {T<:SecondAdjointNodeVar,I<:Tuple{Tuple{Int,Int},Int},V<:AbstractVector{I}} ind = o2 + comp(cnt += 1) - y1[ind] = ((t.i, t.i), ind) + @inbounds y1[ind] = ((t.i, t.i), ind) cnt end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -545,7 +576,7 @@ end ) where {T1<:SecondAdjointNodeVar,T2<:SecondAdjointNodeVar,I<:Integer,V<:AbstractVector{I}} i, j = t1.i, t2.i ind = o2 + comp(cnt += 1) - if i >= j + @inbounds if i >= j y1[ind] = i y2[ind] = j else @@ -554,7 +585,7 @@ end end cnt end -@inbounds @inline function hdrpass( +@inline function hdrpass( t1::T1, t2::T2, comp, @@ -571,7 +602,7 @@ end } i, j = t1.i, t2.i ind = o2 + comp(cnt += 1) - if i >= j + @inbounds if i >= j y1[ind] = ((i, j), ind) else y1[ind] = ((j, i), ind) @@ -579,9 +610,22 @@ end cnt end +""" + shessian!(y1, y2, f, x, adj1, adj2) + +Performs 
sparse hessian evaluation + +# Arguments: +- `y1`: result vector #1 +- `y2`: result vector #2 (only used when evaluating sparsity) +- `f`: the function to be differentiated in `SIMDFunction` format +- `x`: variable vector +- `adj1`: initial first adjoint +- `adj2`: initial second adjoint +""" function shessian!(y1, y2, f, x, adj1, adj2) @simd for k in eachindex(f.itr) - hrpass0( + @inbounds hrpass0( f.f.f(f.itr[k], SecondAdjointNodeSource(x)), f.f.comp2, y1, @@ -593,10 +637,9 @@ function shessian!(y1, y2, f, x, adj1, adj2) ) end end - function shessian!(y1, y2, f, x, adj1s::V, adj2) where {V<:AbstractVector} @simd for k in eachindex(f.itr) - hrpass0( + @inbounds hrpass0( f.f.f(f.itr[k], SecondAdjointNodeSource(x)), f.f.comp2, y1, diff --git a/src/jacobian.jl b/src/jacobian.jl index 4026cde7..faadf948 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -1,4 +1,19 @@ -@inbounds @inline function jrpass( +""" + jrpass(d::D, comp, i, y1, y2, o1, cnt, adj) + +Performs sparse jacobian evaluation via the reverse pass on the computation (sub)graph formed by the forward pass + +# Arguments: +- `d`: first-order computation (sub)graph +- `comp`: a `Compressor`, which helps map the counter to the sparse vector index +- `i`: constraint index (this is the `i`-th constraint) +- `y1`: result vector #1 +- `y2`: result vector #2 (only used when evaluating sparsity) +- `o1`: index offset +- `cnt`: counter +- `adj`: adjoint propagated up to the current node +""" +@inline function jrpass( d::D, comp, i, @@ -11,7 +26,7 @@ cnt = jrpass(d.inner, comp, i, y1, y2, o1, cnt, adj * d.y) return cnt end -@inbounds @inline function jrpass( +@inline function jrpass( d::D, comp, i, @@ -25,7 +40,7 @@ end cnt = jrpass(d.inner2, comp, i, y1, y2, o1, cnt, adj * d.y2) return cnt end -@inbounds @inline function jrpass( +@inline function jrpass( d::D, comp, i, @@ -35,10 +50,10 @@ end cnt, adj, ) where {D<:AdjointNodeVar} - y1[o1+comp(cnt += 1)] += adj + @inbounds y1[o1+comp(cnt += 1)] += adj return cnt end -@inbounds @inline function jrpass( +@inline function jrpass( d::D, comp, i, @@ -47,12 +62,12 @@ end o1, cnt, adj, -) where {D<:AdjointNodeVar,V1<:AbstractVector,V2<:AbstractVector} +) where {D<:AdjointNodeVar,V1<:AbstractVector,V2<:AbstractVector} (y, v) = y1 - y[i] += adj * v[d.i] + @inbounds y[i] += adj * v[d.i] return (cnt += 1) end -@inbounds @inline function jrpass( +@inline function jrpass( d::D, comp, i, @@ -63,10 +78,10 @@ end adj, ) where {D<:AdjointNodeVar,V1<:AbstractVector,V2<:AbstractVector} y, v = y2 - y[d.i] += adj * v[i] + @inbounds y[d.i] += adj * v[i] return (cnt += 1) end -@inbounds @inline function jrpass( +@inline function jrpass( d::D, comp, i, @@ -77,11 +92,11 @@ end adj, ) where {D<:AdjointNodeVar,I<:Integer,V<:AbstractVector{I}} ind = o1 + comp(cnt += 1) - y1[ind] = i - y2[ind] = d.i + @inbounds y1[ind] = i + @inbounds y2[ind] = d.i return cnt end -@inbounds @inline function jrpass( +@inline function jrpass( d::D, comp, i, @@ -92,14 +107,26 @@ end adj, ) where {D<:AdjointNodeVar,I<:Tuple{Tuple{Int,Int},Int},V<:AbstractVector{I}} ind = o1 + comp(cnt += 1) - y1[ind] = ((i, d.i), ind) + @inbounds y1[ind] = ((i, d.i), ind) return cnt end +""" + sjacobian!(y1, y2, f, x, adj) + +Performs sparse jacobian evaluation + +# Arguments: +- `y1`: result vector #1 +- `y2`: result vector #2 (only used when evaluating sparsity) +- `f`: the function to be differentiated in `SIMDFunction` format +- `x`: variable vector +- `adj`: initial adjoint +""" function sjacobian!(y1, y2, f, x, adj) @simd for i in eachindex(f.itr) - jrpass( + 
@inbounds jrpass( f.f.f(f.itr[i], AdjointNodeSource(x)), f.f.comp1, offset0(f, i), diff --git a/src/register.jl b/src/register.jl index c3a40e4e..f919624c 100644 --- a/src/register.jl +++ b/src/register.jl @@ -1,3 +1,29 @@ +""" + @register_univariate(f, df, ddf) + +Register a univariate function `f` with `ExaModels`, so that it can be used within objective and constraint expressions + +# Arguments: +- `f`: function +- `df`: derivative function +- `ddf`: second-order derivative function + +## Example ```jldoctest julia> using ExaModels julia> relu3(x) = x > 0 ? x^3 : zero(x) relu3 (generic function with 1 method) julia> drelu3(x) = x > 0 ? 3*x^2 : zero(x) drelu3 (generic function with 1 method) julia> ddrelu3(x) = x > 0 ? 6*x : zero(x) ddrelu3 (generic function with 1 method) julia> @register_univariate(relu3, drelu3, ddrelu3) ``` """ macro register_univariate(f, df, ddf) return esc( quote @@ -10,12 +36,50 @@ macro register_univariate(f, df, ddf) @inline $f(t::T) where {T<:ExaModels.AbstractSecondAdjointNode} = ExaModels.SecondAdjointNode1($f, $f(t.x), $df(t.x), $ddf(t.x), t) - @inbounds @inline (n::ExaModels.Node1{typeof($f),I})(i, x) where {I} = + @inline (n::ExaModels.Node1{typeof($f),I})(i, x) where {I} = $f(n.inner(i, x)) end, ) end +""" + @register_bivariate(f, df1, df2, ddf11, ddf12, ddf22) + +Register a bivariate function `f` with `ExaModels`, so that it can be used within objective and constraint expressions + +# Arguments: +- `f`: function +- `df1`: derivative function (w.r.t. first argument) +- `df2`: derivative function (w.r.t. second argument) +- `ddf11`: second-order derivative function (w.r.t. first argument) +- `ddf12`: second-order derivative function (w.r.t. first and second argument) +- `ddf22`: second-order derivative function (w.r.t. second argument) + +## Example ```jldoctest julia> using ExaModels julia> relu23(x, y) = (x > 0 || y > 0) ? (x + y)^3 : zero(x) relu23 (generic function with 1 method) julia> drelu231(x, y) = (x > 0 || y > 0) ? 3 * (x + y)^2 : zero(x) drelu231 (generic function with 1 method) julia> drelu232(x, y) = (x > 0 || y > 0) ? 3 * (x + y)^2 : zero(x) drelu232 (generic function with 1 method) julia> ddrelu2311(x, y) = (x > 0 || y > 0) ? 6 * (x + y) : zero(x) ddrelu2311 (generic function with 1 method) julia> ddrelu2312(x, y) = (x > 0 || y > 0) ? 6 * (x + y) : zero(x) ddrelu2312 (generic function with 1 method) julia> ddrelu2322(x, y) = (x > 0 || y > 0) ? 
6 * (x + y) : zero(x) +ddrelu2322 (generic function with 1 method) + +julia> @register_bivariate(relu23, drelu231, drelu232, ddrelu2311, ddrelu2312, ddrelu2322) ``` """ macro register_bivariate(f, df1, df2, ddf11, ddf12, ddf22) return esc( quote @@ -128,13 +192,13 @@ macro register_bivariate(f, df1, df2, ddf11, ddf12, ddf22) ) end - @inbounds @inline (n::ExaModels.Node2{typeof($f),I1,I2})(i, x) where {I1,I2} = + @inline (n::ExaModels.Node2{typeof($f),I1,I2})(i, x) where {I1,I2} = $f(n.inner1(i, x), n.inner2(i, x)) - @inbounds @inline (n::ExaModels.Node2{typeof($f),I1,I2})( + @inline (n::ExaModels.Node2{typeof($f),I1,I2})( i, x, ) where {I1<:Real,I2} = $f(n.inner1, n.inner2(i, x)) - @inbounds @inline (n::ExaModels.Node2{typeof($f),I1,I2})( + @inline (n::ExaModels.Node2{typeof($f),I1,I2})( i, x, ) where {I1,I2<:Real} = $f(n.inner1(i, x), n.inner2) diff --git a/src/simdfunction.jl b/src/simdfunction.jl index 7b4ca44e..84471ea4 100644 --- a/src/simdfunction.jl +++ b/src/simdfunction.jl @@ -1,10 +1,18 @@ -@inbounds @inline (a::Pair{P,S} where {P<:AbstractNode,S<:AbstractNode})(i, x) = +@inline (a::Pair{P,S} where {P<:AbstractNode,S<:AbstractNode})(i, x) = a.second(i, x) +""" + Compressor{I} + +Data structure for the sparse index + +# Fields: +- `inner::I`: stores the sparse index in tuple form +""" struct Compressor{I} inner::I end -@inbounds @inline (i::Compressor{I})(n) where {I} = i.inner[n] +@inline (i::Compressor{I})(n) where {I} = @inbounds i.inner[n] struct SIMDFunction{F,C1,C2} f::F @@ -16,17 +24,29 @@ struct SIMDFunction{F,C1,C2} o1step::Int o2step::Int end + +""" + SIMDFunction(gen::Base.Generator, o0 = 0, o1 = 0, o2 = 0) + +Returns a `SIMDFunction` built from `gen`. + +# Arguments: +- `gen`: an iterable function specified in `Base.Generator` format +- `o0`: offset for the function evaluation +- `o1`: offset for the derivative evaluation +- `o2`: offset for the second-order derivative evaluation +""" function SIMDFunction(gen::Base.Generator, o0 = 0, o1 = 0, o2 = 0) p = Par(eltype(gen.iter)) f = gen.f(p) - d = f(Identity(), AdjointNodeSource(NaNSource{Float16}())) + d = f(Identity(), AdjointNodeSource(nothing)) y1 = [] ExaModels.grpass(d, nothing, y1, nothing, 0, NaN16) - t = f(Identity(), SecondAdjointNodeSource(NaNSource{Float16}())) + t = f(Identity(), SecondAdjointNodeSource(nothing)) y2 = [] ExaModels.hrpass0(t, nothing, y2, nothing, nothing, 0, NaN16, NaN16) diff --git a/src/templates.jl b/src/templates.jl index ea1d3d79..12d49fe0 100644 --- a/src/templates.jl +++ b/src/templates.jl @@ -1,7 +1,5 @@ +# A template for convert_array. This is extended in extension packages for each device architecture. convert_array(v, ::Nothing) = v # to avoid type privacy -sum(a) = Base.sum(a) -findall(f, bitarray) = Base.findall(f, bitarray) -findall(bitarray) = Base.findall(bitarray) sort!(array; kwargs...) = Base.sort!(array; kwargs...) diff --git a/src/wrapper.jl b/src/wrapper.jl index 317b3e06..1e9367a0 100644 --- a/src/wrapper.jl +++ b/src/wrapper.jl @@ -1,3 +1,7 @@ +# WrapperNLPModel serves as a wrapper for an ExaModel, or more generally, any NLPModel. +# This is useful when you want to use a solver that does not support non-standard array data types. 
+# TODO: make this an independent package + struct WrapperNLPModel{ T, VT, @@ -31,7 +35,18 @@ struct WrapperNLPModel{ counters::NLPModels.Counters end +""" + WrapperNLPModel(m) + +Returns a `WrapperNLPModel{Float64,Vector{Float64}}` wrapping `m` +""" WrapperNLPModel(m) = WrapperNLPModel(Vector{Float64}, m) + +""" + WrapperNLPModel(VT, m) + +Returns a `WrapperNLPModel{T,VT}` wrapping `m <: AbstractNLPModel{T}` +""" function WrapperNLPModel(VT, m) nvar = NLPModels.get_nvar(m) ncon = NLPModels.get_ncon(m)
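
A minimal end-to-end usage sketch of the two pieces this patch exposes, `@register_univariate` and `WrapperNLPModel`. The derivative definitions are taken from the new docstring's doctest; the modeling calls (`ExaCore`, `variable`, `objective`, `ExaModel`) are assumed from the existing ExaModels API and are not part of this diff.

```julia
using ExaModels

# Custom univariate function with hand-written derivatives,
# registered via the macro added in src/register.jl.
relu3(x) = x > 0 ? x^3 : zero(x)
drelu3(x) = x > 0 ? 3 * x^2 : zero(x) # first derivative
ddrelu3(x) = x > 0 ? 6 * x : zero(x)  # second derivative
@register_univariate(relu3, drelu3, ddrelu3)

# Build a small model that uses the registered function in its objective.
c = ExaCore()
x = variable(c, 10)
objective(c, relu3(x[i] - 0.5) for i = 1:10)
m = ExaModel(c)

# Wrap the model so that solvers restricted to Vector{Float64}
# storage can consume it (src/wrapper.jl).
wm = WrapperNLPModel(m)
```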