Skip to content

Commit 028503d

Browse files
alystAlexey Stukalov
authored andcommitted
SemNorm: generalize SemRidge
- allow l^alpha for any alpha - specific support for alpha=1 (SemLasso) and alpha=2 (SemRidge) - allow affine transform of parameters before regularization - SemSpec rather then SemImplied in ctor - convert calculation to evaluate!()
1 parent 71bd3ba commit 028503d

3 files changed

Lines changed: 178 additions & 96 deletions

File tree

src/StructuralEquationModels.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,10 @@ include("implied/empty.jl")
7070
include("loss/ML/abstract.jl")
7171
include("loss/ML/ML.jl")
7272
include("loss/ML/FIML.jl")
73-
include("loss/regularization/ridge.jl")
7473
include("loss/WLS/WLS.jl")
7574
include("loss/constant/constant.jl")
75+
# regularization
76+
include("loss/regularization/norm.jl")
7677
# optimizer
7778
include("optimizer/abstract.jl")
7879
include("optimizer/Empty.jl")
@@ -125,9 +126,11 @@ export AbstractSem,
125126
SemML,
126127
SemFIML,
127128
em_mvn,
128-
SemRidge,
129-
SemConstant,
130129
SemWLS,
130+
SemConstant,
131+
SemLasso,
132+
SemNorm,
133+
SemRidge,
131134
loss,
132135
nsem_terms,
133136
sem_terms,

src/loss/regularization/norm.jl

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
# l^α regularization
2+
3+
############################################################################################
4+
### Types
5+
############################################################################################
6+
"""
7+
struct SemNorm{α, T, TB} <: AbstractLoss{ExactHessian}
8+
9+
Regularization term that provides *Lᵅ* regularization of SEM parameters.
10+
The term implements the ``\\sum_{i=1\\ldots n} \\left| p_i \right|^{\\alpha}``,
11+
where *p_i*, *i = 1..n* is the vector of selected SEM parameter values.
12+
For `α = 1` it implements the *LASSO* (`SemLasso` alias type), and for
13+
`α = 2` it implements the *Ridge* regularization (`SemRidge` alias type).
14+
The term also allows specifying an optional affine transform (*A × p + b*)
15+
to apply to the parameters before the regularization.
16+
17+
# Constructors
18+
SemNorm(A::SparseMatrixCSC, b::Union{AbstractVector, Nothing} = nothing)
19+
SemNorm(spec::SemSpecification, params::AbstractVector,
20+
[A::AbstractMatrix = nothing],
21+
[b::AbstractVector = nothing];
22+
α::Real)
23+
24+
# Arguments
25+
- `spec`: SEM model specification.
26+
- `params::Vector`: optional IDs (Symbols) or indices of parameters to regularize.
27+
- `A`: optional transformation matrix that defines how to transform the vector of parameter values
28+
before the regularization. If `params` is not specified, the transformation is applied
29+
to the entire parameters vector.
30+
- `b`: optional vector of intercepts to add to the transformed parameters.
31+
- `α`: regularization parameter, any positive real number is supported
32+
33+
# Examples
34+
```julia
35+
my_lasso = SemLasso(spec, [:λ₁, :λ₂, :ω₂₃])
36+
my_trans_ridge = SemRidge(spec, [:λ₁, :λ₂, :ω₂₃], [1.0 1.0 0.0; 0.0 0.0 1.0], [-2.0, 0.0])
37+
```
38+
"""
39+
struct SemNorm{α, TP, TA, TB, TH} <: AbstractLoss{ExactHessian}
40+
param_inds::TP # indices of parameters to regularize
41+
A::TA # transformation/subsetting of the parameters
42+
At::TA # Aᵀ
43+
b::TB # optional transformed parameter intercepts
44+
H_inds::Vector{Int} # non-zero linear indices of Hessian
45+
H_vals::TH # non-zero values of Hessian
46+
end
47+
48+
const SemRidge{TP, TA, TB, TH} = SemNorm{2, TP, TA, TB, TH}
49+
const SemLasso{TP, TA, TB, TH} = SemNorm{1, TP, TA, TB, TH}
50+
51+
############################################################################
52+
### Constructors
53+
############################################################################
54+
55+
function SemNorm{α}(
56+
param_inds::Union{AbstractVector, Nothing} = nothing,
57+
A::Union{AbstractMatrix, Nothing} = nothing,
58+
b::Union{AbstractVector, Nothing} = nothing,
59+
) where {α}
60+
isnothing(A) || isnothing(param_inds) || size(A, 2) == length(param_inds) ||
61+
throw(DimensionMismatch("The transformation matrix columns ($(size(A, 2))) should match " *
62+
"the number of parameters to regularize ($(length(param_inds)))"))
63+
isnothing(b) || (isnothing(A) && isnothing(param_inds)) ||
64+
(length(b) == (isnothing(A) ? length(param_inds) : size(A, 1))) ||
65+
throw(DimensionMismatch("The intercept length ($(length(b))) should match the rows of " *
66+
"the transformation matrix ($(isnothing(A) ? "not specified" : size(A, 1)))" *
67+
" or the number of parameters to regularize ($(isnothing(param_inds) ? "not specified" : length(param_inds)))"))
68+
69+
At = !isnothing(A) ? convert(typeof(A), A') : nothing
70+
H = !isnothing(A) ? α * At * A : nothing # FIXME
71+
if isnothing(H)
72+
H_inds = Vector{Int}()
73+
H_v = nothing
74+
else
75+
H_i, H_j, H_v = findnz(H)
76+
H_indmtx = LinearIndices(H)
77+
H_inds = [H_indmtx[i, j] for (i, j) in zip(H_i, H_j)]
78+
H_v = copy(H_v)
79+
end
80+
return SemNorm{α, typeof(param_inds), typeof(A), typeof(b), typeof(H_v)}(
81+
param_inds, A, At, b, H_inds, H_v)
82+
end
83+
84+
function SemNorm{α}(
85+
spec::SemSpecification,
86+
params::AbstractVector,
87+
A::Union{AbstractMatrix, Nothing} = nothing,
88+
b::Union{AbstractVector, Nothing} = nothing,
89+
) where {α}
90+
param_inds = eltype(params) <: Symbol ? param_indices(spec, params) : params
91+
92+
isnothing(A) || size(A, 2) == length(param_inds) ||
93+
throw(DimensionMismatch("The transformation matrix columns ($(size(A, 2))) should match " *
94+
"the parameters to regularize ($(length(param_inds)))"))
95+
96+
sel_params_mtx = eachrow_to_col(Float64, param_inds, nparams(spec))
97+
if !isnothing(A)
98+
if A isa SparseMatrixCSC
99+
# for sparse matrices do parameters selection and multiplication in one step
100+
A = convert(typeof(A), A * sel_params_mtx)
101+
param_inds = nothing
102+
end
103+
else # if no matrix, just use selection matrix
104+
A = sel_params_mtx
105+
param_inds = nothing
106+
end
107+
return SemNorm{α}(param_inds, A, b)
108+
end
109+
110+
SemNorm(args...; α::Real) = SemNorm{α}(args...)
111+
112+
nparams(f::SemNorm) = size(f.A, 2)
113+
114+
############################################################################################
115+
### methods
116+
############################################################################################
117+
118+
elnorm(_::Val{α}) where {α} = x -> abs(x)^α
119+
elnorm(_::Val{1}) = abs
120+
elnorm(_::Val{2}) = abs2
121+
122+
elnorm(_::SemNorm{α}) where {α} = elnorm(Val(α))
123+
124+
# not multiplied by α, handled by mul!
125+
elnormgrad(_::Val{α}) where {α} = x -> abs(x)^- 1) * sign(x)
126+
elnormgrad(_::Val{2}) = identity
127+
elnormgrad(_::Val{1}) = sign
128+
129+
elnormgrad(::SemNorm{α}) where {α} = elnormgrad(Val(α))
130+
131+
function evaluate!(
132+
objective, gradient, hessian,
133+
norm::SemNorm{α},
134+
params,
135+
) where {α}
136+
if !isnothing(norm.param_inds)
137+
params = params[norm.param_inds]
138+
end
139+
if !isnothing(norm.A)
140+
trf_params = norm.A * params
141+
end
142+
if !isnothing(norm.b)
143+
trf_params .+= norm.b
144+
end
145+
146+
obj = NaN
147+
isnothing(objective) || (obj = sum(elnorm(norm), trf_params))
148+
149+
if !isnothing(gradient)
150+
elgrad_trf_params = elnormgrad(norm).(trf_params)
151+
if !isnothing(norm.param_inds)
152+
mul!(params, norm.At, elgrad_trf_params, α, 0)
153+
fill!(gradient, 0)
154+
@inbounds gradient[norm.param_inds] .= params
155+
else
156+
mul!(gradient, norm.At, elgrad_trf_params, α, 0)
157+
end
158+
end
159+
160+
if !isnothing(hessian)
161+
fill!(hessian, 0)
162+
if α === 1
163+
# do nothing, hessian is zero
164+
elseif α === 2
165+
@inbounds hessian[norm.H_inds] .= norm.H_vals
166+
else
167+
error("Hessian not implemented for α ≠ 1, 2")
168+
# TODO: Implement Hessian for other values of α
169+
end
170+
end
171+
return obj
172+
end

src/loss/regularization/ridge.jl

Lines changed: 0 additions & 93 deletions
This file was deleted.

0 commit comments

Comments
 (0)