|
| 1 | +# l^α regularization |
| 2 | + |
| 3 | +############################################################################################ |
| 4 | +### Types |
| 5 | +############################################################################################ |
| 6 | +""" |
| 7 | + struct SemNorm{α, T, TB} <: AbstractLoss{ExactHessian} |
| 8 | +
|
| 9 | +Regularization term that provides *Lᵅ* regularization of SEM parameters. |
| 10 | +The term implements the ``\\sum_{i=1\\ldots n} \\left| p_i \right|^{\\alpha}``, |
| 11 | +where *p_i*, *i = 1..n* is the vector of selected SEM parameter values. |
| 12 | +For `α = 1` it implements the *LASSO* (`SemLasso` alias type), and for |
| 13 | +`α = 2` it implements the *Ridge* regularization (`SemRidge` alias type). |
| 14 | +The term also allows specifying an optional affine transform (*A × p + b*) |
| 15 | +to apply to the parameters before the regularization. |
| 16 | +
|
| 17 | +# Constructors |
| 18 | + SemNorm(A::SparseMatrixCSC, b::Union{AbstractVector, Nothing} = nothing) |
| 19 | + SemNorm(spec::SemSpecification, params::AbstractVector, |
| 20 | + [A::AbstractMatrix = nothing], |
| 21 | + [b::AbstractVector = nothing]; |
| 22 | + α::Real) |
| 23 | +
|
| 24 | +# Arguments |
| 25 | +- `spec`: SEM model specification. |
| 26 | +- `params::Vector`: optional IDs (Symbols) or indices of parameters to regularize. |
| 27 | +- `A`: optional transformation matrix that defines how to transform the vector of parameter values |
| 28 | + before the regularization. If `params` is not specified, the transformation is applied |
| 29 | + to the entire parameters vector. |
| 30 | +- `b`: optional vector of intercepts to add to the transformed parameters. |
| 31 | +- `α`: regularization parameter, any positive real number is supported |
| 32 | +
|
| 33 | +# Examples |
| 34 | +```julia |
| 35 | +my_lasso = SemLasso(spec, [:λ₁, :λ₂, :ω₂₃]) |
| 36 | +my_trans_ridge = SemRidge(spec, [:λ₁, :λ₂, :ω₂₃], [1.0 1.0 0.0; 0.0 0.0 1.0], [-2.0, 0.0]) |
| 37 | +``` |
| 38 | +""" |
| 39 | +struct SemNorm{α, TP, TA, TB, TH} <: AbstractLoss{ExactHessian} |
| 40 | + param_inds::TP # indices of parameters to regularize |
| 41 | + A::TA # transformation/subsetting of the parameters |
| 42 | + At::TA # Aᵀ |
| 43 | + b::TB # optional transformed parameter intercepts |
| 44 | + H_inds::Vector{Int} # non-zero linear indices of Hessian |
| 45 | + H_vals::TH # non-zero values of Hessian |
| 46 | +end |
| 47 | + |
| 48 | +const SemRidge{TP, TA, TB, TH} = SemNorm{2, TP, TA, TB, TH} |
| 49 | +const SemLasso{TP, TA, TB, TH} = SemNorm{1, TP, TA, TB, TH} |
| 50 | + |
| 51 | +############################################################################ |
| 52 | +### Constructors |
| 53 | +############################################################################ |
| 54 | + |
| 55 | +function SemNorm{α}( |
| 56 | + param_inds::Union{AbstractVector, Nothing} = nothing, |
| 57 | + A::Union{AbstractMatrix, Nothing} = nothing, |
| 58 | + b::Union{AbstractVector, Nothing} = nothing, |
| 59 | +) where {α} |
| 60 | + isnothing(A) || isnothing(param_inds) || size(A, 2) == length(param_inds) || |
| 61 | + throw(DimensionMismatch("The transformation matrix columns ($(size(A, 2))) should match " * |
| 62 | + "the number of parameters to regularize ($(length(param_inds)))")) |
| 63 | + isnothing(b) || (isnothing(A) && isnothing(param_inds)) || |
| 64 | + (length(b) == (isnothing(A) ? length(param_inds) : size(A, 1))) || |
| 65 | + throw(DimensionMismatch("The intercept length ($(length(b))) should match the rows of " * |
| 66 | + "the transformation matrix ($(isnothing(A) ? "not specified" : size(A, 1)))" * |
| 67 | + " or the number of parameters to regularize ($(isnothing(param_inds) ? "not specified" : length(param_inds)))")) |
| 68 | + |
| 69 | + At = !isnothing(A) ? convert(typeof(A), A') : nothing |
| 70 | + H = !isnothing(A) ? α * At * A : nothing # FIXME |
| 71 | + if isnothing(H) |
| 72 | + H_inds = Vector{Int}() |
| 73 | + H_v = nothing |
| 74 | + else |
| 75 | + H_i, H_j, H_v = findnz(H) |
| 76 | + H_indmtx = LinearIndices(H) |
| 77 | + H_inds = [H_indmtx[i, j] for (i, j) in zip(H_i, H_j)] |
| 78 | + H_v = copy(H_v) |
| 79 | + end |
| 80 | + return SemNorm{α, typeof(param_inds), typeof(A), typeof(b), typeof(H_v)}( |
| 81 | + param_inds, A, At, b, H_inds, H_v) |
| 82 | +end |
| 83 | + |
| 84 | +function SemNorm{α}( |
| 85 | + spec::SemSpecification, |
| 86 | + params::AbstractVector, |
| 87 | + A::Union{AbstractMatrix, Nothing} = nothing, |
| 88 | + b::Union{AbstractVector, Nothing} = nothing, |
| 89 | +) where {α} |
| 90 | + param_inds = eltype(params) <: Symbol ? param_indices(spec, params) : params |
| 91 | + |
| 92 | + isnothing(A) || size(A, 2) == length(param_inds) || |
| 93 | + throw(DimensionMismatch("The transformation matrix columns ($(size(A, 2))) should match " * |
| 94 | + "the parameters to regularize ($(length(param_inds)))")) |
| 95 | + |
| 96 | + sel_params_mtx = eachrow_to_col(Float64, param_inds, nparams(spec)) |
| 97 | + if !isnothing(A) |
| 98 | + if A isa SparseMatrixCSC |
| 99 | + # for sparse matrices do parameters selection and multiplication in one step |
| 100 | + A = convert(typeof(A), A * sel_params_mtx) |
| 101 | + param_inds = nothing |
| 102 | + end |
| 103 | + else # if no matrix, just use selection matrix |
| 104 | + A = sel_params_mtx |
| 105 | + param_inds = nothing |
| 106 | + end |
| 107 | + return SemNorm{α}(param_inds, A, b) |
| 108 | +end |
| 109 | + |
| 110 | +SemNorm(args...; α::Real) = SemNorm{α}(args...) |
| 111 | + |
| 112 | +nparams(f::SemNorm) = size(f.A, 2) |
| 113 | + |
| 114 | +############################################################################################ |
| 115 | +### methods |
| 116 | +############################################################################################ |
| 117 | + |
| 118 | +elnorm(_::Val{α}) where {α} = x -> abs(x)^α |
| 119 | +elnorm(_::Val{1}) = abs |
| 120 | +elnorm(_::Val{2}) = abs2 |
| 121 | + |
| 122 | +elnorm(_::SemNorm{α}) where {α} = elnorm(Val(α)) |
| 123 | + |
| 124 | +# not multiplied by α, handled by mul! |
| 125 | +elnormgrad(_::Val{α}) where {α} = x -> abs(x)^(α - 1) * sign(x) |
| 126 | +elnormgrad(_::Val{2}) = identity |
| 127 | +elnormgrad(_::Val{1}) = sign |
| 128 | + |
| 129 | +elnormgrad(::SemNorm{α}) where {α} = elnormgrad(Val(α)) |
| 130 | + |
| 131 | +function evaluate!( |
| 132 | + objective, gradient, hessian, |
| 133 | + norm::SemNorm{α}, |
| 134 | + params, |
| 135 | +) where {α} |
| 136 | + if !isnothing(norm.param_inds) |
| 137 | + params = params[norm.param_inds] |
| 138 | + end |
| 139 | + if !isnothing(norm.A) |
| 140 | + trf_params = norm.A * params |
| 141 | + end |
| 142 | + if !isnothing(norm.b) |
| 143 | + trf_params .+= norm.b |
| 144 | + end |
| 145 | + |
| 146 | + obj = NaN |
| 147 | + isnothing(objective) || (obj = sum(elnorm(norm), trf_params)) |
| 148 | + |
| 149 | + if !isnothing(gradient) |
| 150 | + elgrad_trf_params = elnormgrad(norm).(trf_params) |
| 151 | + if !isnothing(norm.param_inds) |
| 152 | + mul!(params, norm.At, elgrad_trf_params, α, 0) |
| 153 | + fill!(gradient, 0) |
| 154 | + @inbounds gradient[norm.param_inds] .= params |
| 155 | + else |
| 156 | + mul!(gradient, norm.At, elgrad_trf_params, α, 0) |
| 157 | + end |
| 158 | + end |
| 159 | + |
| 160 | + if !isnothing(hessian) |
| 161 | + fill!(hessian, 0) |
| 162 | + if α === 1 |
| 163 | + # do nothing, hessian is zero |
| 164 | + elseif α === 2 |
| 165 | + @inbounds hessian[norm.H_inds] .= norm.H_vals |
| 166 | + else |
| 167 | + error("Hessian not implemented for α ≠ 1, 2") |
| 168 | + # TODO: Implement Hessian for other values of α |
| 169 | + end |
| 170 | + end |
| 171 | + return obj |
| 172 | +end |
0 commit comments