From 90a977298d15c6aac5323ca0115f388b37bc8048 Mon Sep 17 00:00:00 2001 From: rand0mwalker Date: Fri, 10 Jul 2015 17:45:55 +0800 Subject: [PATCH 1/3] Add a complete example. A small example is added in the examples/bcd.cpp. The vector x0 is not necessarily restored in each test. It may have something to do with the parameter \tau, since it need to be tuned for specific prlblems. --- examples/bcd.cpp | 215 +++++++++++++--------------------------- include/Solvers/BCD.hpp | 137 +++++-------------------- 2 files changed, 91 insertions(+), 261 deletions(-) diff --git a/examples/bcd.cpp b/examples/bcd.cpp index a1f89be..40a4ba4 100644 --- a/examples/bcd.cpp +++ b/examples/bcd.cpp @@ -3,37 +3,14 @@ #include #include #include +#include +#include +#include using namespace std; #include "Solvers/BCD.hpp" -//class MyFuncF : public FuncF -//{ -//public: -// Float eval(const Vector& x) -// { -// Float val = 0; -// int dim = x.size(); -// for(int i = 0; i < dim; ++i) -// { -// val += (x[i]-5.0) * (x[i]-5.0); -// } -// return val; -// } - -// Vector grad(const Vector& x) -// { -// int dim = x.size(); -// Vector g(dim); -// for(int i = 0; i < dim; ++i) -// { -// g[i] = 2.0 * (x[i]-5.0); -// } -// return g; -// } -//}; - class MyFuncF : public FuncF { public: @@ -51,155 +28,97 @@ class MyFuncF : public FuncF return g; } - - void loadA(const string& fn) - { - Float val; - ifstream ifs(fn.c_str()); - int m, n; - ifs >> m >> n; - cout << "m : " << m << ", n : " << n << endl; - A.resize(m,n); - for(int i = 0; i < m; ++i) - { - for(int j = 0; j < n; ++j) - { - ifs >> val; - A(i,j) = val; - } - } - ifs.close(); -// cout << "load A ok" << endl; -// cout << "A(m,n)" << A(m-1,n-1) << endl; - -// // colume normalize -// auto ncol = A.cols(); -// auto nrow = A.rows(); -// for(int ic = 0; ic < ncol; ++ ic) -// { -// A.col(ic).normalize(); -// Float cc = 0; -// for(int ir = 0; ir < nrow; ++ir) -// { -// auto v = A(ir, ic); -// cc += v * v; -// } -// cc = sqrt(cc); -// Float cc2 = 1.0 / cc; -// for(int ir = 0; ir < nrow; ++ir) -// A(ir,ic) *= cc2; -// } -// cout << "Normalized" << endl; - - } - -// Float get_lambdaMax() -// { -// Vector v = A.transpose() * b; -// Float vm = fabs(v(0)); -// for(int i = 1; i < v.size(); ++i) -// { -// Float vi = fabs(v(i)); -// if(vm < vi) -// vm = vi; -// } -// return vm; -// } - - void loadb(const string& fn) - { - Float val; - ifstream ifs(fn.c_str()); - int n; - ifs >> n; - b.resize(n); - for(int i = 0; i < n; ++i) - { - ifs >> val; - b(i) = val; - } - ifs.close(); -// cout << "load b ok" << endl; -// cout << "b(n)" << b(n-1) << endl; - } - - const Matrix& getA()const{return A;} - Matrix A; Vector b; }; -void put_vec(const Vector& x); -void load_vec(const string &fn, Vector& x); -Float load_tau(); -void set_vec(Vector& x, Float val); - +// generate sparse random x of size sz with nnz nonzeros +void gen_sp_rand_x(Vector& x, int sz, int nnz); +// inf norm of a vector +Float vec_inf_norm(const Vector& x); int main(int argc, char *argv[]) { - // set f, the differential part + + /// dim + int nrow = 15; + int ncol = 75; + + + /// set f, the differentiable part MyFuncF f; - f.loadA("A.txt"); - f.loadb("b.txt"); - int dim = f.getA().cols(); + f.A.setRandom(nrow, ncol); + for(int i = 0; i < f.A.cols(); ++i) + f.A.col(i).normalize(); - // set r_i, the non-smooth part - vector ri{FuncR(FuncR::Norm1, dim)}; + // x0, the ground truth + Vector x0; + Float spdensity = 0.05; + int nnz = max(static_cast(ncol*spdensity), 5); + gen_sp_rand_x(x0, ncol, nnz); + // b, the rhs + f.b = f.A * x0; + + + /// set r_i, the non-smooth part + int dim = f.A.cols(); + vector ri{FuncR(FuncR::Norm1, dim)}; - // set par + /// set par BCDSolverParam par; - par.tau = load_tau(); - par.maxIter = 80000; + // set tau + par.tau = 0.8; // tau should be tuned for specific problems + par.maxIter = 20000; par.objTol = 1e-6; par.xTol = 1e-6; // BCDSolver sol(par, f, ri); - Vector x0(dim); - set_vec(x0,0); - sol.init_x(x0); + Vector x_init(dim); + x_init.setZeros(); + sol.init_x(x_init); sol.solve(); - auto x_re = sol.result(); - put_vec(x_re); - + cout << "-----------------------------------" << endl; + cout << "x0 is " << x0 << endl; + } -void set_vec(Vector& x, Float val) -{ - for(auto& t:x) t = val; -} -void put_vec(const Vector& x) -{ - ofstream ofs("x_re.txt"); - ofs << x.size() << endl; - for(int i = 0; i < x.size(); ++i) - ofs << x(i) << endl; - ofs.close(); -} +/************************************************** + * + * sub routines + * + * + */ + -void load_vec(const string& fn, Vector& x) +Float vec_inf_norm(const Vector& x) { - Float val; - ifstream ifs(fn.c_str()); - int n; - ifs >> n; - x.resize(n); - for(int i = 0; i < n; ++i) + auto v = x[0]; + for(const auto& xi:x) { - ifs >> val; - x(i) = val; + if(v < xi) v = xi; + if(v < -xi) v = -xi; } - ifs.close(); + return v; } -Float load_tau() +void gen_sp_rand_x(Vector& x, int sz, int nnz) { - Float tau; - ifstream ifs("tau.txt"); - ifs >> tau; - ifs.close(); - return tau; + x.resize(sz); + x.setZeros(); + // rand engine + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + default_random_engine generator(seed); + // rand id + vector idx(sz); + for(int i = 0; i < sz; ++i) idx[i] = i; + shuffle(idx.begin(), idx.end(), generator); + // rand val + uniform_real_distribution distribution(0.0,1.0); + auto dice = std::bind(distribution, generator); + for(int i = 0; i < nnz; ++i) + x[idx[i]] = dice(); } diff --git a/include/Solvers/BCD.hpp b/include/Solvers/BCD.hpp index a458a0f..5992b54 100644 --- a/include/Solvers/BCD.hpp +++ b/include/Solvers/BCD.hpp @@ -2,10 +2,14 @@ /** + * BCD solver * * + * general form of the problem: + * + * f(x) + sum_{i=1}^{s}r_i(x) * - * + * where f(x) is differentiable while r_i(x) is not * */ @@ -27,8 +31,9 @@ using namespace std; typedef kernel::podscalar Float; -// user input: +/// function F: the differentiable part +/// need to be derived to overload the eval() and grad() method class FuncF { public: @@ -38,6 +43,8 @@ class FuncF inline Float operator()(const Vector& x) {return eval(x);} }; +/// function R: the non-smooth part +/// usually in the form of 1-norm or a indicator function class FuncR { public: // Function type @@ -118,17 +125,12 @@ class FuncR }; -// parameters +/// solver parameters typedef struct BCDSolverParam { // objective Float tau; - - // algorithm - Float ups; // factor in choosing direction - - // iteration int maxIter; // max iteration number Float errorTol; // @@ -143,7 +145,6 @@ typedef struct BCDSolverParam , errorTol(1e-6) , xTol(1e-6) , objTol(1e-6) - , ups(0.8) , verbose(0) , tau(1.0) {} @@ -207,10 +208,6 @@ class BCDSolver : public COPT::GeneralSolver Vector m_x, m_xprev, m_xprev2; Vector m_gradf; Vector m_descent, m_descentprev; - Matrix m_H; - - -// vector m_idTag; vector m_block; int m_dim; @@ -239,7 +236,6 @@ class BCDSolver : public COPT::GeneralSolver void paramUpdate(); void updateL(); vector m_xhat; - void updateLPerBlock(int kblock); // block control class BlockCtrl @@ -334,13 +330,9 @@ BCDSolver::BCDSolver(BCDSolverParam param, FuncF &f, vector &r_vec) m_xprev.resize(m_dim); m_xprev2.resize(m_dim); m_gradf.resize(m_dim); -// m_gradfprev.resize(m_dim); m_descent.resize(m_dim); m_descentprev.resize(m_dim); - m_H.resize(m_dim, m_dim); - m_H = Matrix::identity(m_dim, m_dim); -// m_idTag.resize(m_dim); - + // init value m_objprev = numeric_limits::max(); Vector tx(m_dim); @@ -430,107 +422,25 @@ void BCDSolver::paramUpdate() Float wt1 = (par0[i].t - 1.0) / par0[i].t; Float wt2 = sqrt(par0[i].L / par[i].L); par[i].w = min(wt1, wt2); - } - } -} - -void BCDSolver::updateL() -{ -// Vector g = m_f.grad(m_x); -// int id1, id2; -// auto& par = m_paramAuto; -// for(int kb = 0; kb < m_nblock; ++kb) -// { -// m_blockCtrl.block_id_interval(kb, id1, id2); -// Vector gk; -// get_block(g, id1, id2, gk); -// Matrix ggt = gk.mulTrans(gk); -// Float maxe = ggt.frobeniusNorm(); -// par[kb].L = 2.5 * maxe; -// } - - - - if (m_kIter == 0) - { - // init - for(int kb = 0; kb < m_nblock; ++kb) - { - m_paramAuto[kb].L = 1.0; -// updateLPerBlock(kb); - } - } - else - { - auto& par = m_paramAuto; - Vector xt, xi, dxi; - xt = m_xprev; - for(int kb = 0; kb < m_nblock; ++kb) - { - // block k - int ia, ib; - m_blockCtrl.block_id_interval(kb, ia, ib); - - // dxi - get_block(m_x,ia,ib, xi); - dxi = xi - m_xhat[kb]; - - // obj - set_block(xt, ia, ib, m_xhat[kb]); - Float fval0= m_f(xt); - set_block(xt, ia, ib, xi); - Float fval = m_f(xt); - - // gi - Vector g = m_f.grad(xt), gi; - get_block(g, ia, ib, gi); - - // vals - Float gDx = gi.dot(dxi); - Float dxNorm = 0.5 * dxi.dot(dxi); - cout << "start checking" << endl; - cin.get(); - while (fval > fval0 + gDx + dxNorm * par[kb].L) - { - par[kb].L *= 2.0; - cout << " enlarge " << endl; - cin.get(); - } - cout << "--------------------" << endl; - cout << " f : " << fval << endl; - cout << " f0 : " << fval0 << endl; - cout << " gdx: " << gDx << endl; - cout << " dxn: " << dxNorm << endl; - cout << " L : " << par[kb].L << endl; } - - -// for(int kb = 0; kb < m_nblock; ++kb) -// { -// updateLPerBlock(kb); -// } - } - } -void BCDSolver::updateLPerBlock(int kblock) +void BCDSolver::updateL() { Vector g = m_f.grad(m_x); int id1, id2; auto& par = m_paramAuto; - - m_blockCtrl.block_id_interval(kblock, id1, id2); - Vector gk; - get_block(g, id1, id2, gk); - Matrix ggt = gk.mulTrans(gk); - Float maxe = ggt.frobeniusNorm(); - par[kblock].L = 2.0 * maxe; -// cout << "maxe: " << maxe << endl; - // adjust per step - + for(int kb = 0; kb < m_nblock; ++kb) + { + m_blockCtrl.block_id_interval(kb, id1, id2); + Vector gk; + get_block(g, id1, id2, gk); + Matrix ggt = gk.mulTrans(gk); + Float maxe = ggt.frobeniusNorm(); + par[kb].L = maxe; + } - } @@ -596,7 +506,6 @@ void BCDSolver::doCompute() kernel::podscalar BCDSolver::doOneIteration() { // preproc -// m_xprev2 = m_xprev; m_xprev = m_x; m_objprev = m_obj; @@ -640,10 +549,13 @@ bool BCDSolver::terminalSatisfied()const return true; // gradient norm + // ... // step length + // ... // relative error + // ... } kernel::podscalar BCDSolver::objective()const @@ -663,4 +575,3 @@ void BCDSolver::print() #define BCDSOLVER #endif // BCDSOLVER - From 2289b81b6fc3c0c4df5d7356af469b4bddee9d3d Mon Sep 17 00:00:00 2001 From: rand0mwalker Date: Sun, 12 Jul 2015 23:27:58 +0800 Subject: [PATCH 2/3] Rewritten with functional style interface of copt. 1. rewrite using the functional style framework of copt; 2. add a constructor of solver class with a option parameter; 3. currently implementation put in the same file of declaration. --- examples/bcd.cpp | 117 ++++--- include/Base/Solver.hpp | 34 ++ include/Solvers/BCD.hpp | 759 +++++++++++++++++----------------------- 3 files changed, 419 insertions(+), 491 deletions(-) diff --git a/examples/bcd.cpp b/examples/bcd.cpp index 40a4ba4..3cd2e2a 100644 --- a/examples/bcd.cpp +++ b/examples/bcd.cpp @@ -6,32 +6,45 @@ #include #include #include +#include using namespace std; +using namespace std::placeholders; #include "Solvers/BCD.hpp" +/// +/// example +/// +/// min 0.5*||Ax-b||_2^2 + tau*||x||_1 +/// +/// here f(x) = 0.5*||Ax-b||_2^2 +/// and r(x) = ||x||_1 +/// +/// -class MyFuncF : public FuncF +/// f(x) +// parameter of f(x): A & b +struct f_param { -public: - Float eval(const Vector& x) - { - Vector r = A*x - b; - Float val = r.norm(); - return 0.5 * val * val; - } - - Vector grad(const Vector& x) - { - Matrix aat = A.transMulti(A); - Vector g = aat*x - A.transpose() * b; - return g; - } - Matrix A; Vector b; }; +// evaluation of f(x) +Float f_eval(const Vector& x, const f_param& fpar) +{ + Vector r = fpar.A*x - fpar.b; + Float val = r.norm(); + return 0.5 * val * val; +} +// gradient of f(x) +Vector f_grad(const Vector& x, const f_param& fpar) +{ + Matrix aat = fpar.A.transMulti(fpar.A); + Vector g = aat*x - fpar.A.transpose() * fpar.b; + return g; +} +/// tool functions // generate sparse random x of size sz with nnz nonzeros void gen_sp_rand_x(Vector& x, int sz, int nnz); // inf norm of a vector @@ -39,49 +52,53 @@ Float vec_inf_norm(const Vector& x); int main(int argc, char *argv[]) { - - /// dim + /// generate parameter for f + // matrix dimention int nrow = 15; int ncol = 75; - - - /// set f, the differentiable part - MyFuncF f; - - f.A.setRandom(nrow, ncol); - for(int i = 0; i < f.A.cols(); ++i) - f.A.col(i).normalize(); - + f_param fpar; + fpar.A.setRandom(nrow, ncol); + for(int i = 0; i < fpar.A.cols(); ++i) + fpar.A.col(i).normalize(); // x0, the ground truth Vector x0; Float spdensity = 0.05; int nnz = max(static_cast(ncol*spdensity), 5); - gen_sp_rand_x(x0, ncol, nnz); - + gen_sp_rand_x(x0, ncol, nnz); // b, the rhs - f.b = f.A * x0; + fpar.b = fpar.A * x0; + // bind parameter to function f + auto f_eval2 = std::bind(f_eval, _1, fpar); + auto f_grad2 = std::bind(f_grad, _1, fpar); + + /// set param for r + vector ri{FuncRInfo(FuncRInfo::Norm1, ncol)}; + + /// set tau + Float tau = 0.8; // tau should be tuned for specific problems + /// set solver param + BCDSolverParam sol_par; + sol_par.set_f(f_eval2, f_grad2); + sol_par.set_r(ri); + sol_par.set_tau(tau); - /// set r_i, the non-smooth part - int dim = f.A.cols(); - vector ri{FuncR(FuncR::Norm1, dim)}; + /// set solver option + BCDSolverOption sol_opt; + sol_opt.MaxIter = 20000; + sol_opt.xTol = 1e-6; - /// set par - BCDSolverParam par; - // set tau - par.tau = 0.8; // tau should be tuned for specific problems - par.maxIter = 20000; - par.objTol = 1e-6; - par.xTol = 1e-6; + /// set solver and solve + BCDSolver bcdsol(sol_par, sol_opt); + bcdsol.solve(); - // - BCDSolver sol(par, f, ri); - Vector x_init(dim); - x_init.setZeros(); - sol.init_x(x_init); - sol.solve(); - cout << "-----------------------------------" << endl; - cout << "x0 is " << x0 << endl; + /// outpout + cout.unsetf(ios::fixed); + cout.unsetf(ios::scientific); + cout << "x0: " << endl; + cout << x0 << endl; + cout << "result: " << endl; + cout << bcdsol.result() << endl; } @@ -112,11 +129,11 @@ void gen_sp_rand_x(Vector& x, int sz, int nnz) // rand engine unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); default_random_engine generator(seed); - // rand id + // rand index vector idx(sz); for(int i = 0; i < sz; ++i) idx[i] = i; shuffle(idx.begin(), idx.end(), generator); - // rand val + // rand value uniform_real_distribution distribution(0.0,1.0); auto dice = std::bind(distribution, generator); for(int i = 0; i < nnz; ++i) diff --git a/include/Base/Solver.hpp b/include/Base/Solver.hpp index fa71ee3..dbabb10 100644 --- a/include/Base/Solver.hpp +++ b/include/Base/Solver.hpp @@ -269,6 +269,15 @@ class Solver IterationFunction iterfunc = nullptr, TerminationFunction terfunc = nullptr, int printlevel = 1); + + Solver( + const Parameter& para, // the input parameter has be been given + const Option& op, + ObjectiveFunction obfunc = nullptr, + InitializationFunction initfunc=nullptr, + IterationFunction iterfunc = nullptr, + TerminationFunction terfunc = nullptr, + int printlevel = 1); /** solve the problem */ void solve(); @@ -342,6 +351,31 @@ Solver::Solver( { } +template +Solver::Solver( + const Parameter& para, + const Option& op, + ObjectiveFunction obfunc, + InitializationFunction initfunc, + IterationFunction iterfunc, + TerminationFunction terfunc, + int printlevel) + : + __para(para), + __op(op), + __print_level(printlevel), + __ob_func(obfunc), + __init_func(initfunc), + __iter_func(iterfunc), + __ter_func(terfunc), + __arg_to_re_func(argEqualToResult), + __ostream(std::cout), + __begin_print_func(&solverBeginPrint), + __iter_print_func(&iterationPrint), + __end_print_func(&solverEndPrint) +{ +} + template void Solver::solve() { diff --git a/include/Solvers/BCD.hpp b/include/Solvers/BCD.hpp index 5992b54..ea2b8ee 100644 --- a/include/Solvers/BCD.hpp +++ b/include/Solvers/BCD.hpp @@ -1,5 +1,5 @@ #ifndef BCDSOLVER - +#define BCDSOLVER /** * BCD solver @@ -26,552 +26,429 @@ typedef COPT::BasicOption BasicOption; #include #include #include +#include using namespace std; typedef kernel::podscalar Float; +inline void get_vec_block(const Vector& x0, int id1, int id2, Vector& x); +inline void set_vec_block(Vector& x0, int id1, int id2, const Vector& x); +inline Vector get_vec_block(const Vector& x0, int id1, int id2); +inline Float shrinkage(Float z, Float tau); -/// function F: the differentiable part -/// need to be derived to overload the eval() and grad() method -class FuncF -{ -public: - - virtual Float eval(const Vector& x) = 0; - virtual Vector grad(const Vector& x) = 0; - inline Float operator()(const Vector& x) {return eval(x);} -}; -/// function R: the non-smooth part -/// usually in the form of 1-norm or a indicator function -class FuncR +////////////////////////////////////////////////////// +/// block control +class BlockCtrl { -public: // Function type - - enum RType { Norm1, Indicator }; +public: -public: // interface + void setBlocking(const vector& blocking) + { + m_block = blocking; + m_nblock = m_block.size(); + m_blockIdx.resize(m_nblock+1); + m_blockIdx[0] = 0; + for(int i = 0 ; i < m_nblock; ++i) + { + m_blockIdx[i+1] = m_block[i]; + m_blockIdx[i+1] += m_blockIdx[i]; + } + is_set = true; + } - FuncR(RType type = Norm1, int dim = 1) - : m_type(type) - , m_dim(dim) + int n_block()const {return m_nblock;} + int block_dim(int i)const{return m_block[i];} + int full_dim()const { - assert(type == Norm1); - m_funcEval = &FuncR::func_norm1; + int dim = 0; + for(const auto& t:m_block) dim += t; + return dim; } - - FuncR(RType type, int dim, - const vector& lb, const vector& ub) - : m_type(type) - , m_dim(dim) - , m_lb(lb) - , m_ub(ub) + void block_id_interval(int i, int& a, int& b)const { - assert(type == Indicator); - m_funcEval = &FuncR::func_indicator_rectangel; + a = m_blockIdx[i]; + b = m_blockIdx[i+1]; } - Float eval(const Vector& x) {return (this->*m_funcEval)(x);} - inline Float operator()(const Vector& x) {return eval(x); } - - int dim()const{return m_dim;} - RType type()const{return m_type;} - - // getter & setter - void set_type(RType t) { m_type = t; } - void set_dim(int dim) { m_dim = dim; } - void set_bound(const vector& lb, const vector& ub) - { m_lb = lb; m_ub = ub; } - - -private: // typedef - - typedef Float (FuncR::*FUNC_EVAL)(const Vector& x); - -private: // property - - RType m_type; - int m_dim; - - - FUNC_EVAL m_funcEval; - vector m_lb, m_ub; // lower bound, upper bound - - -private: // inner implement - - Float func_norm1(const Vector& x) { return x.absNorm(); } - - Float func_indicator_rectangel(const Vector& x) + inline Vector get_vec_block(const Vector& x0, int kblock)const { - assert(m_lb.size() == x.size() && m_ub.size() == x.size()); - auto n = x.size(); - bool inside = true; - for(int i = 0; i < n; ++i) - { - if(x[i] < m_lb[i] || x[i] > m_ub[i]) - { - inside = false; - break; - } - } - if(inside) - return 0; - else - return numeric_limits::infinity(); + assert(is_set); + int ia, ib; + block_id_interval(kblock, ia, ib); + return ::get_vec_block(x0, ia, ib); + } + inline void set_vec_block(Vector& x0, int kblock, const Vector& x)const + { + assert(is_set); + int ia, ib; + block_id_interval(kblock, ia, ib); + ::set_vec_block(x0, ia, ib, x); } + bool is_set = false; + +private: + vector m_block; + vector m_blockIdx; + int m_nblock = 0; }; - -/// solver parameters -typedef struct BCDSolverParam +/// solver option +struct BCDSolverOption : public BasicOption { - // objective - Float tau; - - // iteration - int maxIter; // max iteration number - Float errorTol; // - Float xTol; - Float objTol; - int verbose; +/// from base class +// int MaxIter; // +// Float Threshold; // - - // defaults - BCDSolverParam() - : maxIter(100) - , errorTol(1e-6) + /// added member + Float xTol; // tolerance of change in x + Float objTol; // tolerance of change in objective + + BCDSolverOption() + : BasicOption() , xTol(1e-6) , objTol(1e-6) - , verbose(0) - , tau(1.0) {} - -}BCDSolverParam; - +}; -class BCDSolver : public COPT::GeneralSolver +// r - the non-smooth part +struct FuncRInfo { -public: + // type of function r + enum RType { Norm1, Indicator }; - BCDSolver( - BCDSolverParam param, - FuncF& f, - vector& r_vec); + RType type; + int dim; + FuncRInfo(RType tp = Norm1, int dm = 1) + : type(tp) + , dim(dm) + {} +}; + +/// solver parameters +class BCDSolverParam : public BasicParameter +{ public: + + /// objective + // tau + Float m_tau; + // f - the differentiable part + function m_fEval; + function m_fGrad; + - void init_x(const Vector& x); - Float objective(const Vector& x)const; - - /** get the result */ - const kernel::Vector& result() const; - -private: // - - //// - /** vritual function that has to be implemented by derived classes */ - /** something happens in the beginning of solving */ - void solvingBegin(); - - /** the real solving part */ -// void doSolve(); - - /** the real computation part */ - void doCompute(); - - /** whether the terminal satisfied */ - bool terminalSatisfied() const; - - /** an iteration - * the estimated error is returned for iteration - */ - podscalar doOneIteration(); - - /** compute the objective function */ - podscalar objective() const; - - - -private: // basic param - - BCDSolverParam m_param; - FuncF& m_f; - vector& m_rVec; - -private: // opt vars - - Float m_obj, m_objprev; - Float m_step; - Vector m_x, m_xprev, m_xprev2; - Vector m_gradf; - Vector m_descent, m_descentprev; +private: + /// ri + // vector of ri + vector m_ri; + // evaluation + Float ri_eval(const FuncRInfo& r, const Vector& x)const + { + switch(r.type) + { + case FuncRInfo::RType::Norm1: + return x.absNorm(); + break; + case FuncRInfo::RType::Indicator: + return 0; + break; + } + } - vector m_block; - int m_dim; - int m_nblock; +public: + /// iteration + int verbose; + int kIter; -private: // iteration ctrl + /// intermediate variables + Vector xprev; + Float obj, objprev; + + BlockCtrl m_blockCtrl; - int m_kIter; - bool m_isInit; + /// auto updated inner param + struct paramAuto + { + Float t, w, L; + }; + vector m_paramAuto, m_paramAutoPrev; -private: // inner tools +public: - inline Float errorEst(); + /// defaults + BCDSolverParam() + : BasicParameter() + , verbose(0) + , m_tau(1.0) + , kIter(0) + {} - void solve_subblock(int kblock); + BCDSolverParam( + const function& f_eval, + const function& f_grad, + const vector& ri, + Float tau + ) + : BasicParameter() + , verbose(0) + , m_tau(1.0) + , kIter(0) + { + set_f(f_eval, f_grad); + set_r(ri); + set_tau(tau); + } - // auto updated parameters - typedef struct paramAuto + void set_f( + const function& f_eval, + const function& f_grad) { - Float t; - Float w; - Float L; - }paramAuto; - vector m_paramAuto, m_paramAutoPrev; - void paramInit(); - void paramUpdate(); - void updateL(); - vector m_xhat; + this->m_fEval = f_eval; + this->m_fGrad = f_grad; + } - // block control - class BlockCtrl + void set_r(const vector& ri) { - public: - - void setBlocking(const vector& blocking) - { - m_block = blocking; - m_nblock = m_block.size(); - m_blockIdx.resize(m_nblock+1); - m_blockIdx[0] = 0; - for(int i = 0 ; i < m_nblock; ++i) - { - m_blockIdx[i+1] = m_block[i]; - m_blockIdx[i+1] += m_blockIdx[i]; - } - } - - int n_block()const {return m_nblock;} - int block_dim(int i)const{return m_block[i];} - void block_id_interval(int i, int& a, int& b)const + this->m_ri = ri; + vector blocking(ri.size()); + for(int i = 0; i < ri.size(); ++i) { - a = m_blockIdx[i]; - b = m_blockIdx[i+1]; + blocking[i] = ri[i].dim; } - - private: - vector m_block; - vector m_blockIdx; - int m_nblock; - }; - BlockCtrl m_blockCtrl; - - static void get_block(const Vector& x0, int id1, int id2, Vector& x) - { - x.resize(id2-id1); - for(int i = id1, k = 0; i < id2; ++i, ++k) - x[k] = x0[i]; + m_blockCtrl.setBlocking(blocking); } - static void set_block(Vector& x0, int id1, int id2, const Vector& x) + + void set_tau(Float tau) { - for(int i = id1, k = 0; i < id2; ++i, ++k) - x0[i] = x[k]; + this->m_tau = tau; } - Float shrinkage(Float z, Float tau) + inline Float f_eval(const Vector& x)const{return m_fEval(x);} + inline Vector f_grad(const Vector& x)const{return m_fGrad(x);} + inline Float r_eval(const Vector& x, int kblock)const { - Float t = 1.0 - tau / fabs(z); - return t > 0.0 ? t * z : 0.0; + return ri_eval(m_ri[kblock], m_blockCtrl.get_vec_block(x,kblock)); } - void print(); - }; -/////////////////////////////////////////////////////////////////////////////////////////// -/* - * - * Implementation - * - * - * - * - */ +void updateL(const Vector& x, BCDSolverParam& par); +void paramInit(const Vector& x, BCDSolverParam& par); +void paramUpdate(const Vector& x, BCDSolverParam& par); +void solve_subblock(Vector& x, BCDSolverParam& par, int kblock); +Float errorEst(const Vector& x, const BCDSolverParam& par); -BCDSolver::BCDSolver(BCDSolverParam param, FuncF &f, vector &r_vec) - : COPT::GeneralSolver(param.maxIter,param.xTol) - , m_param(param) - , m_f(f) - , m_rVec(r_vec) - , m_nblock(0) - , m_dim(0) - , m_kIter(0) - , m_isInit(false) - -{ - m_nblock = r_vec.size(); - m_block.resize(m_nblock); - m_paramAuto.resize(m_nblock); - m_dim = 0; - for(int i = 0; i < m_nblock; ++i) - { - m_block[i] = r_vec[i].dim(); - m_dim += m_block[i]; - } - m_blockCtrl.setBlocking(m_block); - m_xhat.resize(m_nblock); - - // resize - m_x.resize(m_dim); - m_xprev.resize(m_dim); - m_xprev2.resize(m_dim); - m_gradf.resize(m_dim); - m_descent.resize(m_dim); - m_descentprev.resize(m_dim); - - // init value - m_objprev = numeric_limits::max(); - Vector tx(m_dim); - for(auto& t:tx) t = 100.0 * m_param.xTol; - m_xprev = m_x + tx; - m_xprev2 = m_xprev + tx; - - paramInit(); -} +////////////////////// -Float BCDSolver::objective(const Vector &x) const +Float bcd_objective(const Vector& x, const BCDSolverParam& par) { Float obj = 0; - if (m_nblock == 1) - obj = m_rVec[0](x); - else - { - for(int i = 0; i < m_nblock; ++i) - { - Vector xi; - int ia, ib; - m_blockCtrl.block_id_interval(i, ia, ib); - get_block(x, ia, ib, xi); - obj += m_rVec[i](xi); - } - } - - obj *= m_param.tau; - obj += m_f(x); + // r part + for(int i = 0; i < par.m_blockCtrl.n_block(); ++i) + obj += par.r_eval(x, i); + // f part + obj += par.f_eval(x); return obj; } -void BCDSolver::init_x(const Vector& x) +/// +Float bcd_oneIteration(Vector& x, BCDSolverParam& par) { - if (x.size() == m_dim) + par.xprev = x; + par.objprev = par.obj; + // update x block-wise + auto nblock = par.m_blockCtrl.n_block(); + for(int kb = 0; kb < nblock; ++kb) { - m_x = x; - m_obj = objective(m_x); - m_isInit = true; - } - else - { - cout << "error in init: vector dimension inconsistent" << endl; - return; + solve_subblock(x, par, kb); } - - if (m_param.verbose) - print(); + + // + ++par.kIter; + paramUpdate(x, par); + par.obj = bcd_objective(x, par); + + return errorEst(x, par); } - - -void BCDSolver::paramInit() +bool bcd_terminate(const Vector& x, const BCDSolverOption& o, const BCDSolverParam& par) { - auto& par = m_paramAuto; - // t - for(int k = 0; k < m_nblock; ++k) + // iter number + if(par.kIter > o.MaxIter) { - par[k].t = 1.0; + return true; } - // L - updateL(); + // x change + Float e_x = (x - par.xprev).norm(); + if(e_x < o.xTol) + { + return true; + } -} - -void BCDSolver::paramUpdate() -{ - auto& par = m_paramAuto; - auto& par0 = m_paramAutoPrev; - par0 = par; + // obj change + // ... - // t - for(int i = 0; i < m_nblock; ++i) - { - par[i].t = 0.5*sqrt(1.0+4.0*par[i].t*par[i].t); - } + // gradient norm, step length, relative error ... - // L - updateL(); - // w - if(m_kIter > 0) - { - for(int i = 0; i < m_nblock; ++i) - { - Float wt1 = (par0[i].t - 1.0) / par0[i].t; - Float wt2 = sqrt(par0[i].L / par[i].L); - par[i].w = min(wt1, wt2); - } - } -} - -void BCDSolver::updateL() -{ - Vector g = m_f.grad(m_x); - int id1, id2; - auto& par = m_paramAuto; - for(int kb = 0; kb < m_nblock; ++kb) - { - m_blockCtrl.block_id_interval(kb, id1, id2); - Vector gk; - get_block(g, id1, id2, gk); - Matrix ggt = gk.mulTrans(gk); - Float maxe = ggt.frobeniusNorm(); - par[kb].L = maxe; - } + return false; } - -void BCDSolver::solve_subblock(int kblock) +void bcd_solverInit(Vector& x, BCDSolverParam& par) { - const auto& ib = kblock; - const auto& par = m_paramAuto; - int id1, id2; - m_blockCtrl.block_id_interval(ib,id1,id2); - Vector xi; - get_block(m_x, id1, id2, xi); - if(m_kIter > 1) + int fd = par.m_blockCtrl.full_dim(); + if (fd > 0) { - Vector xi0; - get_block(m_xprev, id1, id2, xi0); - xi = xi + par[ib].w*(xi - xi0); - set_block(m_x, id1, id2, xi); - m_xhat[kblock] = xi; + x.resize(fd); + x.setZeros(); + par.obj = bcd_objective(x, par); + + // + Float incr = 0.1*numeric_limits::max(); + par.objprev = par.obj + incr; + par.xprev = x; + par.xprev[0] = incr; + paramInit(x, par); } - - - // shrinkage for each element - Vector grad = m_f.grad(m_x); - Vector gi; - get_block(grad, id1, id2, gi); - - int n = id2 - id1; - Float tL = 1.0 / par[ib].L; - Vector tg = xi - tL * gi; - for(int i = 0; i < n; ++i) + else { - xi[i] = shrinkage(tg[i], tL); + cerr << "Invalid problem, please check param setting!" << endl; } - set_block(m_x, id1, id2, xi); } +typedef COPT::Solver tagBCDSolver; +class BCDSolver : public tagBCDSolver +{ +public: + BCDSolver(const BCDSolverParam& bcd_par, + const BCDSolverOption& bcd_opt) + : tagBCDSolver( + bcd_par, + bcd_opt, + bcd_objective, + bcd_solverInit, + bcd_oneIteration, + bcd_terminate) + {} +}; + + +///////////////////////////////////////////////////////////////// -void BCDSolver::solvingBegin() +inline void get_vec_block(const Vector& x0, int id1, int id2, Vector& x) { - if(!m_isInit) - { - m_x.setZeros(); - m_obj = objective(m_x); - } - - // init xhat (x hat) - for(int i = 0; i < m_nblock; ++i) - { - int ia, ib; - m_blockCtrl.block_id_interval(i, ia, ib); - Vector xi; - get_block(m_x, ia, ib, xi); - m_xhat[i] = xi; - } + x.resize(id2-id1); + for(int i = id1, k = 0; i < id2; ++i, ++k) + x[k] = x0[i]; } -void BCDSolver::doCompute() +inline void set_vec_block(Vector& x0, int id1, int id2, const Vector& x) { - + for(int i = id1, k = 0; i < id2; ++i, ++k) + x0[i] = x[k]; } -kernel::podscalar BCDSolver::doOneIteration() +inline Vector get_vec_block(const Vector& x0, int id1, int id2) { - // preproc - m_xprev = m_x; - m_objprev = m_obj; + Vector x(id2-id1); + for(int i = id1, k = 0; i < id2; ++i, ++k) + x[k] = x0[i]; + return x; +} + + +inline Float shrinkage(Float z, Float tau) +{ + Float t = 1.0 - tau / fabs(z); + return t > 0.0 ? t * z : 0.0; +} - // solve - for(int kblock = 0; kblock < m_nblock; ++kblock) +void updateL(const Vector& x, BCDSolverParam& par) +{ + auto& tpar = par.m_paramAuto; + int nblock = par.m_blockCtrl.n_block(); + Vector g = par.f_grad(x); + for(int i = 0; i < nblock; ++i) { - solve_subblock(kblock); + Vector gi = par.m_blockCtrl.get_vec_block(g, i); + Matrix ggt = gi.mulTrans(gi); + Float maxe = ggt.frobeniusNorm(); + tpar[i].L = maxe; } - ++m_kIter; - paramUpdate(); - - // postproc - m_obj = objective(m_x); - - - if (m_param.verbose) - print(); - - return errorEst(); } - -Float BCDSolver::errorEst() +void paramInit(const Vector& x, BCDSolverParam& par) { - return (m_x - m_xprev).norm(); + int nblock = par.m_blockCtrl.n_block(); + par.m_paramAuto.resize(nblock); + // init t + for(int i = 0;i < nblock; ++i) + par.m_paramAuto[i].t = 1.0; + // init L + updateL(x, par); } - -bool BCDSolver::terminalSatisfied()const +void paramUpdate(const Vector& x, BCDSolverParam& par) { - // iter number - if (__iter_num > __max_iteration) - return true; - - // x change - Float e_x = (m_x - m_xprev).norm(); - if (e_x > m_param.xTol) - return true; + par.m_paramAutoPrev = par.m_paramAuto; + const auto& tpar0 = par.m_paramAutoPrev; + const auto nblock = par.m_blockCtrl.n_block(); + auto& tpar = par.m_paramAuto; - // obj change - Float e_obj = fabs(m_obj - m_objprev); - if (e_obj > m_param.objTol) - return true; - - // gradient norm - // ... - - // step length - // ... + // update t + for(int i = 0; i < nblock; ++i) + { + auto& ti = tpar[i].t; + ti = 0.5*sqrt( 1.0 + 4.0*ti*ti ); + } + // update L + updateL(x, par); + // update w + if(par.kIter > 0) + { + for(int i = 0; i < nblock; ++i) + { + Float wt1 = (tpar0[i].t - 1.0) / tpar0[i].t; + Float wt2 = sqrt(tpar0[i].L / tpar[i].L); + tpar[i].w = min(wt1, wt2); + } + } - // relative error - // ... } -kernel::podscalar BCDSolver::objective()const +void solve_subblock(Vector& x, BCDSolverParam& par, int kblock) { - return objective(m_x); + const auto& tpar = par.m_paramAuto; + + Vector xi = par.m_blockCtrl.get_vec_block(x, kblock); + if(par.kIter > 1) + { + Vector xi0 = par.m_blockCtrl.get_vec_block(par.xprev, kblock); + xi = xi + tpar[kblock].w*(xi - xi0); + par.m_blockCtrl.set_vec_block(x, kblock, xi); + } + + // shrinkage for each element + Vector g = par.f_grad(x); + Vector gi = par.m_blockCtrl.get_vec_block(g, kblock); + int dim = gi.size(); + Float tL = 1.0 / tpar[kblock].L; + Vector tgi = xi - tL * gi; + for(int i = 0; i < dim; ++i) + { + xi[i] = shrinkage(tgi[i], tL); + } + par.m_blockCtrl.set_vec_block(x, kblock, xi); } - -const kernel::Vector& BCDSolver::result() const +Float errorEst(const Vector& x, const BCDSolverParam& par) { - return m_x; + return (x - par.xprev).norm(); } -void BCDSolver::print() -{ - cout << "Iter: " << m_kIter << " - x: " << m_x << " - obj: " << m_obj << endl; -} -#define BCDSOLVER #endif // BCDSOLVER From 70467c4efe4739af93e2d901267961d6c4b628fd Mon Sep 17 00:00:00 2001 From: rand0mwalker Date: Tue, 21 Jul 2015 21:47:45 +0800 Subject: [PATCH 3/3] change naming convention. 1. use the camel-case naming convention for classes and functions. 2. separete the delaration and implementation of BCDSovler. The declaration is kept in file include/Solvers/BCD.hpp, while the implementation is put in a new file include/Solvers/BCDImpl.hpp. todo: a tuning strategy for the update of parameter L. --- examples/bcd.cpp | 46 ++-- include/Solvers/BCD.hpp | 415 +++++------------------------------- include/Solvers/BCDImpl.hpp | 395 ++++++++++++++++++++++++++++++++++ 3 files changed, 471 insertions(+), 385 deletions(-) create mode 100644 include/Solvers/BCDImpl.hpp diff --git a/examples/bcd.cpp b/examples/bcd.cpp index 3cd2e2a..246c822 100644 --- a/examples/bcd.cpp +++ b/examples/bcd.cpp @@ -24,20 +24,20 @@ using namespace std::placeholders; /// f(x) // parameter of f(x): A & b -struct f_param +struct FParam { Matrix A; Vector b; }; // evaluation of f(x) -Float f_eval(const Vector& x, const f_param& fpar) +Float fEval(const Vector& x, const FParam& fpar) { Vector r = fpar.A*x - fpar.b; Float val = r.norm(); return 0.5 * val * val; } // gradient of f(x) -Vector f_grad(const Vector& x, const f_param& fpar) +Vector fGrad(const Vector& x, const FParam& fpar) { Matrix aat = fpar.A.transMulti(fpar.A); Vector g = aat*x - fpar.A.transpose() * fpar.b; @@ -46,51 +46,51 @@ Vector f_grad(const Vector& x, const f_param& fpar) /// tool functions // generate sparse random x of size sz with nnz nonzeros -void gen_sp_rand_x(Vector& x, int sz, int nnz); +void generateSparseRandomX(Vector& x, int sz, int nnz); // inf norm of a vector -Float vec_inf_norm(const Vector& x); +Float vectorInfNorm(const Vector& x); int main(int argc, char *argv[]) { /// generate parameter for f // matrix dimention - int nrow = 15; - int ncol = 75; - f_param fpar; - fpar.A.setRandom(nrow, ncol); + int nRow = 15; + int nCol = 75; + FParam fpar; + fpar.A.setRandom(nRow, nCol); for(int i = 0; i < fpar.A.cols(); ++i) fpar.A.col(i).normalize(); // x0, the ground truth Vector x0; Float spdensity = 0.05; - int nnz = max(static_cast(ncol*spdensity), 5); - gen_sp_rand_x(x0, ncol, nnz); + int nnz = max(static_cast(nCol*spdensity), 5); + generateSparseRandomX(x0, nCol, nnz); // b, the rhs fpar.b = fpar.A * x0; // bind parameter to function f - auto f_eval2 = std::bind(f_eval, _1, fpar); - auto f_grad2 = std::bind(f_grad, _1, fpar); + auto f_eval2 = std::bind(fEval, _1, fpar); + auto f_grad2 = std::bind(fGrad, _1, fpar); /// set param for r - vector ri{FuncRInfo(FuncRInfo::Norm1, ncol)}; + vector ri{FuncRInfo(FuncRInfo::Norm1, nCol)}; /// set tau Float tau = 0.8; // tau should be tuned for specific problems /// set solver param BCDSolverParam sol_par; - sol_par.set_f(f_eval2, f_grad2); - sol_par.set_r(ri); - sol_par.set_tau(tau); + sol_par.setF(f_eval2, f_grad2); + sol_par.setR(ri); + sol_par.setTau(tau); /// set solver option BCDSolverOption sol_opt; sol_opt.MaxIter = 20000; - sol_opt.xTol = 1e-6; + sol_opt.XTol = 1e-6; /// set solver and solve - BCDSolver bcdsol(sol_par, sol_opt); - bcdsol.solve(); + BCDSolver bcd_sol(sol_par, sol_opt); + bcd_sol.solve(); /// outpout cout.unsetf(ios::fixed); @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) cout << "x0: " << endl; cout << x0 << endl; cout << "result: " << endl; - cout << bcdsol.result() << endl; + cout << bcd_sol.result() << endl; } @@ -111,7 +111,7 @@ int main(int argc, char *argv[]) */ -Float vec_inf_norm(const Vector& x) +Float vectorInfNorm(const Vector& x) { auto v = x[0]; for(const auto& xi:x) @@ -122,7 +122,7 @@ Float vec_inf_norm(const Vector& x) return v; } -void gen_sp_rand_x(Vector& x, int sz, int nnz) +void generateSparseRandomX(Vector& x, int sz, int nnz) { x.resize(sz); x.setZeros(); diff --git a/include/Solvers/BCD.hpp b/include/Solvers/BCD.hpp index ea2b8ee..fd49715 100644 --- a/include/Solvers/BCD.hpp +++ b/include/Solvers/BCD.hpp @@ -31,101 +31,46 @@ using namespace std; typedef kernel::podscalar Float; +////////////////////////////////////////////////////// -inline void get_vec_block(const Vector& x0, int id1, int id2, Vector& x); -inline void set_vec_block(Vector& x0, int id1, int id2, const Vector& x); -inline Vector get_vec_block(const Vector& x0, int id1, int id2); -inline Float shrinkage(Float z, Float tau); - +/// r - the non-smooth part +struct FuncRInfo +{ + // type of function r + enum RType { Norm1, Indicator }; + + RType type; + int dim; + + FuncRInfo(RType tp = Norm1, int dm = 1); +}; -////////////////////////////////////////////////////// /// block control class BlockCtrl { public: - void setBlocking(const vector& blocking) - { - m_block = blocking; - m_nblock = m_block.size(); - m_blockIdx.resize(m_nblock+1); - m_blockIdx[0] = 0; - for(int i = 0 ; i < m_nblock; ++i) - { - m_blockIdx[i+1] = m_block[i]; - m_blockIdx[i+1] += m_blockIdx[i]; - } - is_set = true; - } - - int n_block()const {return m_nblock;} - int block_dim(int i)const{return m_block[i];} - int full_dim()const - { - int dim = 0; - for(const auto& t:m_block) dim += t; - return dim; - } - void block_id_interval(int i, int& a, int& b)const - { - a = m_blockIdx[i]; - b = m_blockIdx[i+1]; - } - - inline Vector get_vec_block(const Vector& x0, int kblock)const - { - assert(is_set); - int ia, ib; - block_id_interval(kblock, ia, ib); - return ::get_vec_block(x0, ia, ib); - } - inline void set_vec_block(Vector& x0, int kblock, const Vector& x)const - { - assert(is_set); - int ia, ib; - block_id_interval(kblock, ia, ib); - ::set_vec_block(x0, ia, ib, x); - } - - bool is_set = false; + inline void setBlocking(const vector& blocking); + inline int nBlock()const; + inline int blockDimension(int i)const; + inline int fullDimension()const; + inline void blockIdexInterval(int i, int& a, int& b)const; + inline Vector getVectorBlock(const Vector& x0, int kblock)const; + inline void setVectorBlock(Vector& x0, int kblock, const Vector& x)const; private: - vector m_block; - vector m_blockIdx; - int m_nblock = 0; + bool __is_set = false; + int __nblock = 0; + vector __block; + vector __blockIdx; }; /// solver option struct BCDSolverOption : public BasicOption { -/// from base class -// int MaxIter; // -// Float Threshold; // - /// added member - Float xTol; // tolerance of change in x - Float objTol; // tolerance of change in objective - - BCDSolverOption() - : BasicOption() - , xTol(1e-6) - , objTol(1e-6) - {} -}; - -// r - the non-smooth part -struct FuncRInfo -{ - // type of function r - enum RType { Norm1, Indicator }; - - RType type; - int dim; - - FuncRInfo(RType tp = Norm1, int dm = 1) - : type(tp) - , dim(dm) - {} + Float XTol = 1e-6; // tolerance of change in x + Float ObjTol= 1e-6; // tolerance of change in objective }; /// solver parameters @@ -135,320 +80,66 @@ class BCDSolverParam : public BasicParameter /// objective // tau - Float m_tau; + Float __tau; // f - the differentiable part - function m_fEval; - function m_fGrad; + function __fEval; + function __fGrad; - private: /// ri // vector of ri - vector m_ri; + vector __ri; // evaluation - Float ri_eval(const FuncRInfo& r, const Vector& x)const - { - switch(r.type) - { - case FuncRInfo::RType::Norm1: - return x.absNorm(); - break; - case FuncRInfo::RType::Indicator: - return 0; - break; - } - } - + Float riEval(const FuncRInfo& r, const Vector& x)const; + public: /// iteration - int verbose; - int kIter; + int __verbose; + int __k_iter; /// intermediate variables - Vector xprev; - Float obj, objprev; - - BlockCtrl m_blockCtrl; + Vector __x_prev; + Float __obj, __obj_prev; + BlockCtrl __block_ctrl; /// auto updated inner param - struct paramAuto + struct ParamAuto { Float t, w, L; }; - vector m_paramAuto, m_paramAutoPrev; + vector __param_auto, __param_auto_prev; public: /// defaults - BCDSolverParam() - : BasicParameter() - , verbose(0) - , m_tau(1.0) - , kIter(0) - {} - + BCDSolverParam(); BCDSolverParam( - const function& f_eval, - const function& f_grad, + const function& fEval, + const function& fGrad, const vector& ri, - Float tau - ) - : BasicParameter() - , verbose(0) - , m_tau(1.0) - , kIter(0) - { - set_f(f_eval, f_grad); - set_r(ri); - set_tau(tau); - } + Float tau); - void set_f( - const function& f_eval, - const function& f_grad) - { - this->m_fEval = f_eval; - this->m_fGrad = f_grad; - } - - void set_r(const vector& ri) - { - this->m_ri = ri; - vector blocking(ri.size()); - for(int i = 0; i < ri.size(); ++i) - { - blocking[i] = ri[i].dim; - } - m_blockCtrl.setBlocking(blocking); - } - - void set_tau(Float tau) - { - this->m_tau = tau; - } - - inline Float f_eval(const Vector& x)const{return m_fEval(x);} - inline Vector f_grad(const Vector& x)const{return m_fGrad(x);} - inline Float r_eval(const Vector& x, int kblock)const - { - return ri_eval(m_ri[kblock], m_blockCtrl.get_vec_block(x,kblock)); - } + void setF( + const function& fEval, + const function& fGrad); + void setR(const vector& ri); + void setTau(Float tau); + inline Float fEval(const Vector& x)const; + inline Vector fGrad(const Vector& x)const; + inline Float rEval(const Vector& x, int kblock)const; }; -void updateL(const Vector& x, BCDSolverParam& par); -void paramInit(const Vector& x, BCDSolverParam& par); -void paramUpdate(const Vector& x, BCDSolverParam& par); -void solve_subblock(Vector& x, BCDSolverParam& par, int kblock); -Float errorEst(const Vector& x, const BCDSolverParam& par); - -////////////////////// - -Float bcd_objective(const Vector& x, const BCDSolverParam& par) -{ - Float obj = 0; - // r part - for(int i = 0; i < par.m_blockCtrl.n_block(); ++i) - obj += par.r_eval(x, i); - // f part - obj += par.f_eval(x); - return obj; -} - -/// -Float bcd_oneIteration(Vector& x, BCDSolverParam& par) -{ - par.xprev = x; - par.objprev = par.obj; - // update x block-wise - auto nblock = par.m_blockCtrl.n_block(); - for(int kb = 0; kb < nblock; ++kb) - { - solve_subblock(x, par, kb); - } - - // - ++par.kIter; - paramUpdate(x, par); - par.obj = bcd_objective(x, par); - - return errorEst(x, par); -} - -bool bcd_terminate(const Vector& x, const BCDSolverOption& o, const BCDSolverParam& par) -{ - // iter number - if(par.kIter > o.MaxIter) - { - return true; - } - - // x change - Float e_x = (x - par.xprev).norm(); - if(e_x < o.xTol) - { - return true; - } - - // obj change - // ... - - // gradient norm, step length, relative error ... - - - - return false; -} - -void bcd_solverInit(Vector& x, BCDSolverParam& par) -{ - int fd = par.m_blockCtrl.full_dim(); - if (fd > 0) - { - x.resize(fd); - x.setZeros(); - par.obj = bcd_objective(x, par); - - // - Float incr = 0.1*numeric_limits::max(); - par.objprev = par.obj + incr; - par.xprev = x; - par.xprev[0] = incr; - paramInit(x, par); - } - else - { - cerr << "Invalid problem, please check param setting!" << endl; - } - -} +/// bcd solver typedef COPT::Solver tagBCDSolver; class BCDSolver : public tagBCDSolver { public: BCDSolver(const BCDSolverParam& bcd_par, - const BCDSolverOption& bcd_opt) - : tagBCDSolver( - bcd_par, - bcd_opt, - bcd_objective, - bcd_solverInit, - bcd_oneIteration, - bcd_terminate) - {} + const BCDSolverOption& bcd_opt); }; - -///////////////////////////////////////////////////////////////// - -inline void get_vec_block(const Vector& x0, int id1, int id2, Vector& x) -{ - x.resize(id2-id1); - for(int i = id1, k = 0; i < id2; ++i, ++k) - x[k] = x0[i]; -} - -inline void set_vec_block(Vector& x0, int id1, int id2, const Vector& x) -{ - for(int i = id1, k = 0; i < id2; ++i, ++k) - x0[i] = x[k]; -} - -inline Vector get_vec_block(const Vector& x0, int id1, int id2) -{ - Vector x(id2-id1); - for(int i = id1, k = 0; i < id2; ++i, ++k) - x[k] = x0[i]; - return x; -} - - -inline Float shrinkage(Float z, Float tau) -{ - Float t = 1.0 - tau / fabs(z); - return t > 0.0 ? t * z : 0.0; -} - -void updateL(const Vector& x, BCDSolverParam& par) -{ - auto& tpar = par.m_paramAuto; - int nblock = par.m_blockCtrl.n_block(); - Vector g = par.f_grad(x); - for(int i = 0; i < nblock; ++i) - { - Vector gi = par.m_blockCtrl.get_vec_block(g, i); - Matrix ggt = gi.mulTrans(gi); - Float maxe = ggt.frobeniusNorm(); - tpar[i].L = maxe; - } -} -void paramInit(const Vector& x, BCDSolverParam& par) -{ - int nblock = par.m_blockCtrl.n_block(); - par.m_paramAuto.resize(nblock); - // init t - for(int i = 0;i < nblock; ++i) - par.m_paramAuto[i].t = 1.0; - // init L - updateL(x, par); -} -void paramUpdate(const Vector& x, BCDSolverParam& par) -{ - par.m_paramAutoPrev = par.m_paramAuto; - const auto& tpar0 = par.m_paramAutoPrev; - const auto nblock = par.m_blockCtrl.n_block(); - auto& tpar = par.m_paramAuto; - - // update t - for(int i = 0; i < nblock; ++i) - { - auto& ti = tpar[i].t; - ti = 0.5*sqrt( 1.0 + 4.0*ti*ti ); - } - // update L - updateL(x, par); - // update w - if(par.kIter > 0) - { - for(int i = 0; i < nblock; ++i) - { - Float wt1 = (tpar0[i].t - 1.0) / tpar0[i].t; - Float wt2 = sqrt(tpar0[i].L / tpar[i].L); - tpar[i].w = min(wt1, wt2); - } - } - -} - -void solve_subblock(Vector& x, BCDSolverParam& par, int kblock) -{ - const auto& tpar = par.m_paramAuto; - - Vector xi = par.m_blockCtrl.get_vec_block(x, kblock); - if(par.kIter > 1) - { - Vector xi0 = par.m_blockCtrl.get_vec_block(par.xprev, kblock); - xi = xi + tpar[kblock].w*(xi - xi0); - par.m_blockCtrl.set_vec_block(x, kblock, xi); - } - - // shrinkage for each element - Vector g = par.f_grad(x); - Vector gi = par.m_blockCtrl.get_vec_block(g, kblock); - int dim = gi.size(); - Float tL = 1.0 / tpar[kblock].L; - Vector tgi = xi - tL * gi; - for(int i = 0; i < dim; ++i) - { - xi[i] = shrinkage(tgi[i], tL); - } - par.m_blockCtrl.set_vec_block(x, kblock, xi); -} -Float errorEst(const Vector& x, const BCDSolverParam& par) -{ - return (x - par.xprev).norm(); -} - +#include "BCDImpl.hpp" #endif // BCDSOLVER diff --git a/include/Solvers/BCDImpl.hpp b/include/Solvers/BCDImpl.hpp new file mode 100644 index 0000000..288b8e9 --- /dev/null +++ b/include/Solvers/BCDImpl.hpp @@ -0,0 +1,395 @@ + +#ifndef BCDSOLVERIMPL +#define BCDSOLVERIMPL + + +/** + Implementation of BCD solver + +*/ + + +#include "BCD.hpp" + + + +/// optimization module functions +Float bcdObjective(const Vector& x, const BCDSolverParam& par); +Float bcdOneIteration(Vector& x, BCDSolverParam& par); +bool bcdTerminate(const Vector& x, const BCDSolverOption& o, const BCDSolverParam& par); +void bcdSolverInit(Vector& x, BCDSolverParam& par); + +/// tool functions +// vector slicing +inline void getVectorBlock(const Vector& x0, int id1, int id2, Vector& x); +inline void setVectorBlock(Vector& x0, int id1, int id2, const Vector& x); +inline Vector getVectorBlock(const Vector& x0, int id1, int id2); +// optimization related +inline Float shrinkage(Float z, Float tau); +void updateL(const Vector& x, BCDSolverParam& par); +void paramInit(const Vector& x, BCDSolverParam& par); +void paramUpdate(const Vector& x, BCDSolverParam& par); +void solveSubblock(Vector& x, BCDSolverParam& par, int kblock); +Float errorEst(const Vector& x, const BCDSolverParam& par); + + + + +////////////////////////////////////////////////////////////////////// + + +/// bcd solver +BCDSolver::BCDSolver(const BCDSolverParam& bcd_par, + const BCDSolverOption& bcd_opt) + : tagBCDSolver( + bcd_par, + bcd_opt, + bcdObjective, + bcdSolverInit, + bcdOneIteration, + bcdTerminate) +{} + + +/// +/// optimization modules +/// +Float bcdObjective(const Vector& x, const BCDSolverParam& par) +{ + Float obj = 0; + // r part + for(int i = 0; i < par.__block_ctrl.nBlock(); ++i) + obj += par.rEval(x, i); + // f part + obj += par.fEval(x); + return obj; +} + +Float bcdOneIteration(Vector& x, BCDSolverParam& par) +{ + par.__x_prev = x; + par.__obj_prev = par.__obj; + // update x block-wise + auto nblock = par.__block_ctrl.nBlock(); + for(int kb = 0; kb < nblock; ++kb) + { + solveSubblock(x, par, kb); + } + + // + ++par.__k_iter; + paramUpdate(x, par); + par.__obj = bcdObjective(x, par); + + return errorEst(x, par); +} + +bool bcdTerminate(const Vector& x, const BCDSolverOption& o, const BCDSolverParam& par) +{ + // iter number + if(par.__k_iter > o.MaxIter) + { + return true; + } + + // x change + Float e_x = (x - par.__x_prev).norm(); + if(e_x < o.XTol) + { + return true; + } + + // obj change + // ... + + // gradient norm, step length, relative error ... + + + + return false; +} + +void bcdSolverInit(Vector& x, BCDSolverParam& par) +{ + int fd = par.__block_ctrl.fullDimension(); + if (fd > 0) + { + x.resize(fd); + x.setZeros(); + par.__obj = bcdObjective(x, par); + + // + Float incr = 0.1*numeric_limits::max(); + par.__obj_prev = par.__obj + incr; + par.__x_prev = x; + par.__x_prev[0] = incr; + paramInit(x, par); + } + else + { + cerr << "Invalid problem, please check param setting!" << endl; + } + +} + + +/// +/// BlockCtrl +/// +void BlockCtrl::setBlocking(const vector &blocking) +{ + __block = blocking; + __nblock = __block.size(); + __blockIdx.resize(__nblock+1); + __blockIdx[0] = 0; + for(int i = 0 ; i < __nblock; ++i) + { + __blockIdx[i+1] = __block[i]; + __blockIdx[i+1] += __blockIdx[i]; + } + __is_set = true; +} + +int BlockCtrl::nBlock()const +{ + return __nblock; +} + +int BlockCtrl::blockDimension(int i)const +{ + return __block[i]; +} + +int BlockCtrl::fullDimension()const +{ + int dim = 0; + for(const auto& t:__block) dim += t; + return dim; +} + +void BlockCtrl::blockIdexInterval(int i, int& a, int& b)const +{ + a = __blockIdx[i]; + b = __blockIdx[i+1]; +} + +Vector BlockCtrl::getVectorBlock(const Vector& x0, int kblock)const +{ + assert(__is_set); + int ia, ib; + blockIdexInterval(kblock, ia, ib); + return ::getVectorBlock(x0, ia, ib); +} + +void BlockCtrl::setVectorBlock(Vector& x0, int kblock, const Vector& x)const +{ + assert(__is_set); + int ia, ib; + blockIdexInterval(kblock, ia, ib); + ::setVectorBlock(x0, ia, ib, x); +} + + +/// +/// r info +/// +FuncRInfo::FuncRInfo(RType tp, int dm) + : type(tp) + , dim(dm) +{} + + +/// +/// solver parameters +/// +BCDSolverParam::BCDSolverParam() + : BasicParameter() + , __verbose(0) + , __tau(1.0) + , __k_iter(0) +{} + +BCDSolverParam::BCDSolverParam( + const function& fEval, + const function& fGrad, + const vector& ri, + Float tau + ) + : BasicParameter() + , __verbose(0) + , __tau(1.0) + , __k_iter(0) +{ + setF(fEval, fGrad); + setR(ri); + setTau(tau); +} + +Float BCDSolverParam::riEval(const FuncRInfo& r, const Vector& x)const +{ + switch(r.type) + { + case FuncRInfo::RType::Norm1: + return x.absNorm(); + break; + case FuncRInfo::RType::Indicator: + return 0; + break; + } +} + +void BCDSolverParam::setF( + const function& fEval, + const function& fGrad) +{ + this->__fEval = fEval; + this->__fGrad = fGrad; +} + +void BCDSolverParam::setR(const vector& ri) +{ + this->__ri = ri; + vector blocking(ri.size()); + for(int i = 0; i < ri.size(); ++i) + { + blocking[i] = ri[i].dim; + } + __block_ctrl.setBlocking(blocking); +} + +void BCDSolverParam::setTau(Float tau) +{ + this->__tau = tau; +} + +inline Float BCDSolverParam::fEval(const Vector& x)const +{ + return __fEval(x); +} + +inline Vector BCDSolverParam::fGrad(const Vector& x)const +{ + return __fGrad(x); +} + +inline Float BCDSolverParam::rEval(const Vector& x, int kblock)const +{ + return riEval(__ri[kblock], __block_ctrl.getVectorBlock(x,kblock)); +} + + +/// +/// tool functions +/// +inline void getVectorBlock(const Vector& x0, int id1, int id2, Vector& x) +{ + x.resize(id2-id1); + for(int i = id1, k = 0; i < id2; ++i, ++k) + x[k] = x0[i]; +} + +inline void setVectorBlock(Vector& x0, int id1, int id2, const Vector& x) +{ + for(int i = id1, k = 0; i < id2; ++i, ++k) + x0[i] = x[k]; +} + +inline Vector getVectorBlock(const Vector& x0, int id1, int id2) +{ + Vector x(id2-id1); + for(int i = id1, k = 0; i < id2; ++i, ++k) + x[k] = x0[i]; + return x; +} + + +inline Float shrinkage(Float z, Float tau) +{ + Float t = 1.0 - tau / fabs(z); + return t > 0.0 ? t * z : 0.0; +} + +void updateL(const Vector& x, BCDSolverParam& par) +{ + auto& tpar = par.__param_auto; + int nblock = par.__block_ctrl.nBlock(); + Vector g = par.fGrad(x); + for(int i = 0; i < nblock; ++i) + { + Vector gi = par.__block_ctrl.getVectorBlock(g, i); + Matrix ggt = gi.mulTrans(gi); + Float maxe = ggt.frobeniusNorm(); + tpar[i].L = maxe; + } +} +void paramInit(const Vector& x, BCDSolverParam& par) +{ + int nblock = par.__block_ctrl.nBlock(); + par.__param_auto.resize(nblock); + // init t + for(int i = 0;i < nblock; ++i) + par.__param_auto[i].t = 1.0; + // init L + updateL(x, par); +} +void paramUpdate(const Vector& x, BCDSolverParam& par) +{ + par.__param_auto_prev = par.__param_auto; + const auto& tpar0 = par.__param_auto_prev; + const auto nblock = par.__block_ctrl.nBlock(); + auto& tpar = par.__param_auto; + + // update t + for(int i = 0; i < nblock; ++i) + { + auto& ti = tpar[i].t; + ti = 0.5*sqrt( 1.0 + 4.0*ti*ti ); + } + // update L + updateL(x, par); + // update w + if(par.__k_iter > 0) + { + for(int i = 0; i < nblock; ++i) + { + Float wt1 = (tpar0[i].t - 1.0) / tpar0[i].t; + Float wt2 = sqrt(tpar0[i].L / tpar[i].L); + tpar[i].w = min(wt1, wt2); + } + } + +} + +void solveSubblock(Vector& x, BCDSolverParam& par, int kblock) +{ + const auto& tpar = par.__param_auto; + + Vector xi = par.__block_ctrl.getVectorBlock(x, kblock); + if(par.__k_iter > 1) + { + Vector xi0 = par.__block_ctrl.getVectorBlock(par.__x_prev, kblock); + xi = xi + tpar[kblock].w*(xi - xi0); + par.__block_ctrl.setVectorBlock(x, kblock, xi); + } + + // shrinkage for each element + Vector g = par.fGrad(x); + Vector gi = par.__block_ctrl.getVectorBlock(g, kblock); + int dim = gi.size(); + Float tL = 1.0 / tpar[kblock].L; + Vector tgi = xi - tL * gi; + for(int i = 0; i < dim; ++i) + { + xi[i] = shrinkage(tgi[i], tL); + } + par.__block_ctrl.setVectorBlock(x, kblock, xi); +} +Float errorEst(const Vector& x, const BCDSolverParam& par) +{ + return (x - par.__x_prev).norm(); +} + + + + +#endif