Line data Source code
#pragma once

#include <optional>

#include "Solver.h"
#include "Functional.h"
#include "LineSearchMethod.h"

namespace elsa
{
    /**
     * @brief L-BFGS solver
     *
     * Limited Memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) is a Limited-Memory quasi-Newton
     * iterative optimization algorithm for non-linear problems. L-BFGS minimizes an objective
     * function @f$f@f$ by first constructing an approximation of it as:
     * @f[
     * m(d) = f(x_i) + \nabla f_i^T d + \frac{1}{2} d^T H^{-1}_i d
     * @f]
     *
     * where @f$f: \mathbb{R}^n \to \mathbb{R}@f$ is differentiable,
     * @f$\nabla f_i: \mathbb{R}^n is the gradient of f at x_i@f$,
     * @f$ d: \mathbb{R}^n is a search direction @f$,
     * @f$ H^{-1}_i: \mathbb{R}^{n \times n} is an approximation of the Hessian \nabla^2 f(x_i) @f$
     *
     * The search direction @f$d@f$ that minimizes the approximation @f$m(d)@f$ is
     * analytically found to be @f$d_i = -H_i \nabla f_i@f$.
     *
     * Unlike BFGS, L-BFGS does not explicitly calculate the inverse Hessian @f$H@f$,
     * instead it approximates its effects by applying the following formula to find the
     * search direction:
     *
     * @f[
     * H_{i} = (V_{i-1}^T ... V_{i-m}^T) H_0 (V_{i-m} ... V_{i-1})
     *         + \rho_{i-m} (V_{i-1}^T ... V_{i - m + 1}^T) s_{i-m} s_{i-m}^T
     *           (V_{i-m+1} ... V_{i-1})
     *         + \rho_{i-m+1} (V_{i-1}^T ... V_{i - m + 2}^T) s_{i-m+1} s_{i-m+1}^T
     *           (V_{i-m+2} ... V_{i-1})
     *         + ...
     *         + \rho_{i-1} s_{i-1} s_{i-1}^T
     * @f]
     *
     * where:
     * @f$ H_i: \mathbb{R}^{n \times n} is an approximation of the inverse Hessian
     * (\nabla^2 f(x_i))^{-1}@f$,
     * @f$V_i: \mathbb{R}^{n \times n} = I - \rho_i y_i s_i^T@f$,
     * @f$ \rho_i: \frac{1}{y_i^T s_i}@f$,
     * @f$ s_i: \mathbb{R}^{n} = x_{i+1} - x_i @f$, and
     * @f$ y_i: \mathbb{R}^{n} = \nabla f_{i+1} - \nabla f_i @f$
     *
     * References:
     * - See Wright and Nocedal, ‘Numerical Optimization’, 2nd Edition, 2006, pp. 176-179.
     *
     * @author
     * - Said Alghabra - initial code
     *
     * @tparam data_t data type for the domain and range of the problem, defaulting to real_t
     */
    template <typename data_t = real_t>
    class LBFGS : public Solver<data_t>
    {
    public:
        /// Scalar alias
        using Scalar = typename Solver<data_t>::Scalar;

        /// Construct an L-BFGS solver for the given differentiable problem.
        /// @param problem the objective functional to minimize
        /// @param line_search_method line search used to pick the step length along each direction
        /// @param memory number of past (s, y) correction pairs kept for the implicit
        ///        inverse-Hessian approximation (the "m" in L-BFGS)
        /// @param tol stopping tolerance
        LBFGS(const Functional<data_t>& problem, const LineSearchMethod<data_t>& line_search_method,
              const index_t& memory = 10, const data_t& tol = 1e-4);

        /// make copy constructor deletion explicit
        LBFGS(const LBFGS<data_t>&) = delete;

        /// default destructor
        ~LBFGS() override = default;

        /// run the solver for the given number of iterations, optionally starting from x0
        DataContainer<data_t>
            solve(index_t iterations,
                  std::optional<DataContainer<data_t>> x0 = std::nullopt) override;

    private:
        /// the differentiable optimization problem
        std::unique_ptr<Functional<data_t>> _problem;

        /// the line search method
        // TODO: maybe change this to be only strong wolfe?
        std::unique_ptr<LineSearchMethod<data_t>> _ls;

        /// number of stored correction pairs (the constructor's `memory` argument)
        index_t _m;

        /// the stopping tolerance (the constructor's `tol` argument)
        data_t _tol;

        /// implement the polymorphic clone operation
        LBFGS<data_t>* cloneImpl() const override;

        /// implement the polymorphic comparison operation
        bool isEqual(const Solver<data_t>& other) const override;
    };
} // namespace elsa