Line data Source code
1 : #pragma once 2 : 3 : #include "LineSearchMethod.h" 4 : #include <vector> 5 : 6 : namespace elsa 7 : { 8 : /** 9 : * @brief Barzilai-Borwein 10 : * 11 : * BarzilaiBorwein is an adaptive stepping method based on the solution to a two-point 12 : * approximation of the secant equation to find a step size \alpha. The secant 13 : * equation: 14 : * 15 : * @f[ 16 : * \nabla^2 f_{i+1} s_i = y_i 17 : * @f] 18 : * 19 : * where @f$\nabla^2 f_{i+1}: \mathbb{R}^{n \times n}@f$ the hessian of the 20 : * objective function @f$f@f$ at @f$x_i@f$, 21 : * @f$s_i: \mathbb{R}^n is x_{i+1} - x_i@f$ and, 22 : * @f$y_i: \mathbb{R}^n is \nabla f_{i+1} - \nabla f_i @f$ 23 : * 24 : * 25 : * Given the iteration @f$ x_{i+1} = x_i - \alpha \nabla f_i @f$ and the condition 26 : * @f$ \alpha = argmin_{\alpha} ||s_i - \alpha y_i||^2 @f$ then we have the 27 : * BarzilaiBorwein step size: 28 : * 29 : * @f[ 30 : * \alpha = \langle s_i, y_i \rangle / \langle y_i, y_i \rangle 31 : * @f] 32 : * 33 : * The biggest drawback of the original implementation of the BarzilaiBorwein is that the setps 34 : * do not guarantee a descent in the values of the objective function. Which means this method 35 : * generally does not guarantee convergence to a solution of the optimization problem. However, 36 : * in this implementation, which is based on the paper by Marcos Raydan (see references below), 37 : * we modify the original implementation such that it guarantees that the new value of the 38 : * objective function satisfies the condition: 39 : * 40 : * @f[ 41 : * f(x_i - \alpha \nabla f_i) \le max_{0 \le j \le m} f_{i-j} - \gamma 42 : * \frac{1}{\alpha} \nabla f_i^T \nabla f_i 43 : * @f] 44 : * 45 : * Which means that the new value of the objective function f has to be less than or 46 : * equal to the largest value in the last m iterations. 47 : * Given m greater than zero, this method guarantees convergence to the soltuion of 48 : * the objective function. 49 : * 50 : * References: 51 : * - Barzilai, Jonathan, and Jonathan M. Borwein. "Two-point step size gradient methods." 52 : * IMA journal of numerical analysis 8.1 (1988): 141-148. 53 : * - Raydan, Marcos. "The Barzilai and Borwein gradient method for the large scale 54 : * unconstrained minimization problem." SIAM Journal on Optimization 7.1 (1997): 26-33. 55 : * 56 : * @author 57 : * - Said Alghabra - initial code 58 : * 59 : * @tparam data_t data type for the domain and range of the problem, defaulting to real_t 60 : */ 61 : template <typename data_t = real_t> 62 : class BarzilaiBorwein : public LineSearchMethod<data_t> 63 : { 64 : public: 65 : BarzilaiBorwein(const Functional<data_t>& problem, uint32_t m = 10, data_t gamma = 1e-4, 66 : data_t sigma1 = 0.1, data_t sigma2 = 0.5, data_t epsilon = 1e-10, 67 : index_t max_iterations = 10); 68 8 : ~BarzilaiBorwein() override = default; 69 : data_t solve(DataContainer<data_t> xi, DataContainer<data_t> di) override; 70 : 71 : /// implement the polymorphic comparison operation 72 : bool isEqual(const LineSearchMethod<data_t>& other) const override; 73 : 74 : private: 75 : // the number of previous objective function values saved in memory 76 : uint32_t _m; 77 : 78 : // parameter affecting the sufficient decrease condition 79 : data_t _gamma; 80 : 81 : // factors that determine the next guess of the step size that satisfies the necessary 82 : // condition 83 : data_t _sigma1; 84 : data_t _sigma2; 85 : 86 : // the smallest step size allowed 87 : data_t _epsilon; 88 : 89 : // the largest step size allowed 90 : data_t _invepsilon; 91 : 92 : // the last step size 93 : data_t _li_prev; 94 : 95 : // norm 2 squared of the last search direction 96 : data_t _derphi_prev; 97 : 98 : // current iteration 99 : index_t _iter; 100 : 101 : // the last m objective function values 102 : std::vector<data_t> _function_vals; 103 : 104 : // search direction of the previous step 105 : DataContainer<data_t> _gi_prev; 106 : 107 : /// implement the polymorphic clone operation 108 : BarzilaiBorwein<data_t>* cloneImpl() const override; 109 : }; 110 : } // namespace elsa