LCOV - code coverage report
Current view: top level - elsa/line_search - BarzilaiBorwein.h (source / functions) Hit Total Coverage
Test: coverage-all.lcov Lines: 1 1 100.0 %
Date: 2025-01-22 07:37:33 Functions: 2 2 100.0 %

          Line data    Source code
       1             : #pragma once
       2             : 
       3             : #include "LineSearchMethod.h"
       4             : #include <vector>
       5             : 
       6             : namespace elsa
       7             : {
       8             :     /**
       9             :      * @brief Barzilai-Borwein
      10             :      *
      11             :      * BarzilaiBorwein is an adaptive stepping method based on the solution to a two-point
      12             :      * approximation of the secant equation to find a step size \alpha. The secant
      13             :      * equation:
      14             :      *
      15             :      * @f[
      16             :      * \nabla^2 f_{i+1} s_i = y_i
      17             :      * @f]
      18             :      *
      19             :      * where @f$\nabla^2 f_{i+1}: \mathbb{R}^{n \times n}@f$ the hessian of the
      20             :      * objective function @f$f@f$ at @f$x_i@f$,
      21             :      * @f$s_i: \mathbb{R}^n is x_{i+1} - x_i@f$ and,
      22             :      * @f$y_i: \mathbb{R}^n is \nabla f_{i+1} - \nabla f_i @f$
      23             :      *
      24             :      *
      25             :      * Given the iteration @f$ x_{i+1} = x_i - \alpha \nabla f_i @f$ and the condition
      26             :      * @f$ \alpha = argmin_{\alpha}  ||s_i - \alpha y_i||^2 @f$ then we have the
      27             :      * BarzilaiBorwein step size:
      28             :      *
      29             :      * @f[
      30             :      * \alpha = \langle s_i, y_i \rangle / \langle y_i, y_i \rangle
      31             :      * @f]
      32             :      *
      33             :      * The biggest drawback of the original implementation of the BarzilaiBorwein is that the setps
      34             :      * do not guarantee a descent in the values of the objective function. Which means this method
      35             :      * generally does not guarantee convergence to a solution of the optimization problem. However,
      36             :      * in this implementation, which is based on the paper by Marcos Raydan (see references below),
      37             :      * we modify the original implementation such that it guarantees that the new value of the
      38             :      * objective function satisfies the condition:
      39             :      *
      40             :      * @f[
      41             :      * f(x_i - \alpha \nabla f_i) \le max_{0 \le j \le m} f_{i-j} - \gamma
      42             :      * \frac{1}{\alpha} \nabla f_i^T \nabla f_i
      43             :      * @f]
      44             :      *
      45             :      * Which means that the new value of the objective function f has to be less than or
      46             :      * equal to the largest value in the last m iterations.
      47             :      * Given m greater than zero, this method guarantees convergence to the soltuion of
      48             :      * the objective function.
      49             :      *
      50             :      * References:
      51             :      * - Barzilai, Jonathan, and Jonathan M. Borwein. "Two-point step size gradient methods."
      52             :      * IMA journal of numerical analysis 8.1 (1988): 141-148.
      53             :      * - Raydan, Marcos. "The Barzilai and Borwein gradient method for the large scale
      54             :      * unconstrained minimization problem." SIAM Journal on Optimization 7.1 (1997): 26-33.
      55             :      *
      56             :      * @author
      57             :      * - Said Alghabra - initial code
      58             :      *
      59             :      * @tparam data_t data type for the domain and range of the problem, defaulting to real_t
      60             :      */
      61             :     template <typename data_t = real_t>
      62             :     class BarzilaiBorwein : public LineSearchMethod<data_t>
      63             :     {
      64             :     public:
      65             :         BarzilaiBorwein(const Functional<data_t>& problem, uint32_t m = 10, data_t gamma = 1e-4,
      66             :                         data_t sigma1 = 0.1, data_t sigma2 = 0.5, data_t epsilon = 1e-10,
      67             :                         index_t max_iterations = 10);
      68           8 :         ~BarzilaiBorwein() override = default;
      69             :         data_t solve(DataContainer<data_t> xi, DataContainer<data_t> di) override;
      70             : 
      71             :         /// implement the polymorphic comparison operation
      72             :         bool isEqual(const LineSearchMethod<data_t>& other) const override;
      73             : 
      74             :     private:
      75             :         // the number of previous objective function values saved in memory
      76             :         uint32_t _m;
      77             : 
      78             :         // parameter affecting the sufficient decrease condition
      79             :         data_t _gamma;
      80             : 
      81             :         // factors that determine the next guess of the step size that satisfies the necessary
      82             :         // condition
      83             :         data_t _sigma1;
      84             :         data_t _sigma2;
      85             : 
      86             :         // the smallest step size allowed
      87             :         data_t _epsilon;
      88             : 
      89             :         // the largest step size allowed
      90             :         data_t _invepsilon;
      91             : 
      92             :         // the last step size
      93             :         data_t _li_prev;
      94             : 
      95             :         // norm 2 squared of the last search direction
      96             :         data_t _derphi_prev;
      97             : 
      98             :         // current iteration
      99             :         index_t _iter;
     100             : 
     101             :         // the last m objective function values
     102             :         std::vector<data_t> _function_vals;
     103             : 
     104             :         // search direction of the previous step
     105             :         DataContainer<data_t> _gi_prev;
     106             : 
     107             :         /// implement the polymorphic clone operation
     108             :         BarzilaiBorwein<data_t>* cloneImpl() const override;
     109             :     };
     110             : } // namespace elsa

Generated by: LCOV version 1.14