// -*- coding: utf-8
// vim: set fileencoding=utf-8

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_HYBRIDNONLINEARSOLVER_H
#define EIGEN_HYBRIDNONLINEARSOLVER_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace HybridNonLinearSolverSpace {
enum Status {
  Running = -1,
  ImproperInputParameters = 0,
  RelativeErrorTooSmall = 1,
  TooManyFunctionEvaluation = 2,
  TolTooSmall = 3,
  NotMakingProgressJacobian = 4,
  NotMakingProgressIterations = 5,
  UserAsked = 6
};
}

/**
 * \ingroup NonLinearOptimization_Module
 * \brief Finds a zero of a system of n
 * nonlinear functions in n variables by a modification of the Powell
 * hybrid method ("dogleg").
 *
 * The user must provide a subroutine which calculates the
 * functions. The Jacobian is either provided by the user, or approximated
 * using a forward-difference method.
 *
 */
template <typename FunctorType, typename Scalar = double>
class HybridNonLinearSolver {
 public:
  typedef DenseIndex Index;

  HybridNonLinearSolver(FunctorType &_functor) : functor(_functor) {
    nfev = njev = iter = 0;
    fnorm = 0.;
    useExternalScaling = false;
  }

  struct Parameters {
    Parameters()
        : factor(Scalar(100.)),
          maxfev(1000),
          xtol(numext::sqrt(NumTraits<Scalar>::epsilon())),
          nb_of_subdiagonals(-1),
          nb_of_superdiagonals(-1),
          epsfcn(Scalar(0.)) {}
    Scalar factor;
    Index maxfev;  // maximum number of function evaluation
    Scalar xtol;
    Index nb_of_subdiagonals;
    Index nb_of_superdiagonals;
    Scalar epsfcn;
  };
  typedef Matrix<Scalar, Dynamic, 1> FVectorType;
  typedef Matrix<Scalar, Dynamic, Dynamic> JacobianType;
  /* TODO: if eigen provides a triangular storage, use it here */
  typedef Matrix<Scalar, Dynamic, Dynamic> UpperTriangularType;

  HybridNonLinearSolverSpace::Status hybrj1(FVectorType &x,
                                            const Scalar tol = numext::sqrt(NumTraits<Scalar>::epsilon()));

  HybridNonLinearSolverSpace::Status solveInit(FVectorType &x);
  HybridNonLinearSolverSpace::Status solveOneStep(FVectorType &x);
  HybridNonLinearSolverSpace::Status solve(FVectorType &x);

  HybridNonLinearSolverSpace::Status hybrd1(FVectorType &x,
                                            const Scalar tol = numext::sqrt(NumTraits<Scalar>::epsilon()));

  HybridNonLinearSolverSpace::Status solveNumericalDiffInit(FVectorType &x);
  HybridNonLinearSolverSpace::Status solveNumericalDiffOneStep(FVectorType &x);
  HybridNonLinearSolverSpace::Status solveNumericalDiff(FVectorType &x);

  void resetParameters(void) { parameters = Parameters(); }
  Parameters parameters;
  FVectorType fvec, qtf, diag;
  JacobianType fjac;
  UpperTriangularType R;
  Index nfev;
  Index njev;
  Index iter;
  Scalar fnorm;
  bool useExternalScaling;

 private:
  FunctorType &functor;
  Index n;
  Scalar sum;
  bool sing;
  Scalar temp;
  Scalar delta;
  bool jeval;
  Index ncsuc;
  Scalar ratio;
  Scalar pnorm, xnorm, fnorm1;
  Index nslow1, nslow2;
  Index ncfail;
  Scalar actred, prered;
  FVectorType wa1, wa2, wa3, wa4;

  HybridNonLinearSolver &operator=(const HybridNonLinearSolver &);
};

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::hybrj1(FVectorType &x,
                                                                                      const Scalar tol) {
  n = x.size();

  /* check the input parameters for errors. */
  if (n <= 0 || tol < 0.) return HybridNonLinearSolverSpace::ImproperInputParameters;

  resetParameters();
  parameters.maxfev = 100 * (n + 1);
  parameters.xtol = tol;
  diag.setConstant(n, 1.);
  useExternalScaling = true;
  return solve(x);
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::solveInit(FVectorType &x) {
  n = x.size();

  wa1.resize(n);
  wa2.resize(n);
  wa3.resize(n);
  wa4.resize(n);
  fvec.resize(n);
  qtf.resize(n);
  fjac.resize(n, n);
  if (!useExternalScaling) diag.resize(n);
  eigen_assert((!useExternalScaling || diag.size() == n) &&
               "When useExternalScaling is set, the caller must provide a valid 'diag'");

  /* Function Body */
  nfev = 0;
  njev = 0;

  /*     check the input parameters for errors. */
  if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0.)
    return HybridNonLinearSolverSpace::ImproperInputParameters;
  if (useExternalScaling)
    for (Index j = 0; j < n; ++j)
      if (diag[j] <= 0.) return HybridNonLinearSolverSpace::ImproperInputParameters;

  /*     evaluate the function at the starting point */
  /*     and calculate its norm. */
  nfev = 1;
  if (functor(x, fvec) < 0) return HybridNonLinearSolverSpace::UserAsked;
  fnorm = fvec.stableNorm();

  /*     initialize iteration counter and monitors. */
  iter = 1;
  ncsuc = 0;
  ncfail = 0;
  nslow1 = 0;
  nslow2 = 0;

  return HybridNonLinearSolverSpace::Running;
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::solveOneStep(FVectorType &x) {
  using std::abs;

  eigen_assert(x.size() == n);  // check the caller is not cheating us

  Index j;
  std::vector<JacobiRotation<Scalar> > v_givens(n), w_givens(n);

  jeval = true;

  /* calculate the jacobian matrix. */
  if (functor.df(x, fjac) < 0) return HybridNonLinearSolverSpace::UserAsked;
  ++njev;

  wa2 = fjac.colwise().blueNorm();

  /* on the first iteration and if external scaling is not used, scale according */
  /* to the norms of the columns of the initial jacobian. */
  if (iter == 1) {
    if (!useExternalScaling)
      for (j = 0; j < n; ++j) diag[j] = (wa2[j] == 0.) ? 1. : wa2[j];

    /* on the first iteration, calculate the norm of the scaled x */
    /* and initialize the step bound delta. */
    xnorm = diag.cwiseProduct(x).stableNorm();
    delta = parameters.factor * xnorm;
    if (delta == 0.) delta = parameters.factor;
  }

  /* compute the qr factorization of the jacobian. */
  HouseholderQR<JacobianType> qrfac(fjac);  // no pivoting:

  /* copy the triangular factor of the qr factorization into r. */
  R = qrfac.matrixQR();

  /* accumulate the orthogonal factor in fjac. */
  fjac = qrfac.householderQ();

  /* form (q transpose)*fvec and store in qtf. */
  qtf = fjac.transpose() * fvec;

  /* rescale if necessary. */
  if (!useExternalScaling) diag = diag.cwiseMax(wa2);

  while (true) {
    /* determine the direction p. */
    internal::dogleg<Scalar>(R, diag, qtf, delta, wa1);

    /* store the direction p and x + p. calculate the norm of p. */
    wa1 = -wa1;
    wa2 = x + wa1;
    pnorm = diag.cwiseProduct(wa1).stableNorm();

    /* on the first iteration, adjust the initial step bound. */
    if (iter == 1) delta = (std::min)(delta, pnorm);

    /* evaluate the function at x + p and calculate its norm. */
    if (functor(wa2, wa4) < 0) return HybridNonLinearSolverSpace::UserAsked;
    ++nfev;
    fnorm1 = wa4.stableNorm();

    /* compute the scaled actual reduction. */
    actred = -1.;
    if (fnorm1 < fnorm) /* Computing 2nd power */
      actred = 1. - numext::abs2(fnorm1 / fnorm);

    /* compute the scaled predicted reduction. */
    wa3 = R.template triangularView<Upper>() * wa1 + qtf;
    temp = wa3.stableNorm();
    prered = 0.;
    if (temp < fnorm) /* Computing 2nd power */
      prered = 1. - numext::abs2(temp / fnorm);

    /* compute the ratio of the actual to the predicted reduction. */
    ratio = 0.;
    if (prered > 0.) ratio = actred / prered;

    /* update the step bound. */
    if (ratio < Scalar(.1)) {
      ncsuc = 0;
      ++ncfail;
      delta = Scalar(.5) * delta;
    } else {
      ncfail = 0;
      ++ncsuc;
      if (ratio >= Scalar(.5) || ncsuc > 1) delta = (std::max)(delta, pnorm / Scalar(.5));
      if (abs(ratio - 1.) <= Scalar(.1)) {
        delta = pnorm / Scalar(.5);
      }
    }

    /* test for successful iteration. */
    if (ratio >= Scalar(1e-4)) {
      /* successful iteration. update x, fvec, and their norms. */
      x = wa2;
      wa2 = diag.cwiseProduct(x);
      fvec = wa4;
      xnorm = wa2.stableNorm();
      fnorm = fnorm1;
      ++iter;
    }

    /* determine the progress of the iteration. */
    ++nslow1;
    if (actred >= Scalar(.001)) nslow1 = 0;
    if (jeval) ++nslow2;
    if (actred >= Scalar(.1)) nslow2 = 0;

    /* test for convergence. */
    if (delta <= parameters.xtol * xnorm || fnorm == 0.) return HybridNonLinearSolverSpace::RelativeErrorTooSmall;

    /* tests for termination and stringent tolerances. */
    if (nfev >= parameters.maxfev) return HybridNonLinearSolverSpace::TooManyFunctionEvaluation;
    if (Scalar(.1) * (std::max)(Scalar(.1) * delta, pnorm) <= NumTraits<Scalar>::epsilon() * xnorm)
      return HybridNonLinearSolverSpace::TolTooSmall;
    if (nslow2 == 5) return HybridNonLinearSolverSpace::NotMakingProgressJacobian;
    if (nslow1 == 10) return HybridNonLinearSolverSpace::NotMakingProgressIterations;

    /* criterion for recalculating jacobian. */
    if (ncfail == 2) break;  // leave inner loop and go for the next outer loop iteration

    /* calculate the rank one modification to the jacobian */
    /* and update qtf if necessary. */
    wa1 = diag.cwiseProduct(diag.cwiseProduct(wa1) / pnorm);
    wa2 = fjac.transpose() * wa4;
    if (ratio >= Scalar(1e-4)) qtf = wa2;
    wa2 = (wa2 - wa3) / pnorm;

    /* compute the qr factorization of the updated jacobian. */
    internal::r1updt<Scalar>(R, wa1, v_givens, w_givens, wa2, wa3, &sing);
    internal::r1mpyq<Scalar>(n, n, fjac.data(), v_givens, w_givens);
    internal::r1mpyq<Scalar>(1, n, qtf.data(), v_givens, w_givens);

    jeval = false;
  }
  return HybridNonLinearSolverSpace::Running;
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::solve(FVectorType &x) {
  HybridNonLinearSolverSpace::Status status = solveInit(x);
  if (status == HybridNonLinearSolverSpace::ImproperInputParameters) return status;
  while (status == HybridNonLinearSolverSpace::Running) status = solveOneStep(x);
  return status;
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::hybrd1(FVectorType &x,
                                                                                      const Scalar tol) {
  n = x.size();

  /* check the input parameters for errors. */
  if (n <= 0 || tol < 0.) return HybridNonLinearSolverSpace::ImproperInputParameters;

  resetParameters();
  parameters.maxfev = 200 * (n + 1);
  parameters.xtol = tol;

  diag.setConstant(n, 1.);
  useExternalScaling = true;
  return solveNumericalDiff(x);
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::solveNumericalDiffInit(FVectorType &x) {
  n = x.size();

  if (parameters.nb_of_subdiagonals < 0) parameters.nb_of_subdiagonals = n - 1;
  if (parameters.nb_of_superdiagonals < 0) parameters.nb_of_superdiagonals = n - 1;

  wa1.resize(n);
  wa2.resize(n);
  wa3.resize(n);
  wa4.resize(n);
  qtf.resize(n);
  fjac.resize(n, n);
  fvec.resize(n);
  if (!useExternalScaling) diag.resize(n);
  eigen_assert((!useExternalScaling || diag.size() == n) &&
               "When useExternalScaling is set, the caller must provide a valid 'diag'");

  /* Function Body */
  nfev = 0;
  njev = 0;

  /*     check the input parameters for errors. */
  if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.nb_of_subdiagonals < 0 ||
      parameters.nb_of_superdiagonals < 0 || parameters.factor <= 0.)
    return HybridNonLinearSolverSpace::ImproperInputParameters;
  if (useExternalScaling)
    for (Index j = 0; j < n; ++j)
      if (diag[j] <= 0.) return HybridNonLinearSolverSpace::ImproperInputParameters;

  /*     evaluate the function at the starting point */
  /*     and calculate its norm. */
  nfev = 1;
  if (functor(x, fvec) < 0) return HybridNonLinearSolverSpace::UserAsked;
  fnorm = fvec.stableNorm();

  /*     initialize iteration counter and monitors. */
  iter = 1;
  ncsuc = 0;
  ncfail = 0;
  nslow1 = 0;
  nslow2 = 0;

  return HybridNonLinearSolverSpace::Running;
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::solveNumericalDiffOneStep(
    FVectorType &x) {
  using std::abs;
  using std::sqrt;

  eigen_assert(x.size() == n);  // check the caller is not cheating us

  Index j;
  std::vector<JacobiRotation<Scalar> > v_givens(n), w_givens(n);

  jeval = true;
  if (parameters.nb_of_subdiagonals < 0) parameters.nb_of_subdiagonals = n - 1;
  if (parameters.nb_of_superdiagonals < 0) parameters.nb_of_superdiagonals = n - 1;

  /* calculate the jacobian matrix. */
  if (internal::fdjac1(functor, x, fvec, fjac, parameters.nb_of_subdiagonals, parameters.nb_of_superdiagonals,
                       parameters.epsfcn) < 0)
    return HybridNonLinearSolverSpace::UserAsked;
  nfev += (std::min)(parameters.nb_of_subdiagonals + parameters.nb_of_superdiagonals + 1, n);

  wa2 = fjac.colwise().blueNorm();

  /* on the first iteration and if external scaling is not used, scale according */
  /* to the norms of the columns of the initial jacobian. */
  if (iter == 1) {
    if (!useExternalScaling)
      for (j = 0; j < n; ++j) diag[j] = (wa2[j] == 0.) ? 1. : wa2[j];

    /* on the first iteration, calculate the norm of the scaled x */
    /* and initialize the step bound delta. */
    xnorm = diag.cwiseProduct(x).stableNorm();
    delta = parameters.factor * xnorm;
    if (delta == 0.) delta = parameters.factor;
  }

  /* compute the qr factorization of the jacobian. */
  HouseholderQR<JacobianType> qrfac(fjac);  // no pivoting:

  /* copy the triangular factor of the qr factorization into r. */
  R = qrfac.matrixQR();

  /* accumulate the orthogonal factor in fjac. */
  fjac = qrfac.householderQ();

  /* form (q transpose)*fvec and store in qtf. */
  qtf = fjac.transpose() * fvec;

  /* rescale if necessary. */
  if (!useExternalScaling) diag = diag.cwiseMax(wa2);

  while (true) {
    /* determine the direction p. */
    internal::dogleg<Scalar>(R, diag, qtf, delta, wa1);

    /* store the direction p and x + p. calculate the norm of p. */
    wa1 = -wa1;
    wa2 = x + wa1;
    pnorm = diag.cwiseProduct(wa1).stableNorm();

    /* on the first iteration, adjust the initial step bound. */
    if (iter == 1) delta = (std::min)(delta, pnorm);

    /* evaluate the function at x + p and calculate its norm. */
    if (functor(wa2, wa4) < 0) return HybridNonLinearSolverSpace::UserAsked;
    ++nfev;
    fnorm1 = wa4.stableNorm();

    /* compute the scaled actual reduction. */
    actred = -1.;
    if (fnorm1 < fnorm) /* Computing 2nd power */
      actred = 1. - numext::abs2(fnorm1 / fnorm);

    /* compute the scaled predicted reduction. */
    wa3 = R.template triangularView<Upper>() * wa1 + qtf;
    temp = wa3.stableNorm();
    prered = 0.;
    if (temp < fnorm) /* Computing 2nd power */
      prered = 1. - numext::abs2(temp / fnorm);

    /* compute the ratio of the actual to the predicted reduction. */
    ratio = 0.;
    if (prered > 0.) ratio = actred / prered;

    /* update the step bound. */
    if (ratio < Scalar(.1)) {
      ncsuc = 0;
      ++ncfail;
      delta = Scalar(.5) * delta;
    } else {
      ncfail = 0;
      ++ncsuc;
      if (ratio >= Scalar(.5) || ncsuc > 1) delta = (std::max)(delta, pnorm / Scalar(.5));
      if (abs(ratio - 1.) <= Scalar(.1)) {
        delta = pnorm / Scalar(.5);
      }
    }

    /* test for successful iteration. */
    if (ratio >= Scalar(1e-4)) {
      /* successful iteration. update x, fvec, and their norms. */
      x = wa2;
      wa2 = diag.cwiseProduct(x);
      fvec = wa4;
      xnorm = wa2.stableNorm();
      fnorm = fnorm1;
      ++iter;
    }

    /* determine the progress of the iteration. */
    ++nslow1;
    if (actred >= Scalar(.001)) nslow1 = 0;
    if (jeval) ++nslow2;
    if (actred >= Scalar(.1)) nslow2 = 0;

    /* test for convergence. */
    if (delta <= parameters.xtol * xnorm || fnorm == 0.) return HybridNonLinearSolverSpace::RelativeErrorTooSmall;

    /* tests for termination and stringent tolerances. */
    if (nfev >= parameters.maxfev) return HybridNonLinearSolverSpace::TooManyFunctionEvaluation;
    if (Scalar(.1) * (std::max)(Scalar(.1) * delta, pnorm) <= NumTraits<Scalar>::epsilon() * xnorm)
      return HybridNonLinearSolverSpace::TolTooSmall;
    if (nslow2 == 5) return HybridNonLinearSolverSpace::NotMakingProgressJacobian;
    if (nslow1 == 10) return HybridNonLinearSolverSpace::NotMakingProgressIterations;

    /* criterion for recalculating jacobian. */
    if (ncfail == 2) break;  // leave inner loop and go for the next outer loop iteration

    /* calculate the rank one modification to the jacobian */
    /* and update qtf if necessary. */
    wa1 = diag.cwiseProduct(diag.cwiseProduct(wa1) / pnorm);
    wa2 = fjac.transpose() * wa4;
    if (ratio >= Scalar(1e-4)) qtf = wa2;
    wa2 = (wa2 - wa3) / pnorm;

    /* compute the qr factorization of the updated jacobian. */
    internal::r1updt<Scalar>(R, wa1, v_givens, w_givens, wa2, wa3, &sing);
    internal::r1mpyq<Scalar>(n, n, fjac.data(), v_givens, w_givens);
    internal::r1mpyq<Scalar>(1, n, qtf.data(), v_givens, w_givens);

    jeval = false;
  }
  return HybridNonLinearSolverSpace::Running;
}

template <typename FunctorType, typename Scalar>
HybridNonLinearSolverSpace::Status HybridNonLinearSolver<FunctorType, Scalar>::solveNumericalDiff(FVectorType &x) {
  HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x);
  if (status == HybridNonLinearSolverSpace::ImproperInputParameters) return status;
  while (status == HybridNonLinearSolverSpace::Running) status = solveNumericalDiffOneStep(x);
  return status;
}

}  // end namespace Eigen

#endif  // EIGEN_HYBRIDNONLINEARSOLVER_H

// vim: ai ts=4 sts=4 et sw=4
