module nonlin_least_squares use iso_fortran_env use nonlin_multi_eqn_mult_var use nonlin_error_handling use nonlin_types use nonlin_helper use nonlin_single_var, only : fcn1var_helper, fcn1var use nonlin_solve, only : brent_solver use ferror, only : errors use linalg, only : qr_factor, solve_qr use blas, only : dgemv, dgemm implicit none private public :: least_squares_solver public :: constrained_equation_solver public :: constrained_least_squares_solver ! ****************************************************************************** ! TYPES ! ------------------------------------------------------------------------------ type, extends(equation_solver) :: least_squares_solver !! Defines a Levenberg-Marquardt based solver for unconstrained !! least-squares problems. real(real64), private :: m_factor = 100.0d0 !! Initial step bounding factor contains procedure, public :: get_step_scaling_factor => lss_get_factor procedure, public :: set_step_scaling_factor => lss_set_factor procedure, public :: solve => lss_solve end type type, abstract, extends(least_squares_solver) :: constrained_equation_solver !! A least-squares solver that implements limits (constraints) on the !! solution variables. real(real64), private, allocatable, dimension(:) :: m_upper !! An upper set of parameter bounds. real(real64), private, allocatable, dimension(:) :: m_lower !! A lower set of parameter bounds. contains procedure, public :: get_upper_limits => ces_get_upper_bounds procedure, public :: set_upper_limits => ces_set_upper_bounds procedure, public :: get_lower_limits => ces_get_lower_bounds procedure, public :: set_lower_limits => ces_set_lower_bounds procedure, public :: apply_limits => ces_apply_limits end type type, extends(constrained_equation_solver) :: constrained_least_squares_solver !! Defines a constrained least-squares solver using Powell's trust !! region method. In the event the trust-region approach is slow to !! converge a backtracking type line search will be utilized. The !! solver also utilizes a Coleman-Li scaling approach that works to !! improve stability when the solution is near a constraint. !! !! The trust region approach assumes a radius of \(\Delta\), which can !! be initially defined by the user but is automatically altered by the !! solver, and is implemented as follows. !! !! Gauss-Newton Step (Solved via QR decomposition): !! $$ J \vec{p_{gn}} = -\vec{f} $$ !! !! The gradient vector for the steepest descent is calculated by !! utilizing the product of the Jacobian and the residual as follows. !! $$ \vec{g} = J \vec{f} $$ !! !! The steepest descent step is then as follows. !! $$ \vec{p_{sd}} = -\alpha \vec{g} $$ !! where \( \alpha = \frac{||\vec{g}||^{2}}{||J \vec{g}||^2} \). !! !! Finally, the dogleg is computed as follows. !! $$ \vec{p} = \vec{p_{sd}} + t \left( \vec{p_{gn}} - \vec{p_{sd}} !! \right) $$ !! where \(t\) is found such that \(|| \vec{p}|| = \Delta \). real(real64), private :: m_delta = 1.0d0 !! The damping parameter. real(real64), private :: m_scaling = 1.0d0 !! The initial line-search scaling parameter. contains procedure, public :: get_trust_region_radius => cls_get_radius procedure, public :: set_trust_region_radius => cls_set_radius procedure, public :: get_step_scaling_factor => cls_get_factor procedure, public :: set_step_scaling_factor => cls_set_factor procedure, public :: solve => cls_solve end type contains ! ****************************************************************************** ! LEAST_SQUARES_SOLVER MEMBERS ! ------------------------------------------------------------------------------ pure function lss_get_factor(this) result(x) !! Gets a factor used to scale the bounds on the initial step. !! !! This factor is used to set the bounds on the initial step such that !! the initial step is bounded as the product of the factor with the !! Euclidean norm of the vector resulting from multiplication of the !! diagonal scaling matrix and the solution estimate. If zero, the !! factor itself is used. class(least_squares_solver), intent(in) :: this !! The [[least_squares_solver]] object. real(real64) :: x !! The factor. x = this%m_factor end function ! -------------------- subroutine lss_set_factor(this, x) !! Sets a factor used to scale the bounds on the initial step. !! !! This factor is used to set the bounds on the initial step such that !! the initial step is bounded as the product of the factor with the !! Euclidean norm of the vector resulting from multiplication of the !! diagonal scaling matrix and the solution estimate. If zero, the !! factor itself is used. class(least_squares_solver), intent(inout) :: this !! The [[least_squares_solver]] object. real(real64), intent(in) :: x !! The factor. if (x < 0.1d0) then this%m_factor = 0.1d0 else if (x > 1.0d2) then this%m_factor = 1.0d2 else this%m_factor = x end if end subroutine ! ------------------------------------------------------------------------------ subroutine lss_solve(this, fcn, x, fvec, ib, args, err) !! Applies the Levenberg-Marquardt method to solve the nonlinear !! least-squares problem. This routines is based upon the MINPACK !! routine LMDIF. class(least_squares_solver), intent(inout) :: this !! The [[least_squares_solver]] object. class(vecfcn_helper), intent(in) :: fcn !! The [[vecfcn_helper]] object containing the equations to solve. real(real64), intent(inout), dimension(:) :: x !! On input, an N-element array containing an initial estimate !! to the solution. On output, the updated solution estimate. !! N is the number of variables. real(real64), intent(out), dimension(:) :: fvec !! An M-element array that, on output, will contain the values !! of each equation as evaluated at the variable values given !! in x. type(iteration_behavior), optional :: ib !! An optional output, that if provided, allows the caller to !! obtain iteration performance statistics. class(*), intent(inout), optional :: args !! An optional argument to allow the user to communicate with !! the routine. class(errors), intent(inout), optional, target :: err !! An error handling object. ! Parameters real(real64), parameter :: zero = 0.0d0 real(real64), parameter :: p0001 = 1.0d-4 real(real64), parameter :: p1 = 0.1d0 real(real64), parameter :: qtr = 0.25d0 real(real64), parameter :: half = 0.5d0 real(real64), parameter :: p75 = 0.75d0 real(real64), parameter :: one = 1.0d0 real(real64), parameter :: hndrd = 1.0d2 ! Local Variables logical :: xcnvrg, fcnvrg, gcnvrg integer(int32) :: i, neqn, nvar, flag, neval, iter, j, l, maxeval, & njac, lwork integer(int32), allocatable, dimension(:) :: jpvt real(real64) :: ftol, xtol, gtol, fac, eps, fnorm, par, xnorm, delta, & sm, temp, gnorm, pnorm, fnorm1, actred, temp1, temp2, prered, & dirder, ratio real(real64), allocatable, dimension(:,:) :: jac real(real64), allocatable, dimension(:) :: diag, qtf, wa1, wa2, wa3, wa4, w class(errors), pointer :: errmgr type(errors), target :: deferr character(len = 256) :: errmsg ! Initialization xcnvrg = .false. fcnvrg = .false. gcnvrg = .false. neqn = fcn%get_equation_count() nvar = fcn%get_variable_count() neval = 0 iter = 0 njac = 0 fac = this%m_factor ftol = this%get_fcn_tolerance() xtol = this%get_var_tolerance() gtol = this%get_gradient_tolerance() maxeval = this%get_max_fcn_evals() eps = epsilon(eps) if (present(ib)) then ib%iter_count = iter ib%fcn_count = neval ib%jacobian_count = njac ib%converge_on_fcn = fcnvrg ib%converge_on_chng = xcnvrg ib%converge_on_zero_diff = gcnvrg ib%gradient_count = 0 end if if (present(err)) then errmgr => err else errmgr => deferr end if ! Input Check if (.not.fcn%is_fcn_defined()) then ! ERROR: No function is defined call errmgr%report_error("lss_solve", & "No function has been defined.", & NL_INVALID_OPERATION_ERROR) return end if if (nvar > neqn) then ! ERROR: System is underdetermined call errmgr%report_error("lss_solve", "The solver cannot " // & "solve the underdetermined problem. The number of " // & "unknowns must not exceed the number of equations.", & NL_INVALID_INPUT_ERROR) return end if flag = 0 if (size(x) /= nvar) then flag = 3 else if (size(fvec) /= neqn) then flag = 4 end if if (flag /= 0) then ! One of the input arrays is not sized correctly write(errmsg, 100) "Input number ", flag, & " is not sized correctly." call errmgr%report_error("lss_solve", trim(errmsg), & NL_ARRAY_SIZE_ERROR) return end if ! Local Memory Allocation allocate(jpvt(nvar), stat = flag) if (flag == 0) allocate(jac(neqn, nvar), stat = flag) if (flag == 0) allocate(diag(nvar), stat = flag) if (flag == 0) allocate(qtf(nvar), stat = flag) if (flag == 0) allocate(wa1(nvar), stat = flag) if (flag == 0) allocate(wa2(nvar), stat = flag) if (flag == 0) allocate(wa3(nvar), stat = flag) if (flag == 0) allocate(wa4(neqn), stat = flag) if (flag == 0) then call fcn%jacobian(x, jac, fv = fvec, olwork = lwork) allocate(w(lwork), stat = flag) end if if (flag /= 0) then ! ERROR: Out of memory call errmgr%report_error("qns_solve", & "Insufficient memory available.", NL_OUT_OF_MEMORY_ERROR) return end if ! Evaluate the function at the starting point, and calculate its norm call fcn%fcn(x, fvec, args) neval = 1 fnorm = norm2(fvec) ! Process par = zero iter = 1 flag = 0 do ! Evaluate the Jacobian call fcn%jacobian(x, jac, fvec, w, args = args) njac = njac + 1 ! Compute the QR factorization of the Jacobian call lmfactor(jac, .true., jpvt, wa1, wa2, wa3) ! On the first iteration, scale the problem according to the norms ! of each of the columns of the initial Jacobian if (iter == 1) then do j = 1, nvar diag(j) = wa2(j) if (wa2(j) == zero) diag(j) = one end do wa3 = diag * x xnorm = norm2(wa3) delta = fac * xnorm if (delta == zero) delta = fac end if ! Form Q**T * FVEC, and store the first N components in QTF wa4 = fvec do j = 1, nvar if (jac(j,j) /= zero) then sm = zero do i = j, neqn sm = sm + jac(i,j) * wa4(i) end do temp = -sm / jac(j,j) wa4(j:neqn) = wa4(j:neqn) + jac(j:neqn,j) * temp end if ! LINE 120 jac(j,j) = wa1(j) qtf(j) = wa4(j) end do ! Compute the norm of the scaled gradient gnorm = zero if (fnorm /= zero) then do j = 1, nvar l = jpvt(j) if (wa2(l) == zero) cycle sm = zero do i = 1, j sm = sm + jac(i,j) * (qtf(i) / fnorm) end do gnorm = max(gnorm, abs(sm / wa2(l))) end do end if ! LINE 170 ! Test for convergence of the gradient norm if (gnorm <= gtol) then gcnvrg = .true. exit end if ! Rescale if necessary do j = 1, nvar diag(j) = max(diag(j), wa2(j)) end do ! Inner Loop do ! Determine the Levenberg-Marquardt parameter call lmpar(jac, jpvt, diag, qtf, delta, par, wa1, wa2, wa3, wa4) ! Store the direction P, and X + P. Calculate the norm of P do j = 1, nvar wa1(j) = -wa1(j) wa2(j) = x(j) + wa1(j) wa3(j) = diag(j) * wa1(j) end do pnorm = norm2(wa3) ! On the first iteration, adjust the initial step bounds if (iter == 1) delta = min(delta, pnorm) ! Evaluate the function at X + P, and calculate its norm call fcn%fcn(wa2, wa4, args) neval = neval + 1 fnorm1 = norm2(wa4) ! Compute the scaled actual reduction actred = -one if (p1 * fnorm1 < fnorm) actred = one - (fnorm1 / fnorm)**2 ! Compute the scaled predicted reduction and the scaled ! directional derivative do j = 1, nvar wa3(j) = zero l = jpvt(j) temp = wa1(l) wa3(1:j) = wa3(1:j) + jac(1:j,j) * temp end do temp1 = norm2(wa3) / fnorm temp2 = (sqrt(par) * pnorm) / fnorm prered = temp1**2 + temp2**2 / half dirder = -(temp1**2 + temp2**2) ! Compute the ratio of the actual to the predicted reduction ratio = zero if (prered /= zero) ratio = actred / prered ! Update the step bounds if (ratio <= qtr) then if (actred >= zero) temp = half if (actred < zero) temp = half * dirder / & (dirder + half * actred) if (p1 * fnorm1 >= fnorm .or. temp < p1) temp = p1 delta = temp * min(delta, pnorm / p1) par = par / temp else if (par /= zero .and. ratio < p75) then ! NO ACTION REQUIRED else delta = pnorm / half par = half * par end if end if ! LINE 240 ! Test for successful iteration if (ratio >= p0001) then do j = 1, nvar x(j) = wa2(j) wa2(j) = diag(j) * x(j) end do fvec = wa4 xnorm = norm2(wa2) fnorm = fnorm1 iter = iter + 1 end if ! LINE 290 ! Tests for convergence if (abs(actred) <= ftol .and. prered <= ftol .and. & half * ratio <= one) fcnvrg = .true. if (delta <= xtol * xnorm) xcnvrg = .true. if (fcnvrg .or. xcnvrg) exit ! Tests for termination and stringent tolerances if (neval >= maxeval) flag = NL_CONVERGENCE_ERROR if (abs(actred) <= eps .and. prered <= eps .and. & half * ratio <= one) flag = NL_TOLERANCE_TOO_SMALL_ERROR if (delta <= eps * xnorm) flag = NL_TOLERANCE_TOO_SMALL_ERROR if (gnorm <= eps) flag = NL_TOLERANCE_TOO_SMALL_ERROR if (flag /= 0) exit if (ratio >= p0001) exit end do ! End of the inner loop ! Check for termination criteria if (fcnvrg .or. xcnvrg .or. gcnvrg .or. flag /= 0) exit ! Print the iteration status if (this%get_print_status()) then call print_status(iter, neval, njac, xnorm, fnorm) end if end do ! Report out iteration statistics if (present(ib)) then ib%iter_count = iter ib%fcn_count = neval ib%jacobian_count = njac ib%converge_on_fcn = fcnvrg ib%converge_on_chng = xcnvrg ib%converge_on_zero_diff = gcnvrg end if ! Check for convergence issues if (flag /= 0) then write(errmsg, 101) "The algorithm failed to " // & "converge. Function evaluations performed: ", neval, & "." // new_line('c') // "Change in Variable: ", xnorm, & new_line('c') // "Residual: ", fnorm call errmgr%report_error("lss_solve", trim(errmsg), & flag) end if ! Formatting 100 format(A, I0, A) 101 format(A, I0, A, E10.3, A, E10.3) end subroutine ! ------------------------------------------------------------------------------ subroutine lmpar(r, ipvt, diag, qtb, delta, par, x, sdiag, wa1, wa2) !! Completes the solution of the Levenberg-Marquardt problem when !! provided with a QR factored form of the system Jacobian matrix. The !! form of the problem at this stage is J*X = B (J = Jacobian), and !! D*X = 0, where D is a diagonal matrix. !! !! This routines is based upon the MINPACK routine LMPAR. real(real64), intent(inout), dimension(:,:) :: r !! On input, the N-by-N upper triangular matrix R1 of the !! QR factorization. On output, the upper triangular portion is !! unaltered, but the strict lower triangle contains the strict !! upper triangle (transposed) of the matrix S. integer(int32), intent(in), dimension(:) :: ipvt !! An N-element array tracking the pivoting operations from the !! original QR factorization. real(real64), intent(in), dimension(:) :: diag !! An N-element array containing the diagonal components of the !! matrix D. real(real64), intent(in), dimension(:) :: qtb !! An N-element array containing the first N elements of Q1**T * B. real(real64), intent(in) :: delta !! A positive input variable that specifies an upper bounds on the !! Euclidean norm of D*X. real(real64), intent(inout) :: par !! On input, the initial estimate of the Levenberg-Marquardt !! parameter. On output, the final estimate. real(real64), intent(out), dimension(:) :: x !! The N-element array that is the solution of A*X = B, and of !! D*X = 0. real(real64), intent(out), dimension(:) :: sdiag !! An N-element array containing the diagonal elements of the !! matrix S. real(real64), intent(out), dimension(:) :: wa1 !! An N-element workspace array. real(real64), intent(out), dimension(:) :: wa2 !! An N-element workspace array. ! Parameters real(real64), parameter :: zero = 0.0d0 real(real64), parameter :: p001 = 1.0d-3 real(real64), parameter :: p1 = 0.1d0 ! Local Variables integer :: iter, j, jm1, jp1, k, l, nsing, n real(real64) :: dxnorm, dwarf, fp, gnorm, parc, parl, paru, sm, temp ! Initialization n = size(r, 2) ! NOTE: R is M-by-N dwarf = tiny(dwarf) nsing = n ! Compute and store in X, the Gauss-Newton direction. If the Jacobian ! is rank deficient, obtain a least-squares solution. do j = 1, n wa1(j) = qtb(j) if (r(j,j) == zero .and. nsing == n) nsing = j - 1 if (nsing < n) wa1(j) = zero end do ! LINE 10 if (nsing >= 1) then do k = 1, nsing j = nsing - k + 1 wa1(j) = wa1(j) / r(j,j) temp = wa1(j) jm1 = j - 1 if (jm1 >= 1) then wa1(1:jm1) = wa1(1:jm1) - r(1:jm1,j) * temp end if end do ! LINE 40 end if ! LINE 50 do j = 1, n l = ipvt(j) x(l) = wa1(j) end do ! Initialize the iteration counter, evaluate the function at the origin, ! and test for acceptance of the Gauss-Newton direction. iter = 0 wa2(1:n) = diag * x dxnorm = norm2(wa2(1:n)) fp = dxnorm - delta if (fp <= p1 * delta) then ! LINE 220 if (iter == 0) par = zero return end if ! If the Jacobian is not rank deficient, the Newton step provides a ! lower bound, PARL, for the zero of the function; else, set this bound ! to zero parl = zero if (nsing == n) then do j = 1, n l = ipvt(j) wa1(j) = diag(l) * (wa2(l) / dxnorm) end do ! LINE 80 do j = 1, n sm = zero jm1 = j - 1 if (jm1 >= 1) then sm = dot_product(r(1:jm1,j), wa1(1:jm1)) end if wa1(j) = (wa1(j) - sm) / r(j,j) end do ! LINE 110 temp = norm2(wa1) parl = ((fp / delta) / temp) / temp end if ! Calculate an upper bound, PARU, for the zero of the function do j = 1, n sm = dot_product(r(1:j,j), qtb(1:j)) l = ipvt(j) wa1(j) = sm / diag(l) end do ! LINE 140 gnorm = norm2(wa1) paru = gnorm / delta if (paru == zero) paru = dwarf / min(delta, p1) ! If the input PAR lies outside of the interval (PARL,PARU), set ! PAR to the closer end point. par = max(par, parl) par = min(par, paru) if (par == zero) par = gnorm / dxnorm ! Iteration do iter = iter + 1 ! Evaluate the function at the current value of PAR if (par == zero) par = max(dwarf, p001 * paru) temp = sqrt(par) wa1 = temp * diag call lmsolve(r(1:n,1:n), ipvt, wa1, qtb, x, sdiag, wa2) wa2(1:n) = diag * x dxnorm = norm2(wa2) temp = fp fp = dxnorm - delta ! If the function is small enough, accept the current value of PAR. ! Also test for the exceptional cases where PARL is zero, or the ! number of iterations has reached 10 if (abs(fp) <= p1 * delta & .or. parl == zero .and. fp <= temp & .and. temp < zero .or. iter == 10) exit ! Compute the Newton correction do j = 1, n l = ipvt(j) wa1(j) = diag(l) * (wa2(l) / dxnorm) end do ! LINE 180 do j = 1, n wa1(j) = wa1(J) / sdiag(j) temp = wa1(j) jp1 = j + 1 if (n < jp1) cycle wa1 = wa1 - r(1:n,j) * temp end do ! LINE 210 temp = norm2(wa1) parc = ((fp / delta) / temp) / temp ! Depending on the sign of the function update PARL or PARU if (fp > zero) parl = max(parl, par) if (fp < zero) paru = min(paru, par) ! Compute an improved estimate for PAR par = max(parl, par + parc) end do ! LINE 220 (End of iteration) if (iter == zero) par = zero return end subroutine ! ------------------------------------------------------------------------------ subroutine lmfactor(a, pivot, ipvt, rdiag, acnorm, wa) !! Computes the QR factorization of an M-by-N matrix. !! !! This routines is based upon the MINPACK routine QRFAC. real(real64), intent(inout), dimension(:,:) :: a !! On input, the M-by-N matrix to factor. On output, the strict !! upper triangular portion contains matrix R1 of the factorization, !! the lower trapezoidal portion contains the factored form of Q1, !! and the diagonal contains the corresponding elementary reflector. logical, intent(in) :: pivot !! Set to true to utilize column pivoting; else, set to false for !! no pivoting. integer(int32), intent(out), dimension(:) :: ipvt !! An N-element array that is used to contain the pivot indices !! unless pivot is set to false. In such event, this array is !! unused. real(real64), intent(out), dimension(:) :: rdiag !! An N-element array used to store the diagonal elements of the !! R1 matrix. real(real64), intent(out), dimension(:) :: acnorm !! An N-element array used to contain the norms of each column in !! the event column pivoting is used. If pivoting is not used, !! this array is unused. real(real64), intent(out), dimension(:) :: wa !! An N-element workspace array. ! Parameters real(real64), parameter :: zero = 0.0d0 real(real64), parameter :: p05 = 5.0d-2 real(real64), parameter :: one = 1.0d0 ! Local Variables integer(int32) :: m, n, i, j, jp1, k, kmax, minmn real(real64) :: ajnorm, epsmch, sm, temp ! Initialization m = size(a, 1) n = size(a, 2) minmn = min(m, n) epsmch = epsilon(epsmch) ! Compute the initial column norms, and initialize several arrays do j = 1, n acnorm(j) = norm2(a(:,j)) rdiag(j) = acnorm(j) wa(j) = rdiag(j) if (pivot) ipvt(j) = j end do ! LINE 10 ! Reduce A to R with Householder transformations do j = 1, minmn if (pivot) then ! Bring the column of largest norm into the pivot position kmax = j do k = j, n if (rdiag(k) > rdiag(kmax)) kmax = k end do ! LINE 20 if (kmax /= j) then do i = 1, m temp = a(i,j) a(i,j) = a(i,kmax) a(i,kmax) = temp end do ! LINE 30 rdiag(kmax) = rdiag(j) wa(kmax) = wa(j) k = ipvt(j) ipvt(j) = ipvt(kmax) ipvt(kmax) = k end if end if ! LINE 40 ! Compute the Householder transformation to reduce the J-th column ! of A to a multiple of the J-th unit vector ajnorm = norm2(a(j:m,j)) if (ajnorm /= zero) then if (a(j,j) < zero) ajnorm = -ajnorm a(j:m,j) = a(j:m,j) / ajnorm a(j,j) = a(j,j) + one ! Apply the transformation to the remaining columns and update ! the norms jp1 = j + 1 if (n >= jp1) then do k = jp1, n sm = dot_product(a(j:m,j), a(j:m,k)) temp = sm / a(j,j) a(j:m,k) = a(j:m,k) - temp * a(j:m,j) if (.not.pivot .or. rdiag(k) == zero) cycle temp = a(j,k) / rdiag(k) rdiag(k) = rdiag(k) * sqrt(max(zero, one - temp**2)) if (p05 * (rdiag(k) / wa(k))**2 > epsmch) cycle rdiag(k) = norm2(a(jp1:m,k)) wa(k) = rdiag(k) end do ! LINE 90 end if end if ! LINE 100 rdiag(j) = -ajnorm end do ! LINE 110 end subroutine ! ------------------------------------------------------------------------------ subroutine lmsolve(r, ipvt, diag, qtb, x, sdiag, wa) !! Solves the QR factored system A*X = B, coupled with the diagonal !! system D*X = 0 in the least-squares sense. !! !! This routines is based upon the MINPACK routine QRSOLV. real(real64), intent(inout), dimension(:,:) :: r !! On input, the N-by-N upper triangular matrix R1 of the QR !! factorization. On output, the upper triangular portion is !! unaltered, but the strict lower triangle contains the strict !! upper triangle (transposed) of the matrix S. integer(int32), intent(in), dimension(:) :: ipvt !! An N-element array tracking the pivoting operations from the !! original QR factorization. real(real64), intent(in), dimension(:) :: diag !! An N-element array containing the diagonal components of the !! matrix D. real(real64), intent(in), dimension(:) :: qtb !! An N-element array containing the first N elements of Q1**T * B. real(real64), intent(out), dimension(:) :: x !! The N-element array that is the solution of A*X = B, and of !! D*X = 0. real(real64), intent(out), dimension(:) :: sdiag !! An N-element array containing the diagonal elements of the !! matrix S. real(real64), intent(out), dimension(:) :: wa !! An N-element workspace array. ! Parameters real(real64), parameter :: zero = 0.0d0 real(real64), parameter :: qtr = 0.25d0 real(real64), parameter :: half = 0.5d0 ! Local Variables integer(int32) :: n, i, j, jp1, k, kp1, l, nsing real(real64) :: cs, ctan, qtbpj, sn, sm, tn, temp ! Initialization n = size(r, 1) ! Copy R and Q**T*B to preserve inputs and initialize S do j = 1, n r(j:n,j) = r(j,j:n) x(j) = r(j,j) wa(j) = qtb(j) end do ! LINE 20 ! Eliminate the diagonal matrix D using a Givens rotation do j = 1, n ! Prepare the row of D to be eliminated, locating the diagonal ! element using P from the QR factorization l = ipvt(j) if (diag(l) /= zero) then sdiag(j:n) = zero sdiag(j) = diag(l) ! The transformations to eliminate the row of D modify only a ! single element of Q**T * B beyond the first N, which is ! initially zero. qtbpj = zero do k = j, n ! Determine a Givens rotation which eliminates the ! appropriate element in the current row of D if (sdiag(k) == zero) cycle if (abs(r(k,k)) < abs(sdiag(k))) then ctan = r(k,k) / sdiag(k) sn = half / sqrt(qtr + qtr * ctan**2) cs = sn * ctan else tn = sdiag(k) / r(k,k) cs = half / sqrt(qtr + qtr * tn**2) sn = cs * tn end if ! Compute the modified diagonal element of R and the ! modified element of Q**T * B r(k,k) = cs * r(k,k) + sn * sdiag(k) temp = cs * wa(k) + sn * qtbpj qtbpj = -sn * wa(k) + cs * qtbpj wa(k) = temp ! Accumulate the transformation in the row of S kp1 = k + 1 if (n < kp1) cycle do i = kp1, n temp = cs * r(i,k) + sn * sdiag(i) sdiag(i) = -sn * r(i,k) + cs * sdiag(i) r(i,k) = temp end do ! LINE 60 end do ! LINE 80 end if ! Store the diagonal element of S and restore the corresponding ! diagonal element of R sdiag(j) = r(j,j) r(j,j) = x(j) end do ! LINE 100 ! Solve the triangular system. If the system is singular, then obtain a ! least-squares solution nsing = n do j = 1, n if (sdiag(j) == zero .and. nsing == n) nsing = j - 1 if (nsing < n) wa(j) = zero end do ! LINE 110 if (nsing >= 1) then do k = 1, nsing j = nsing - k + 1 sm = zero jp1 = j + 1 if (nsing >= jp1) then sm = dot_product(r(jp1:nsing,j), wa(jp1:nsing)) end if wa(j) = (wa(j) - sm) / sdiag(j) end do ! LINE 140 end if ! Permute the components of Z back to components of X do j = 1, n l = ipvt(j) x(l) = wa(j) end do ! LINE 160 end subroutine ! ****************************************************************************** ! CONSTRAINED_EQUATION_SOLVER MEMBERS ! ------------------------------------------------------------------------------ pure function ces_get_upper_bounds(this) result(rst) !! Gets the array of upper bounds constraints. class(constrained_equation_solver), intent(in) :: this !! The [[constrained_equation_solver]] object. real(real64), allocatable, dimension(:) :: rst !! The limit array. if (allocated(this%m_upper)) then rst = this%m_upper else allocate(rst(0)) end if end function ! -------------------- subroutine ces_set_upper_bounds(this, x) !! Sets the array of upper bounds constraints. class(constrained_equation_solver), intent(inout) :: this !! The [[constrained_equation_solver]] object. real(real64), intent(in), dimension(:) :: x !! The limit array. if (allocated(this%m_upper)) then deallocate(this%m_upper) this%m_upper = x else this%m_upper = x end if end subroutine ! ------------------------------------------------------------------------------ pure function ces_get_lower_bounds(this) result(rst) !! Gets the array of lower bounds constraints. class(constrained_equation_solver), intent(in) :: this !! The [[constrained_equation_solver]] object. real(real64), allocatable, dimension(:) :: rst !! The limit array. if (allocated(this%m_lower)) then rst = this%m_lower else allocate(rst(0)) end if end function ! -------------------- subroutine ces_set_lower_bounds(this, x) !! Sets the array of lower bounds constraints. class(constrained_equation_solver), intent(inout) :: this !! The [[constrained_equation_solver]] object. real(real64), intent(in), dimension(:) :: x !! The limit array. if (allocated(this%m_lower)) then deallocate(this%m_lower) this%m_lower = x else this%m_lower = x end if end subroutine ! ------------------------------------------------------------------------------ subroutine ces_apply_limits(this, x) !! Applies the limits to the solution vector. class(constrained_equation_solver), intent(in) :: this !! The [[constrained_equation_solver]] object. real(real64), intent(inout), dimension(:) :: x !! On input, the solution vector. On output, the clamped solution !! vector. ! Local Variables integer(int32) :: i, nu, nl, n real(real64), allocatable, dimension(:) :: maxX, minX ! Process maxX = this%get_upper_limits() minX = this%get_lower_limits() n = size(x) nu = min(n, size(maxX)) nl = min(n, size(minX)) do i = 1, nl if (x(i) < minX(i)) x(i) = minX(i) end do do i = 1, nu if (x(i) > maxX(i)) x(i) = maxX(i) end do end subroutine ! ****************************************************************************** ! CONSTRAINED_LEAST_SQUARES_SOLVER MEMBERS & HELPER ROUTINES ! ------------------------------------------------------------------------------ pure function cls_get_radius(this) result(rst) !! Gets the initial trust-region radius. class(constrained_least_squares_solver), intent(in) :: this !! The [[constrained_least_squares_solver]] object. real(real64) :: rst !! The trust-region radius. rst = this%m_delta end function ! -------------------- subroutine cls_set_radius(this, x) !! Sets the initial trust region radius. class(constrained_least_squares_solver), intent(inout) :: this !! The [[constrained_least_squares_solver]] object. real(real64), intent(in) :: x !! The The trust-region radius. if (x <= 0.0d0) then this%m_delta = 1.0d0 else this%m_delta = x end if end subroutine ! ------------------------------------------------------------------------------ pure function cls_get_factor(this) result(rst) !! Gets the initial line-search step size scaling factor. class(constrained_least_squares_solver), intent(in) :: this !! The [[constrained_least_squares_solver]] object. real(real64) :: rst !! The scaling factor. rst = this%m_scaling end function ! -------------------- subroutine cls_set_factor(this, x) !! Gets the initial line-search step size scaling factor. class(constrained_least_squares_solver), intent(inout) :: this !! The [[constrained_least_squares_solver]] object. real(real64), intent(in) :: x !! The scaling factor. if (x <= 0.0d0) then this%m_scaling = 1.0d0 else this%m_scaling = x end if end subroutine ! ------------------------------------------------------------------------------ subroutine cls_solve(this, fcn, x, fvec, ib, args, err) !! Applies the constrained least-squares solver to solve the nonlinear !! least-squares problem. class(constrained_least_squares_solver), intent(inout) :: this !! The [[constrained_least_squares_solver]] object. class(vecfcn_helper), intent(in) :: fcn !! The [[vecfcn_helper]] object containing the equations to solve. real(real64), intent(inout), dimension(:) :: x !! On input, an N-element array containing an initial estimate !! to the solution. On output, the updated solution estimate. !! N is the number of variables. real(real64), intent(out), dimension(:) :: fvec !! An M-element array that, on output, will contain the values !! of each equation as evaluated at the variable values given !! in x. type(iteration_behavior), optional :: ib !! An optional output, that if provided, allows the caller to !! obtain iteration performance statistics. class(*), intent(inout), optional :: args !! An optional argument to allow the user to communicate with !! the routine. class(errors), intent(inout), optional, target :: err !! An error handling object. ! Parameters real(real64), parameter :: delta_max = 1.0d3 real(real64), parameter :: eta = 1.0d-1 integer(int32), parameter :: ls_max_iter = 10 real(real64), parameter :: ls_cl = 1.0d-4 real(real64), parameter :: ls_beta = 0.5d0 ! Local Variables logical :: converged, xcnvrg, fcnvrg, gcnvrg integer(int32) :: k, neqn, nvar, neval, iter, njac, maxeval, flag real(real64) :: xnorm, fnorm, gnorm, factor, fnewnorm, ftol, gtol, & xtol, actred, prered, rho, delta, stepscale, dderiv real(real64), allocatable, dimension(:) :: tau, Jp, s, g, p, xnew, & fnew, xl, xu real(real64), allocatable, dimension(:,:) :: qr, jac class(errors), pointer :: errmgr type(errors), target :: deferr character(len = 256) :: errmsg ! Initialization converged = .false. xcnvrg = .false. fcnvrg = .false. gcnvrg = .false. neqn = fcn%get_equation_count() nvar = fcn%get_variable_count() neval = 0 iter = 0 njac = 0 ftol = this%get_fcn_tolerance() xtol = this%get_var_tolerance() gtol = this%get_gradient_tolerance() maxeval = this%get_max_fcn_evals() xl = this%get_lower_limits() xu = this%get_upper_limits() if (present(err)) then errmgr => err else errmgr => deferr end if if (present(ib)) then ib%iter_count = iter ib%fcn_count = neval ib%jacobian_count = njac ib%converge_on_fcn = fcnvrg ib%converge_on_chng = xcnvrg ib%converge_on_zero_diff = gcnvrg ib%gradient_count = 0 end if ! Input Check if (.not.fcn%is_fcn_defined()) then ! ERROR: No function is defined call errmgr%report_error("cls_solve", & "No function has been defined.", & NL_INVALID_OPERATION_ERROR) return end if if (nvar > neqn) then ! ERROR: System is underdetermined call errmgr%report_error("cls_solve", "The solver cannot " // & "solve the underdetermined problem. The number of " // & "unknowns must not exceed the number of equations.", & NL_INVALID_INPUT_ERROR) return end if flag = 0 if (size(x) /= nvar) then flag = 3 else if (size(fvec) /= neqn) then flag = 4 end if if (flag /= 0) then ! One of the input arrays is not sized correctly write(errmsg, 100) "Input number ", flag, & " is not sized correctly." call errmgr%report_error("cls_solve", trim(errmsg), & NL_ARRAY_SIZE_ERROR) return end if ! Check the limit arrays if (size(xl) /= nvar) then deallocate(xl) allocate(xl(nvar), source = -huge(0.0d0)) call this%set_lower_limits(xl) end if if (size(xu) /= nvar) then deallocate(xu) allocate(xu(nvar), source = huge(0.0d0)) call this%set_upper_limits(xu) end if ! Local Memory Allocations allocate( & tau(min(neqn, nvar)), & Jp(neqn), & s(nvar), & g(nvar), & p(nvar), & xnew(nvar), & fnew(neqn), & qr(neqn, nvar), & jac(neqn, nvar) & ) ! Initial Evaluations call this%apply_limits(x) call fcn%fcn(x, fvec, args) neval = 1 fnorm = norm2(fvec) xnorm = norm2(x) if (.not. is_finite_array(x) .or. .not. is_finite_array(fvec)) then return end if ! Process delta = this%get_trust_region_radius() iter = 1 outer : do ! Evaluate the Jacobian call fcn%jacobian(x, jac, fv = fvec, args = args) njac = njac + 1 ! Print iteration status if (this%get_print_status()) then call print_status(iter, neval, njac, xnorm, fnorm) end if ! Compute the QR factorization of J qr = jac call qr_factor(qr, tau) ! Compute the scaling factors call coleman_li_scaling(x, xl, xu, s) ! Determine the step direction call dogleg(delta, x, fvec, jac, qr, tau, s, xl, xu, p, g, Jp, & prered) xnorm = scaled_norm(p, s) gnorm = norm2(g) xnew = x + p ! Update the residual call fcn%fcn(xnew, fnew, args) fnewnorm = norm2(fnew) neval = neval + 1 ! Compute the actual reduction and the reduction ratio actred = 0.5d0 * (fnorm**2 - fnewnorm**2) if (prered > 0.0d0 .and. actred >= 0.0d0) then rho = actred / prered else rho = 0.0d0 end if ! Update the trust region radius if (rho < 0.25d0) then delta = max(0.25d0, 1.0d-12) else if (rho > 0.75d0 .and. abs(xnorm - delta) < 1.0d-12 * delta) then delta = min(2.0d0 * delta, delta_max) end if ! Accept the step? if (rho > eta .and. fnewnorm <= fnorm) then ! Accept the trust-region step x = xnew call this%apply_limits(x) fvec = fnew fnorm = fnewnorm iter = iter + 1 else ! Try an Armijo backtracking step along p dderiv = dot_product(g, p) if (dderiv >= 0.0d0) then ! This isn't a descent direction. Just shrink the trust ! region delta = max(0.5d0 * delta, 1.0d-12) else stepscale = this%get_step_scaling_factor() backtrack : do k = 1, ls_max_iter xnew = x + stepscale * p call this%apply_limits(xnew) call fcn%fcn(xnew, fnew, args) neval = neval + 1 fnewnorm = norm2(fnew) if (fnewnorm <= fnorm + ls_cl * stepscale * dderiv) then ! Accept the line-search step x = xnew fvec = fnew fnorm = fnewnorm iter = iter + 1 ! Modestly adjust the trust-region radius delta = max(stepscale * xnorm, 1.0d-12) exit backtrack end if stepscale = stepscale * ls_beta end do backtrack ! If the line search fails, shrink the trust region if (k > ls_max_iter) then delta = max(0.5d0 * delta, 1.0d-12) end if end if end if if (.not. is_finite_array(x) .or. .not. is_finite_array(fvec)) then exit outer end if ! Test for convergence if (xnorm <= xtol) then converged = .true. xcnvrg = .true. exit outer end if if (fnorm <= ftol) then converged = .true. fcnvrg = .true. exit outer end if if (gnorm <= gtol) then converged = .true. gcnvrg = .true. exit outer end if if (neval >= maxeval) exit outer end do outer ! Report out iteration statistics if (present(ib)) then ib%iter_count = iter ib%fcn_count = neval ib%jacobian_count = njac ib%converge_on_fcn = fcnvrg ib%converge_on_chng = xcnvrg ib%converge_on_zero_diff = gcnvrg end if ! Check for convergence issues if (.not.converged) then write(errmsg, 101) "The algorithm failed to " // & "converge. Function evaluations performed: ", neval, & "." // new_line('c') // "Change in Variable: ", xnorm, & new_line('c') // "Residual: ", fnorm call errmgr%report_error("cls_solve", trim(errmsg), & flag) end if ! Formatting 100 format(A, I0, A) 101 format(A, I0, A, E10.3, A, E10.3) end subroutine ! ****************************************************************************** ! HELPER ROUTINES ! ------------------------------------------------------------------------------ pure function alpha_box(x, p, xl, xu) result(rst) !! Computes the maximum feasible alpha such that: !! xl <= x + alpha * p <= xu. real(real64), intent(in), dimension(:) :: x !! The N-element array containing the current solution estimate. real(real64), intent(in), dimension(:) :: p !! The N-element array containing the proposed solution step. real(real64), intent(in), dimension(:) :: xl !! The N-element array containing the lower bounds. real(real64), intent(in), dimension(:) :: xu !! The N-element array containing the upper bounds. real(real64) :: rst !! The largest possible alpha. integer(int32) :: i, n real(real64) :: a n = size(x) rst = huge(rst) do i = 1, n if (p(i) > 0.0d0) then if (xu(i) < x(i)) then rst = 0.0d0 return end if a = (xu(i) - x(i)) / p(i) if (a < rst) rst = a else if (p(i) < 0.0d0) then if (xl(i) > x(i)) then rst = 0.0d0 return end if a = (xl(i) - x(i)) / p(i) if (a < rst) rst = a end if end do if (rst < 0.0d0) rst = 0.0d0 end function ! ------------------------------------------------------------------------------ subroutine coleman_li_scaling(x, xl, xu, s) !! Computes the Coleman-Li scaling matrix. real(real64), intent(in), dimension(:) :: x !! The N-element array containing the current solution estimate. real(real64), intent(in), dimension(size(x)) :: xl !! The N-element array containing the lower bounds. real(real64), intent(in), dimension(size(x)) :: xu !! The N-element array containing the upper bounds. real(real64), intent(out), dimension(size(x)) :: s !! The N-element array containing the diagonal of the scaling !! matrix. ! Parameters real(real64), parameter :: min_scale = 1.0d-8 real(real64), parameter :: max_scale = 1.0d8 ! Local Variables integer(int32) :: i, n real(real64) :: di, big ! Process n = size(x) big = huge(1.0d0) do i = 1, n if (xl(i) > -big .and. xu(i) < big) then di = min(x(i) - xl(i), xu(i) - x(i)) else if (xl(i) > -big) then di = x(i) - xl(i) else if (xu(i) < big) then di = xu(i) - x(i) else di = 1.0d0 end if di = max(di, min_scale) s(i) = 1.0d0 / di if (s(i) > max_scale) s(i) = max_scale end do end subroutine ! ------------------------------------------------------------------------------ pure function scaled_norm(x, s) result(rst) !! Computes a scaled Euclidean norm. real(real64), intent(in), dimension(:) :: x !! The N-element array. real(real64), intent(in), dimension(size(x)) :: s !! The N-element scaling array. real(real64) :: rst !! The result rst = norm2(x * s) end function ! ------------------------------------------------------------------------------ pure function is_finite_array(x) result(rst) !! Tests to see if the array contains all finite values. real(real64), intent(in), dimension(:) :: x !! The array to test. logical :: rst !! Returns true if all values are finite; else, false. ! Local Variables integer(int32) :: i ! Process rst = .true. do i = 1, size(x) if (.not.(x(i) == x(i))) then rst = .false. exit end if if (abs(x(i)) == huge(0.0d0)) then rst = .false. exit end if end do end function ! ------------------------------------------------------------------------------ subroutine dogleg(delta, x, f, jac, qr, tau, s, xl, xu, p, g, Jp, prered) !! Computes a dog-leg step for Powell's method. real(real64), intent(in) :: delta !! The trust-region radius. real(real64), intent(in), dimension(:) :: x !! The N-element array containing the current solution estimate. real(real64), intent(in), dimension(:) :: f !! The M-element array containing the residuals. real(real64), intent(in), dimension(:,:) :: jac !! The M-by-N Jacobian matrix. real(real64), intent(inout), dimension(:,:) :: qr !! The M-by-N matrix output by qr_factor. real(real64), intent(in), dimension(:) :: tau !! The MIN(M,N)-element array, tau, as output by qr_factor. real(real64), intent(in), dimension(:) :: s !! The N-element diagonal of the scaling matrix. real(real64), intent(in), dimension(:) :: xl !! The N-element array containing the lower bounds. real(real64), intent(in), dimension(:) :: xu !! The N-element array containing the upper bounds. real(real64), intent(out), dimension(:) :: p !! The updated step size. real(real64), intent(out), dimension(:) :: g !! The N-element gradient vector. real(real64), intent(out), dimension(:) :: Jp !! The M-element array resulting from J * p. real(real64), intent(out) :: prered !! The predicted reduction. ! Local Variables integer(int32) :: m, n real(real64) :: alpha, pgnnorm, psdnorm, t, c1, c2, a, b, c, arg real(real64), allocatable, dimension(:) :: pgn, psd, u, v, Jg ! Initialization m = size(jac, 1) n = size(jac, 2) allocate(pgn(n), psd(n), u(n), v(n), Jg(m)) ! Compute the gradient vector call dgemv('T', m, n, 1.0d0, jac, m, f, 1, 0.0d0, g, 1) ! Solve the linear system for the Gauss-Newton step u = f call solve_qr(qr, tau, u) ! solution stored in u(1:n) pgn = -u(1:n) pgnnorm = scaled_norm(pgn, s) ! Is it enough, or do we need to try a steepest decsent step? if (pgnnorm > delta) then ! The Gauss-Newton step failed. Try the steepest descent step call dgemv('N', m, n, 1.0d0, jac, m, g, 1, 0.0d0, Jg, 1) c1 = dot_product(g, g) c2 = dot_product(Jg, Jg) if (c2 > 0.0d0 .and. c1 > 0.0d0) then alpha = c1 / c2 else alpha = 0.0d0 end if psd = -alpha * g psdnorm = scaled_norm(psd, s) if (psdnorm >= delta .and. psdnorm > 0.0d0) then ! Go ahead and use the steepest descent step p = (delta / psdnorm) * psd else u(1:n) = pgn - psd u(1:n) = s * u(1:n) v = s * psd a = dot_product(u(1:n), u(1:n)) b = 2.0d0 * dot_product(u(1:n), v) c = dot_product(v, v) - delta**2 if (a <= 0.0d0) then p = psd else arg = max(0.0d0, b**2 - 4.0d0 * a * c) if (arg == 0.0d0) then t = -b / (2.0d0 * a) else t = (-b + sqrt(arg)) / (2.0d0 * a) if (t < 0.0d0 .or. t > 1.0d0) then t = (-b - sqrt(arg)) / (2.0d0 * a) end if end if t = max(0.0d0, min(1.0d0, t)) p = psd + t * u(1:n) end if end if else ! Just use the Gauss-Newton step p = pgn end if ! Ensure to respect any limits alpha = alpha_box(x, p, xl, xu) if (alpha < 1.0d0) then p = alpha * p end if ! Compute the predicted reduction call dgemv('N', m, n, 1.0d0, jac, m, p, 1, 0.0d0, Jp, 1) c1 = dot_product(g, p) c2 = 0.5d0 * dot_product(Jp, Jp) prered = -c1 - c2 end subroutine ! ------------------------------------------------------------------------------ end module