Actual source code: ntl.c
1: /*$Id: ntl.c,v 1.4 2009-08-25 16:25:11 sarich Exp $*/
3: #include "ntl.h" /*I "tao_solver.h" I*/
5: #ifdef TAO_USE_PETSC
6: #include "petscksp.h"
7: #include "petscpc.h"
8: #include "src/petsctao/linearsolver/taolinearsolver_petsc.h"
9: #include "src/petsctao/vector/taovec_petsc.h"
11: #include "private/kspimpl.h"
12: #include "private/pcimpl.h"
14: #define NTL_KSP_NASH 0
15: #define NTL_KSP_STCG 1
16: #define NTL_KSP_GLTR 2
17: #define NTL_KSP_TYPES 3
19: #define NTL_PC_NONE 0
20: #define NTL_PC_AHESS 1
21: #define NTL_PC_BFGS 2
22: #define NTL_PC_PETSC 3
23: #define NTL_PC_TYPES 4
25: #define BFGS_SCALE_AHESS 0
26: #define BFGS_SCALE_BFGS 1
27: #define BFGS_SCALE_TYPES 2
29: #define NTL_INIT_CONSTANT 0
30: #define NTL_INIT_DIRECTION 1
31: #define NTL_INIT_INTERPOLATION 2
32: #define NTL_INIT_TYPES 3
34: #define NTL_UPDATE_REDUCTION 0
35: #define NTL_UPDATE_INTERPOLATION 1
36: #define NTL_UPDATE_TYPES 2
38: static const char *NTL_KSP[64] = {
39: "nash", "stcg", "gltr"
40: };
42: static const char *NTL_PC[64] = {
43: "none", "ahess", "bfgs", "petsc"
44: };
46: static const char *BFGS_SCALE[64] = {
47: "ahess", "bfgs"
48: };
50: static const char *NTL_INIT[64] = {
51: "constant", "direction", "interpolation"
52: };
54: static const char *NTL_UPDATE[64] = {
55: "reduction", "interpolation"
56: };
58: // Routine for BFGS preconditioner
62: static PetscErrorCode bfgs_apply(PC pc, Vec xin, Vec xout)
63: {
64: TaoLMVMMat *M;
66: TaoVecPetsc Xin(xin);
67: TaoVecPetsc Xout(xout);
68: TaoTruth info2;
69: int info;
72: info = PCShellGetContext(pc,(void**)&M); CHKERRQ(info);
73: info = M->Solve(&Xin, &Xout, &info2); CHKERRQ(info);
74: return(0);
75: }
77: // Implements Newton's Method with a trust-region, line-search approach for
78: // solving unconstrained minimization problems. A More'-Thuente line search
79: // is used to guarantee that the bfgs preconditioner remains positive
80: // definite.
82: #define NTL_NEWTON 0
83: #define NTL_BFGS 1
84: #define NTL_SCALED_GRADIENT 2
85: #define NTL_GRADIENT 3
89: static int TaoSolve_NTL(TAO_SOLVER tao, void *solver)
90: {
91: TAO_NTL *tl = (TAO_NTL *)solver;
92: TaoVec *X, *G = tl->G, *D = tl->D, *W = tl->W;
93: TaoVec *Xold = tl->Xold, *Gold = tl->Gold, *Diag = tl->Diag;
94: TaoMat *H;
95: TaoLMVMMat *M = tl->M;
97: TaoLinearSolver *tls;
98: TaoLinearSolverPetsc *pls;
100: KSP pksp;
101: PC ppc;
103: KSPConvergedReason ksp_reason;
104: TaoTerminateReason reason;
105: TaoTruth success;
106:
107: double fmin, ftrial, f_full, prered, actred, kappa, sigma;
108: double tau, tau_1, tau_2, tau_max, tau_min, max_radius;
109: double f, fold, gdx, gnorm;
110: double step = 1.0;
112: double delta;
113: double radius, norm_d = 0.0;
115: int info;
116: TaoInt stepType;
117: TaoInt iter = 0, status = 0;
118: TaoInt bfgsUpdates = 0;
119: TaoInt needH;
121: TaoInt i_max = 5;
122: TaoInt j_max = 1;
123: TaoInt i, j;
125: TaoInt tr_reject;
127: TaoFunctionBegin;
129: // Initialize trust-region radius
130: info = TaoGetInitialTrustRegionRadius(tao, &radius); CHKERRQ(info);
131: if (radius < 0.0) {
132: SETERRQ(1, "Initial radius negative");
133: }
135: // Modify the radius if it is too large or small
136: radius = TaoMax(radius, tl->min_radius);
137: radius = TaoMin(radius, tl->max_radius);
139: // Get vectors we will need
140: info = TaoGetSolution(tao, &X); CHKERRQ(info);
141: info = TaoGetHessian(tao, &H); CHKERRQ(info);
143: if (NTL_PC_BFGS == tl->pc_type && !M) {
144: tl->M = new TaoLMVMMat(X);
145: M = tl->M;
146: }
148: // Check convergence criteria
149: info = TaoComputeFunctionGradient(tao, X, &f, G); CHKERRQ(info);
150: info = G->Norm2(&gnorm); CHKERRQ(info);
151: if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
152: SETERRQ(1, "User provided compute function generated Inf or NaN");
153: }
154: needH = 1;
156: info = TaoMonitor(tao, iter, f, gnorm, 0.0, 1.0, &reason); CHKERRQ(info);
157: if (reason != TAO_CONTINUE_ITERATING) {
158: TaoFunctionReturn(0);
159: }
161: // Create vectors for the limited memory preconditioner
162: if ((NTL_PC_BFGS == tl->pc_type) &&
163: (BFGS_SCALE_BFGS != tl->bfgs_scale_type)) {
164: if (!Diag) {
165: info = X->Clone(&tl->Diag); CHKERRQ(info);
166: Diag = tl->Diag;
167: }
168: }
170: // Modify the linear solver to a conjugate gradient method
171: info = TaoGetLinearSolver(tao, &tls); CHKERRQ(info);
172: pls = dynamic_cast <TaoLinearSolverPetsc *> (tls);
174: pksp = pls->GetKSP();
175: switch(tl->ksp_type) {
176: case NTL_KSP_NASH:
177: info = KSPSetType(pksp, KSPNASH); CHKERRQ(info);
178: if (pksp->ops->setfromoptions) {
179: (*pksp->ops->setfromoptions)(pksp);
180: }
181: break;
183: case NTL_KSP_STCG:
184: info = KSPSetType(pksp, KSPSTCG); CHKERRQ(info);
185: if (pksp->ops->setfromoptions) {
186: (*pksp->ops->setfromoptions)(pksp);
187: }
188: break;
190: default:
191: info = KSPSetType(pksp, KSPGLTR); CHKERRQ(info);
192: if (pksp->ops->setfromoptions) {
193: (*pksp->ops->setfromoptions)(pksp);
194: }
195: break;
196: }
198: // Modify the preconditioner to use the bfgs approximation
199: info = KSPGetPC(pksp, &ppc); CHKERRQ(info);
200: switch(tl->pc_type) {
201: case NTL_PC_NONE:
202: info = PCSetType(ppc, PCNONE); CHKERRQ(info);
203: if (ppc->ops->setfromoptions) {
204: (*ppc->ops->setfromoptions)(ppc);
205: }
206: break;
208: case NTL_PC_AHESS:
209: info = PCSetType(ppc, PCJACOBI); CHKERRQ(info);
210: if (ppc->ops->setfromoptions) {
211: (*ppc->ops->setfromoptions)(ppc);
212: }
213: info = PCJacobiSetUseAbs(ppc); CHKERRQ(info);
214: break;
216: case NTL_PC_BFGS:
217: info = PCSetType(ppc, PCSHELL); CHKERRQ(info);
218: if (ppc->ops->setfromoptions) {
219: (*ppc->ops->setfromoptions)(ppc);
220: }
221: info = PCShellSetName(ppc, "bfgs"); CHKERRQ(info);
222: info = PCShellSetContext(ppc, M); CHKERRQ(info);
223: info = PCShellSetApply(ppc, bfgs_apply); CHKERRQ(info);
224: break;
226: default:
227: // Use the pc method set by pc_type
228: break;
229: }
231: // Initialize trust-region radius. The initialization is only performed
232: // when we are using Steihaug-Toint or the Generalized Lanczos method.
233: switch(tl->init_type) {
234: case NTL_INIT_CONSTANT:
235: // Use the initial radius specified
236: break;
238: case NTL_INIT_INTERPOLATION:
239: // Use the initial radius specified
240: max_radius = 0.0;
241:
242: for (j = 0; j < j_max; ++j) {
243: fmin = f;
244: sigma = 0.0;
245:
246: if (needH) {
247: info = TaoComputeHessian(tao, X, H); CHKERRQ(info);
248: needH = 0;
249: }
250:
251: for (i = 0; i < i_max; ++i) {
252: info = W->Waxpby(1.0, X, -radius / gnorm, G); CHKERRQ(info);
254: info = TaoComputeFunction(tao, W, &ftrial); CHKERRQ(info);
255: if (TaoInfOrNaN(ftrial)) {
256: tau = tl->gamma1_i;
257: }
258: else {
259: if (ftrial < fmin) {
260: fmin = ftrial;
261: sigma = -radius / gnorm;
262: }
264: info = H->Multiply(G, D); CHKERRQ(info);
265: info = D->Dot(G, &prered); CHKERRQ(info);
267: prered = radius * (gnorm - 0.5 * radius * prered / (gnorm * gnorm));
268: actred = f - ftrial;
269: if ((fabs(actred) <= tl->epsilon) &&
270: (fabs(prered) <= tl->epsilon)) {
271: kappa = 1.0;
272: }
273: else {
274: kappa = actred / prered;
275: }
277: tau_1 = tl->theta_i * gnorm * radius / (tl->theta_i * gnorm * radius + (1.0 - tl->theta_i) * prered - actred);
278: tau_2 = tl->theta_i * gnorm * radius / (tl->theta_i * gnorm * radius - (1.0 + tl->theta_i) * prered + actred);
279: tau_min = TaoMin(tau_1, tau_2);
280: tau_max = TaoMax(tau_1, tau_2);
282: if (fabs(kappa - 1.0) <= tl->mu1_i) {
283: // Great agreement
284: max_radius = TaoMax(max_radius, radius);
286: if (tau_max < 1.0) {
287: tau = tl->gamma3_i;
288: }
289: else if (tau_max > tl->gamma4_i) {
290: tau = tl->gamma4_i;
291: }
292: else if (tau_1 >= 1.0 && tau_1 <= tl->gamma4_i && tau_2 < 1.0) {
293: tau = tau_1;
294: }
295: else if (tau_2 >= 1.0 && tau_2 <= tl->gamma4_i && tau_1 < 1.0) {
296: tau = tau_2;
297: }
298: else {
299: tau = tau_max;
300: }
301: }
302: else if (fabs(kappa - 1.0) <= tl->mu2_i) {
303: // Good agreement
304: max_radius = TaoMax(max_radius, radius);
306: if (tau_max < tl->gamma2_i) {
307: tau = tl->gamma2_i;
308: }
309: else if (tau_max > tl->gamma3_i) {
310: tau = tl->gamma3_i;
311: }
312: else {
313: tau = tau_max;
314: }
315: }
316: else {
317: // Not good agreement
318: if (tau_min > 1.0) {
319: tau = tl->gamma2_i;
320: }
321: else if (tau_max < tl->gamma1_i) {
322: tau = tl->gamma1_i;
323: }
324: else if ((tau_min < tl->gamma1_i) && (tau_max >= 1.0)) {
325: tau = tl->gamma1_i;
326: }
327: else if ((tau_1 >= tl->gamma1_i) && (tau_1 < 1.0) &&
328: ((tau_2 < tl->gamma1_i) || (tau_2 >= 1.0))) {
329: tau = tau_1;
330: }
331: else if ((tau_2 >= tl->gamma1_i) && (tau_2 < 1.0) &&
332: ((tau_1 < tl->gamma1_i) || (tau_2 >= 1.0))) {
333: tau = tau_2;
334: }
335: else {
336: tau = tau_max;
337: }
338: }
339: }
340: radius = tau * radius;
341: }
342:
343: if (fmin < f) {
344: f = fmin;
345: info = X->Axpy(sigma, G); CHKERRQ(info);
346: info = TaoComputeGradient(tao, X, G); CHKERRQ(info);
347:
348: info = G->Norm2(&gnorm); CHKERRQ(info);
349: if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
350: SETERRQ(1, "User provided compute function generated Inf or NaN");
351: }
352: needH = 1;
353:
354: info = TaoMonitor(tao, iter, f, gnorm, 0.0, 1.0, &reason); CHKERRQ(info);
355: if (reason != TAO_CONTINUE_ITERATING) {
356: TaoFunctionReturn(0);
357: }
358: }
359: }
360: radius = TaoMax(radius, max_radius);
362: // Modify the radius if it is too large or small
363: radius = TaoMax(radius, tl->min_radius);
364: radius = TaoMin(radius, tl->max_radius);
365: break;
367: default:
368: // Norm of the first direction will initialize radius
369: radius = 0.0;
370: break;
371: }
373: // Set initial scaling for the BFGS preconditioner
374: // This step is done after computing the initial trust-region radius
375: // since the function value may have decreased
376: if (NTL_PC_BFGS == tl->pc_type) {
377: if (f != 0.0) {
378: delta = 2.0 * TaoAbsDouble(f) / (gnorm*gnorm);
379: }
380: else {
381: delta = 2.0 / (gnorm*gnorm);
382: }
383: info = M->SetDelta(delta); CHKERRQ(info);
384: }
386: // Set counter for gradient/reset steps
387: tl->trust = 0;
388: tl->newt = 0;
389: tl->bfgs = 0;
390: tl->sgrad = 0;
391: tl->grad = 0;
393: // Have not converged; continue with Newton method
394: while (reason == TAO_CONTINUE_ITERATING) {
395: ++iter;
397: // Compute the Hessian
398: if (needH) {
399: info = TaoComputeHessian(tao, X, H); CHKERRQ(info);
400: needH = 0;
401: }
403: if (NTL_PC_BFGS == tl->pc_type) {
404: if (BFGS_SCALE_AHESS == tl->bfgs_scale_type) {
405: // Obtain diagonal for the bfgs preconditioner
406: info = H->GetDiagonal(Diag); CHKERRQ(info);
407: info = Diag->AbsoluteValue(); CHKERRQ(info);
408: info = Diag->Reciprocal(); CHKERRQ(info);
409: info = M->SetScale(Diag); CHKERRQ(info);
410: }
412: // Update the limited memory preconditioner
413: info = M->Update(X, G); CHKERRQ(info);
414: ++bfgsUpdates;
415: }
417: // Solve the Newton system of equations
418: info = TaoPreLinearSolve(tao, H); CHKERRQ(info);
419: info = TaoLinearSolveTrustRegion(tao, H, G, D, radius, &success); CHKERRQ(info);
420: info = pls->GetNormDirection(&norm_d); CHKERRQ(info);
421: if (0.0 == radius) {
422: // Radius was uninitialized; use the norm of the direction
423: if (norm_d > 0.0) {
424: radius = norm_d;
426: // Modify the radius if it is too large or small
427: radius = TaoMax(radius, tl->min_radius);
428: radius = TaoMin(radius, tl->max_radius);
429: }
430: else {
431: // The direction was bad; set radius to default value and re-solve
432: // the trust-region subproblem to get a direction
433: info = TaoGetInitialTrustRegionRadius(tao, &radius); CHKERRQ(info);
435: // Modify the radius if it is too large or small
436: radius = TaoMax(radius, tl->min_radius);
437: radius = TaoMin(radius, tl->max_radius);
439: info = TaoLinearSolveTrustRegion(tao, H, G, D, radius, &success); CHKERRQ(info);
440: info = pls->GetNormDirection(&norm_d); CHKERRQ(info);
441: if (norm_d == 0.0) {
442: SETERRQ(1, "Initial direction zero");
443: }
444: }
445: }
446: info = D->Negate(); CHKERRQ(info);
448: info = KSPGetConvergedReason(pksp, &ksp_reason); CHKERRQ(info);
449: if ((KSP_DIVERGED_INDEFINITE_PC == ksp_reason) &&
450: (NTL_PC_BFGS == tl->pc_type) && (bfgsUpdates > 1)) {
451: // Preconditioner is numerically indefinite; reset the
452: // approximate if using BFGS preconditioning.
454: if (f != 0.0) {
455: delta = 2.0 * TaoAbsDouble(f) / (gnorm*gnorm);
456: }
457: else {
458: delta = 2.0 / (gnorm*gnorm);
459: }
460: info = M->SetDelta(delta); CHKERRQ(info);
461: info = M->Reset(); CHKERRQ(info);
462: info = M->Update(X, G); CHKERRQ(info);
463: bfgsUpdates = 1;
464: }
466: // Check trust-region reduction conditions
467: tr_reject = 0;
468: if (NTL_UPDATE_REDUCTION == tl->update_type) {
469: // Get predicted reduction
470: info = pls->GetObjFcn(&prered); CHKERRQ(info);
471: if (prered >= 0.0) {
472: // The predicted reduction has the wrong sign. This cannot
473: // happen in infinite precision arithmetic. Step should
474: // be rejected!
475: radius = tl->alpha1 * TaoMin(radius, norm_d);
476: tr_reject = 1;
477: }
478: else {
479: // Compute trial step and function value
480: info = W->Waxpby(1.0, X, 1.0, D); CHKERRQ(info);
481: info = TaoComputeFunction(tao, W, &ftrial); CHKERRQ(info);
482: if (TaoInfOrNaN(ftrial)) {
483: radius = tl->alpha1 * TaoMin(radius, norm_d);
484: tr_reject = 1;
485: }
486: else {
487: // Compute and actual reduction
488: actred = f - ftrial;
489: prered = -prered;
490: if ((fabs(actred) <= tl->epsilon) &&
491: (fabs(prered) <= tl->epsilon)) {
492: kappa = 1.0;
493: }
494: else {
495: kappa = actred / prered;
496: }
498: // Accept of reject the step and update radius
499: if (kappa < tl->eta1) {
500: // Reject the step
501: radius = tl->alpha1 * TaoMin(radius, norm_d);
502: tr_reject = 1;
503: }
504: else {
505: // Accept the step
506: if (kappa < tl->eta2) {
507: // Marginal bad step
508: radius = tl->alpha2 * TaoMin(radius, norm_d);
509: }
510: else if (kappa < tl->eta3) {
511: // Reasonable step
512: radius = tl->alpha3 * radius;
513: }
514: else if (kappa < tl->eta4) {
515: // Good step
516: radius = TaoMax(tl->alpha4 * norm_d, radius);
517: }
518: else {
519: // Very good step
520: radius = TaoMax(tl->alpha5 * norm_d, radius);
521: }
522: }
523: }
524: }
525: }
526: else {
527: // Get predicted reduction
528: info = pls->GetObjFcn(&prered); CHKERRQ(info);
530: if (prered >= 0.0) {
531: // The predicted reduction has the wrong sign. This cannot
532: // happen in infinite precision arithmetic. Step should
533: // be rejected!
534: radius = tl->gamma1 * TaoMin(radius, norm_d);
535: tr_reject = 1;
536: }
537: else {
538: info = W->Waxpby(1.0, X, 1.0, D); CHKERRQ(info);
539: info = TaoComputeFunction(tao, W, &ftrial); CHKERRQ(info);
540: if (TaoInfOrNaN(ftrial)) {
541: radius = tl->gamma1 * TaoMin(radius, norm_d);
542: tr_reject = 1;
543: }
544: else {
545: info = D->Dot(G, &gdx); CHKERRQ(info);
547: actred = f - ftrial;
548: prered = -prered;
549: if ((fabs(actred) <= tl->epsilon) &&
550: (fabs(prered) <= tl->epsilon)) {
551: kappa = 1.0;
552: }
553: else {
554: kappa = actred / prered;
555: }
557: tau_1 = tl->theta * gdx / (tl->theta * gdx - (1.0 - tl->theta) * prered + actred);
558: tau_2 = tl->theta * gdx / (tl->theta * gdx + (1.0 + tl->theta) * prered - actred);
559: tau_min = TaoMin(tau_1, tau_2);
560: tau_max = TaoMax(tau_1, tau_2);
562: if (kappa >= 1.0 - tl->mu1) {
563: // Great agreement; accept step and update radius
564: if (tau_max < 1.0) {
565: radius = TaoMax(radius, tl->gamma3 * norm_d);
566: }
567: else if (tau_max > tl->gamma4) {
568: radius = TaoMax(radius, tl->gamma4 * norm_d);
569: }
570: else {
571: radius = TaoMax(radius, tau_max * norm_d);
572: }
573: }
574: else if (kappa >= 1.0 - tl->mu2) {
575: // Good agreement
577: if (tau_max < tl->gamma2) {
578: radius = tl->gamma2 * TaoMin(radius, norm_d);
579: }
580: else if (tau_max > tl->gamma3) {
581: radius = TaoMax(radius, tl->gamma3 * norm_d);
582: } else if (tau_max < 1.0) {
583: radius = tau_max * TaoMin(radius, norm_d);
584: }
585: else {
586: radius = TaoMax(radius, tau_max * norm_d);
587: }
588: }
589: else {
590: // Not good agreement
591: if (tau_min > 1.0) {
592: radius = tl->gamma2 * TaoMin(radius, norm_d);
593: }
594: else if (tau_max < tl->gamma1) {
595: radius = tl->gamma1 * TaoMin(radius, norm_d);
596: }
597: else if ((tau_min < tl->gamma1) && (tau_max >= 1.0)) {
598: radius = tl->gamma1 * TaoMin(radius, norm_d);
599: }
600: else if ((tau_1 >= tl->gamma1) && (tau_1 < 1.0) &&
601: ((tau_2 < tl->gamma1) || (tau_2 >= 1.0))) {
602: radius = tau_1 * TaoMin(radius, norm_d);
603: }
604: else if ((tau_2 >= tl->gamma1) && (tau_2 < 1.0) &&
605: ((tau_1 < tl->gamma1) || (tau_2 >= 1.0))) {
606: radius = tau_2 * TaoMin(radius, norm_d);
607: }
608: else {
609: radius = tau_max * TaoMin(radius, norm_d);
610: }
611: tr_reject = 1;
612: }
613: }
614: }
615: }
617: if (tr_reject) {
618: // The trust-region constraints rejected the step. Apply a linesearch.
619: // Check for descent direction.
620: info = D->Dot(G, &gdx); CHKERRQ(info);
621: if ((gdx >= 0.0) || TaoInfOrNaN(gdx)) {
622: // Newton step is not descent or direction produced Inf or NaN
623:
624: if (NTL_PC_BFGS != tl->pc_type) {
625: // We don't have the bfgs matrix around and updated
626: // Must use gradient direction in this case
627: info = D->CopyFrom(G); CHKERRQ(info);
628: info = D->Negate(); CHKERRQ(info);
629:
630: ++tl->grad;
631: stepType = NTL_GRADIENT;
632: }
633: else {
634: // Attempt to use the BFGS direction
635: info = M->Solve(G, D, &success); CHKERRQ(info);
636: info = D->Negate(); CHKERRQ(info);
637:
638: // Check for success (descent direction)
639: info = D->Dot(G, &gdx); CHKERRQ(info);
640: if ((gdx >= 0) || TaoInfOrNaN(gdx)) {
641: // BFGS direction is not descent or direction produced not a number
642: // We can assert bfgsUpdates > 1 in this case because
643: // the first solve produces the scaled gradient direction,
644: // which is guaranteed to be descent
645:
646: // Use steepest descent direction (scaled)
647:
648: if (f != 0.0) {
649: delta = 2.0 * TaoAbsDouble(f) / (gnorm*gnorm);
650: }
651: else {
652: delta = 2.0 / (gnorm*gnorm);
653: }
654: info = M->SetDelta(delta); CHKERRQ(info);
655: info = M->Reset(); CHKERRQ(info);
656: info = M->Update(X, G); CHKERRQ(info);
657: info = M->Solve(G, D, &success); CHKERRQ(info);
658: info = D->Negate(); CHKERRQ(info);
659:
660: bfgsUpdates = 1;
661: ++tl->sgrad;
662: stepType = NTL_SCALED_GRADIENT;
663: }
664: else {
665: if (1 == bfgsUpdates) {
666: // The first BFGS direction is always the scaled gradient
667: ++tl->sgrad;
668: stepType = NTL_SCALED_GRADIENT;
669: }
670: else {
671: ++tl->bfgs;
672: stepType = NTL_BFGS;
673: }
674: }
675: }
676: }
677: else {
678: // Computed Newton step is descent
679: ++tl->newt;
680: stepType = NTL_NEWTON;
681: }
682:
683: // Perform the linesearch
684: fold = f;
685: info = Xold->CopyFrom(X); CHKERRQ(info);
686: info = Gold->CopyFrom(G); CHKERRQ(info);
688: step = 1.0;
689: info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
691: while (status && stepType != NTL_GRADIENT) {
692: // Linesearch failed
693: f = fold;
694: info = X->CopyFrom(Xold); CHKERRQ(info);
695: info = G->CopyFrom(Gold); CHKERRQ(info);
696:
697: switch(stepType) {
698: case NTL_NEWTON:
699: // Failed to obtain acceptable iterate with Newton step
701: if (NTL_PC_BFGS != tl->pc_type) {
702: // We don't have the bfgs matrix around and being updated
703: // Must use gradient direction in this case
704: info = D->CopyFrom(G); CHKERRQ(info);
705:
706: ++tl->grad;
707: stepType = NTL_GRADIENT;
708: }
709: else {
710: // Attempt to use the BFGS direction
711: info = M->Solve(G, D, &success); CHKERRQ(info);
712:
713: // Check for success (descent direction)
714: info = D->Dot(G, &gdx); CHKERRQ(info);
715: if ((gdx <= 0) || TaoInfOrNaN(gdx)) {
716: // BFGS direction is not descent or direction produced
717: // not a number. We can assert bfgsUpdates > 1 in this case
718: // Use steepest descent direction (scaled)
719:
720: if (f != 0.0) {
721: delta = 2.0 * TaoAbsDouble(f) / (gnorm*gnorm);
722: }
723: else {
724: delta = 2.0 / (gnorm*gnorm);
725: }
726: info = M->SetDelta(delta); CHKERRQ(info);
727: info = M->Reset(); CHKERRQ(info);
728: info = M->Update(X, G); CHKERRQ(info);
729: info = M->Solve(G, D, &success); CHKERRQ(info);
730:
731: bfgsUpdates = 1;
732: ++tl->sgrad;
733: stepType = NTL_SCALED_GRADIENT;
734: }
735: else {
736: if (1 == bfgsUpdates) {
737: // The first BFGS direction is always the scaled gradient
738: ++tl->sgrad;
739: stepType = NTL_SCALED_GRADIENT;
740: }
741: else {
742: ++tl->bfgs;
743: stepType = NTL_BFGS;
744: }
745: }
746: }
747: break;
749: case NTL_BFGS:
750: // Can only enter if pc_type == NTL_PC_BFGS
751: // Failed to obtain acceptable iterate with BFGS step
752: // Attempt to use the scaled gradient direction
753:
754: if (f != 0.0) {
755: delta = 2.0 * TaoAbsDouble(f) / (gnorm*gnorm);
756: }
757: else {
758: delta = 2.0 / (gnorm*gnorm);
759: }
760: info = M->SetDelta(delta); CHKERRQ(info);
761: info = M->Reset(); CHKERRQ(info);
762: info = M->Update(X, G); CHKERRQ(info);
763: info = M->Solve(G, D, &success); CHKERRQ(info);
764:
765: bfgsUpdates = 1;
766: ++tl->sgrad;
767: stepType = NTL_SCALED_GRADIENT;
768: break;
769:
770: case NTL_SCALED_GRADIENT:
771: // Can only enter if pc_type == NTL_PC_BFGS
772: // The scaled gradient step did not produce a new iterate;
773: // attemp to use the gradient direction.
774: // Need to make sure we are not using a different diagonal scaling
775: info = M->SetScale(0); CHKERRQ(info);
776: info = M->SetDelta(1.0); CHKERRQ(info);
777: info = M->Reset(); CHKERRQ(info);
778: info = M->Update(X, G); CHKERRQ(info);
779: info = M->Solve(G, D, &success); CHKERRQ(info);
780:
781: bfgsUpdates = 1;
782: ++tl->grad;
783: stepType = NTL_GRADIENT;
784: break;
785: }
786: info = D->Negate(); CHKERRQ(info);
788: // This may be incorrect; linesearch has values for stepmax and stepmin
789: // that should be reset.
790: step = 1.0;
791: info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
792: }
794: if (status) {
795: // Failed to find an improving point
796: f = fold;
797: info = X->CopyFrom(Xold); CHKERRQ(info);
798: info = G->CopyFrom(Gold); CHKERRQ(info);
799: radius = 0.0;
800: }
801: else if (stepType == NTL_NEWTON) {
802: if (step < tl->nu1) {
803: // Very bad step taken; reduce radius
804: radius = tl->omega1 * TaoMin(norm_d, radius);
805: }
806: else if (step < tl->nu2) {
807: // Reasonably bad step taken; reduce radius
808: radius = tl->omega2 * TaoMin(norm_d, radius);
809: }
810: else if (step < tl->nu3) {
811: // Reasonable step was taken; leave radius alone
812: if (tl->omega3 < 1.0) {
813: radius = tl->omega3 * TaoMin(norm_d, radius);
814: }
815: else if (tl->omega3 > 1.0) {
816: radius = TaoMax(tl->omega3 * norm_d, radius);
817: }
818: }
819: else if (step < tl->nu4) {
820: // Full step taken; increase the radius
821: radius = TaoMax(tl->omega4 * norm_d, radius);
822: }
823: else {
824: // More than full step taken; increase the radius
825: radius = TaoMax(tl->omega5 * norm_d, radius);
826: }
827: }
828: else {
829: // Newton step was not good; reduce the radius
830: radius = tl->omega1 * TaoMin(norm_d, radius);
831: }
832: }
833: else {
834: // Trust-region step is accepted
836: info = X->CopyFrom(W); CHKERRQ(info);
837: f = ftrial;
838: info = TaoComputeGradient(tao, X, G); CHKERRQ(info);
839: ++tl->trust;
840: }
842: // The radius may have been increased; modify if it is too large
843: radius = TaoMin(radius, tl->max_radius);
845: // Check for termination
846: info = G->Norm2(&gnorm); CHKERRQ(info);
847: if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
848: SETERRQ(1,"User provided compute function generated Not-a-Number");
849: }
850: needH = 1;
851:
852: info = TaoMonitor(tao, iter, f, gnorm, 0.0, radius, &reason); CHKERRQ(info);
853: }
854: TaoFunctionReturn(0);
855: }
857: /* ---------------------------------------------------------- */
860: static int TaoSetUp_NTL(TAO_SOLVER tao, void *solver)
861: {
862: TAO_NTL *tl = (TAO_NTL *)solver;
863: TaoVec *X;
864: TaoMat *H;
865: int info;
867: TaoFunctionBegin;
869: info = TaoGetSolution(tao, &X); CHKERRQ(info);
870: info = X->Clone(&tl->G); CHKERRQ(info);
871: info = X->Clone(&tl->D); CHKERRQ(info);
872: info = X->Clone(&tl->W); CHKERRQ(info);
874: info = X->Clone(&tl->Xold); CHKERRQ(info);
875: info = X->Clone(&tl->Gold); CHKERRQ(info);
877: tl->Diag = 0;
878: tl->M = 0;
880: info = TaoSetLagrangianGradientVector(tao, tl->G); CHKERRQ(info);
881: info = TaoSetStepDirectionVector(tao, tl->D); CHKERRQ(info);
883: // Set linear solver to default for symmetric matrices
884: info = TaoGetHessian(tao, &H); CHKERRQ(info);
885: info = TaoCreateLinearSolver(tao, H, 200, 0); CHKERRQ(info);
887: // Check sizes for compatability
888: info = TaoCheckFGH(tao); CHKERRQ(info);
889: TaoFunctionReturn(0);
890: }
892: /*------------------------------------------------------------*/
895: static int TaoDestroy_NTL(TAO_SOLVER tao, void *solver)
896: {
897: TAO_NTL *tl = (TAO_NTL *)solver;
898: int info;
900: TaoFunctionBegin;
901: info = TaoVecDestroy(tl->G); CHKERRQ(info);
902: info = TaoVecDestroy(tl->D); CHKERRQ(info);
903: info = TaoVecDestroy(tl->W); CHKERRQ(info);
905: info = TaoVecDestroy(tl->Xold); CHKERRQ(info);
906: info = TaoVecDestroy(tl->Gold); CHKERRQ(info);
908: info = TaoSetLagrangianGradientVector(tao, 0); CHKERRQ(info);
909: info = TaoSetStepDirectionVector(tao, 0); CHKERRQ(info);
911: if (tl->Diag) {
912: info = TaoVecDestroy(tl->Diag); CHKERRQ(info);
913: tl->Diag = 0;
914: }
916: if (tl->M) {
917: info = TaoMatDestroy(tl->M); CHKERRQ(info);
918: tl->M = 0;
919: }
920: TaoFunctionReturn(0);
921: }
923: /*------------------------------------------------------------*/
926: static int TaoSetOptions_NTL(TAO_SOLVER tao, void *solver)
927: {
928: TAO_NTL *tl = (TAO_NTL *)solver;
929: int info;
931: TaoFunctionBegin;
932: info = TaoOptionsHead("Newton line search method for unconstrained optimization"); CHKERRQ(info);
933: info = TaoOptionList("-tao_ntl_ksp_type", "ksp type", "", NTL_KSP, NTL_KSP_TYPES, NTL_KSP[tl->ksp_type], &tl->ksp_type, 0); CHKERRQ(info);
934: info = TaoOptionList("-tao_ntl_pc_type", "pc type", "", NTL_PC, NTL_PC_TYPES, NTL_PC[tl->pc_type], &tl->pc_type, 0); CHKERRQ(info);
935: info = TaoOptionList("-tao_ntl_bfgs_scale_type", "bfgs scale type", "", BFGS_SCALE, BFGS_SCALE_TYPES, BFGS_SCALE[tl->bfgs_scale_type], &tl->bfgs_scale_type, 0); CHKERRQ(info);
936: info = TaoOptionList("-tao_ntl_init_type", "radius initialization type", "", NTL_INIT, NTL_INIT_TYPES, NTL_INIT[tl->init_type], &tl->init_type, 0); CHKERRQ(info);
937: info = TaoOptionList("-tao_ntl_update_type", "radius update type", "", NTL_UPDATE, NTL_UPDATE_TYPES, NTL_UPDATE[tl->update_type], &tl->update_type, 0); CHKERRQ(info);
938: info = TaoOptionDouble("-tao_ntl_eta1", "poor steplength; reduce radius", "", tl->eta1, &tl->eta1, 0); CHKERRQ(info);
939: info = TaoOptionDouble("-tao_ntl_eta2", "reasonable steplength; leave radius alone", "", tl->eta2, &tl->eta2, 0); CHKERRQ(info);
940: info = TaoOptionDouble("-tao_ntl_eta3", "good steplength; increase radius", "", tl->eta3, &tl->eta3, 0); CHKERRQ(info);
941: info = TaoOptionDouble("-tao_ntl_eta4", "excellent steplength; greatly increase radius", "", tl->eta4, &tl->eta4, 0); CHKERRQ(info);
942: info = TaoOptionDouble("-tao_ntl_alpha1", "", "", tl->alpha1, &tl->alpha1, 0); CHKERRQ(info);
943: info = TaoOptionDouble("-tao_ntl_alpha2", "", "", tl->alpha2, &tl->alpha2, 0); CHKERRQ(info);
944: info = TaoOptionDouble("-tao_ntl_alpha3", "", "", tl->alpha3, &tl->alpha3, 0); CHKERRQ(info);
945: info = TaoOptionDouble("-tao_ntl_alpha4", "", "", tl->alpha4, &tl->alpha4, 0); CHKERRQ(info);
946: info = TaoOptionDouble("-tao_ntl_alpha5", "", "", tl->alpha5, &tl->alpha5, 0); CHKERRQ(info);
947: info = TaoOptionDouble("-tao_ntl_nu1", "poor steplength; reduce radius", "", tl->nu1, &tl->nu1, 0); CHKERRQ(info);
948: info = TaoOptionDouble("-tao_ntl_nu2", "reasonable steplength; leave radius alone", "", tl->nu2, &tl->nu2, 0); CHKERRQ(info);
949: info = TaoOptionDouble("-tao_ntl_nu3", "good steplength; increase radius", "", tl->nu3, &tl->nu3, 0); CHKERRQ(info);
950: info = TaoOptionDouble("-tao_ntl_nu4", "excellent steplength; greatly increase radius", "", tl->nu4, &tl->nu4, 0); CHKERRQ(info);
951: info = TaoOptionDouble("-tao_ntl_omega1", "", "", tl->omega1, &tl->omega1, 0); CHKERRQ(info);
952: info = TaoOptionDouble("-tao_ntl_omega2", "", "", tl->omega2, &tl->omega2, 0); CHKERRQ(info);
953: info = TaoOptionDouble("-tao_ntl_omega3", "", "", tl->omega3, &tl->omega3, 0); CHKERRQ(info);
954: info = TaoOptionDouble("-tao_ntl_omega4", "", "", tl->omega4, &tl->omega4, 0); CHKERRQ(info);
955: info = TaoOptionDouble("-tao_ntl_omega5", "", "", tl->omega5, &tl->omega5, 0); CHKERRQ(info);
956: info = TaoOptionDouble("-tao_ntl_mu1_i", "", "", tl->mu1_i, &tl->mu1_i, 0); CHKERRQ(info);
957: info = TaoOptionDouble("-tao_ntl_mu2_i", "", "", tl->mu2_i, &tl->mu2_i, 0); CHKERRQ(info);
958: info = TaoOptionDouble("-tao_ntl_gamma1_i", "", "", tl->gamma1_i, &tl->gamma1_i, 0); CHKERRQ(info);
959: info = TaoOptionDouble("-tao_ntl_gamma2_i", "", "", tl->gamma2_i, &tl->gamma2_i, 0); CHKERRQ(info);
960: info = TaoOptionDouble("-tao_ntl_gamma3_i", "", "", tl->gamma3_i, &tl->gamma3_i, 0); CHKERRQ(info);
961: info = TaoOptionDouble("-tao_ntl_gamma4_i", "", "", tl->gamma4_i, &tl->gamma4_i, 0); CHKERRQ(info);
962: info = TaoOptionDouble("-tao_ntl_theta_i", "", "", tl->theta_i, &tl->theta_i, 0); CHKERRQ(info);
963: info = TaoOptionDouble("-tao_ntl_mu1", "", "", tl->mu1, &tl->mu1, 0); CHKERRQ(info);
964: info = TaoOptionDouble("-tao_ntl_mu2", "", "", tl->mu2, &tl->mu2, 0); CHKERRQ(info);
965: info = TaoOptionDouble("-tao_ntl_gamma1", "", "", tl->gamma1, &tl->gamma1, 0); CHKERRQ(info);
966: info = TaoOptionDouble("-tao_ntl_gamma2", "", "", tl->gamma2, &tl->gamma2, 0); CHKERRQ(info);
967: info = TaoOptionDouble("-tao_ntl_gamma3", "", "", tl->gamma3, &tl->gamma3, 0); CHKERRQ(info);
968: info = TaoOptionDouble("-tao_ntl_gamma4", "", "", tl->gamma4, &tl->gamma4, 0); CHKERRQ(info);
969: info = TaoOptionDouble("-tao_ntl_theta", "", "", tl->theta, &tl->theta, 0); CHKERRQ(info);
970: info = TaoOptionDouble("-tao_ntl_min_radius", "lower bound on initial radius", "", tl->min_radius, &tl->min_radius, 0); CHKERRQ(info);
971: info = TaoOptionDouble("-tao_ntl_max_radius", "upper bound on radius", "", tl->max_radius, &tl->max_radius, 0); CHKERRQ(info);
972: info = TaoOptionDouble("-tao_ntl_epsilon", "tolerance used when computing actual and predicted reduction", "", tl->epsilon, &tl->epsilon, 0); CHKERRQ(info);
973: info = TaoLineSearchSetFromOptions(tao); CHKERRQ(info);
974: info = TaoOptionsTail(); CHKERRQ(info);
975: TaoFunctionReturn(0);
976: }
979: /*------------------------------------------------------------*/
982: static int TaoView_NTL(TAO_SOLVER tao,void* solver)
983: {
984: TAO_NTL *tl = (TAO_NTL *)solver;
985: int info;
987: TaoFunctionBegin;
988: if (NTL_PC_BFGS == tl->pc_type && tl->M) {
989: info = TaoPrintInt(tao, " Rejected matrix updates: %d\n", tl->M->GetRejects()); CHKERRQ(info);
990: }
991: info = TaoPrintInt(tao, " Trust-region steps: %d\n", tl->trust); CHKERRQ(info);
992: info = TaoPrintInt(tao, " Newton search steps: %d\n", tl->newt); CHKERRQ(info);
993: info = TaoPrintInt(tao, " BFGS search steps: %d\n", tl->bfgs); CHKERRQ(info);
994: info = TaoPrintInt(tao, " Scaled gradient search steps: %d\n", tl->sgrad); CHKERRQ(info);
995: info = TaoPrintInt(tao, " Gradient search steps: %d\n", tl->grad); CHKERRQ(info);
996: info = TaoLineSearchView(tao); CHKERRQ(info);
997: TaoFunctionReturn(0);
998: }
1000: /* ---------------------------------------------------------- */
1004: int TaoCreate_NTL(TAO_SOLVER tao)
1005: {
1006: TAO_NTL *tl;
1007: int info;
1009: TaoFunctionBegin;
1011: info = TaoNew(TAO_NTL, &tl); CHKERRQ(info);
1012: info = PetscLogObjectMemory(tao, sizeof(TAO_NTL)); CHKERRQ(info);
1014: info = TaoSetTaoSolveRoutine(tao, TaoSolve_NTL, (void *)tl); CHKERRQ(info);
1015: info = TaoSetTaoSetUpDownRoutines(tao, TaoSetUp_NTL, TaoDestroy_NTL); CHKERRQ(info);
1016: info = TaoSetTaoOptionsRoutine(tao, TaoSetOptions_NTL); CHKERRQ(info);
1017: info = TaoSetTaoViewRoutine(tao, TaoView_NTL); CHKERRQ(info);
1019: info = TaoSetMaximumIterates(tao, 50); CHKERRQ(info);
1020: info = TaoSetTolerances(tao, 1e-10, 1e-10, 0, 0); CHKERRQ(info);
1022: info = TaoSetTrustRegionRadius(tao, 100.0); CHKERRQ(info);
1023: info = TaoSetTrustRegionTolerance(tao, 1.0e-12); CHKERRQ(info);
1025: // Default values for trust-region radius update based on steplength
1026: tl->nu1 = 0.25;
1027: tl->nu2 = 0.50;
1028: tl->nu3 = 1.00;
1029: tl->nu4 = 1.25;
1031: tl->omega1 = 0.25;
1032: tl->omega2 = 0.50;
1033: tl->omega3 = 1.00;
1034: tl->omega4 = 2.00;
1035: tl->omega5 = 4.00;
1037: // Default values for trust-region radius update based on reduction
1038: tl->eta1 = 1.0e-4;
1039: tl->eta2 = 0.25;
1040: tl->eta3 = 0.50;
1041: tl->eta4 = 0.90;
1043: tl->alpha1 = 0.25;
1044: tl->alpha2 = 0.50;
1045: tl->alpha3 = 1.00;
1046: tl->alpha4 = 2.00;
1047: tl->alpha5 = 4.00;
1049: // Default values for trust-region radius update based on interpolation
1050: tl->mu1 = 0.10;
1051: tl->mu2 = 0.50;
1053: tl->gamma1 = 0.25;
1054: tl->gamma2 = 0.50;
1055: tl->gamma3 = 2.00;
1056: tl->gamma4 = 4.00;
1058: tl->theta = 0.05;
1060: // Default values for trust region initialization based on interpolation
1061: tl->mu1_i = 0.35;
1062: tl->mu2_i = 0.50;
1064: tl->gamma1_i = 0.0625;
1065: tl->gamma2_i = 0.5;
1066: tl->gamma3_i = 2.0;
1067: tl->gamma4_i = 5.0;
1068:
1069: tl->theta_i = 0.25;
1071: // Remaining parameters
1072: tl->min_radius = 1.0e-10;
1073: tl->max_radius = 1.0e10;
1074: tl->epsilon = 1.0e-6;
1076: tl->ksp_type = NTL_KSP_STCG;
1077: tl->pc_type = NTL_PC_BFGS;
1078: tl->bfgs_scale_type = BFGS_SCALE_AHESS;
1079: tl->init_type = NTL_INIT_INTERPOLATION;
1080: tl->update_type = NTL_UPDATE_REDUCTION;
1082: info = TaoCreateMoreThuenteLineSearch(tao, 0, 0); CHKERRQ(info);
1083: TaoFunctionReturn(0);
1084: }
1087: #endif