Actual source code: cg.c
/*$Id$*/

#include "cg.h"

#define CG_FletcherReeves    0
#define CG_PolakRibiere      1
#define CG_PolakRibierePlus  2
#define CG_HestenesStiefel   3
#define CG_DaiYuan           4
#define CG_Types             5

static const char *CG_Table[64] = {
  "fr", "pr", "prp", "hs", "dy"
};
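
/*
   The strings in CG_Table name the formula used for beta in the
   direction update d_{k+1} = -g_{k+1} + beta*d_k, as implemented in
   the switch statement in TaoSolve_CG below.  With y_k = g_{k+1} - g_k:

     fr  (Fletcher-Reeves):     beta = (g_{k+1}^T g_{k+1}) / (g_k^T g_k)
     pr  (Polak-Ribiere):       beta = (g_{k+1}^T y_k) / (g_k^T g_k)
     prp (Polak-Ribiere plus):  beta = max((g_{k+1}^T y_k) / (g_k^T g_k), 0)
     hs  (Hestenes-Stiefel):    beta = (g_{k+1}^T y_k) / (d_k^T y_k)
     dy  (Dai-Yuan):            beta = (g_{k+1}^T g_{k+1}) / (d_k^T y_k)
*/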

#define TAO_ZER_SAFEGUARD 1e-8
#define TAO_INF_SAFEGUARD 1e+8

static int TaoSolve_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X, *Xm1 = cg->X2;
  TaoVec *G = cg->G1, *Gm1 = cg->G2;
  TaoVec *D = cg->D, *W = cg->W;

  TaoTerminateReason reason;

  double f, f_full, fm1, gnorm, gnorm2, gnorm2m1, ginner, gd, gm1d;
  double beta, delta, step = 1.0;

  int info = 0;
  TaoInt status = 0, iter = 0;

  TaoFunctionBegin;

  // Get vectors we will need
  info = TaoGetSolution(tao, &X); CHKERRQ(info);

  // Check convergence criteria
  info = TaoComputeFunctionGradient(tao, X, &f, G); CHKERRQ(info);
  info = G->Norm2(&gnorm); CHKERRQ(info);
  if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
    SETERRQ(1, "User provided compute function generated Inf or NaN");
  }

  info = TaoMonitor(tao, iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
  if (reason != TAO_CONTINUE_ITERATING) {
    TaoFunctionReturn(0);
  }

  // Have not converged; initialize variables
  info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);
  gnorm2 = gnorm*gnorm;

  // Set initial scaling for the function
  if (f != 0.0) {
    delta = 2.0 * TaoAbsDouble(f) / gnorm2;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
  else {
    delta = 2.0 / gnorm2;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
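
  // Added commentary: the scaling above makes the first trial step
  // problem-scaled.  delta = 2|f| / ||g||^2 is the steplength at which
  // a quadratic model of f along -g reaches a minimum value of zero,
  // and the delta_min/delta_max safeguards keep it finite and nonzero
  // when f or ||g|| is extreme.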

  // Set counter for gradient/reset steps
  cg->grad = 0;
  cg->reset = 0;

  while (1) {
    // Save the current gradient information
    fm1 = f;
    gnorm2m1 = gnorm2;
    info = Xm1->CopyFrom(X); CHKERRQ(info);
    info = Gm1->CopyFrom(G); CHKERRQ(info);

    info = D->Dot(G, &gd); CHKERRQ(info);
    if ((gd >= 0) || TaoInfOrNaN(gd)) {
      // The direction is not a descent direction, or d^T g is not a
      // number; fall back to the steepest descent direction
      ++cg->grad;

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }
      else {
        delta = 2.0 / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // The gradient step cannot contain a NaN, so this test is not needed.
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }
    }

    // Search along the direction for an improved point
    step = delta;
    info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
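
    // If the line search fails, the code below recovers in three stages:
    // (1) restore the previous point, rescale delta, and retry with a
    // scaled steepest descent step; (2) retry with an unscaled gradient
    // step (delta = 1); (3) give up, restore the previous iterate, and
    // report step = 0 to the monitor.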
    if (status) {
      // Line search failed; reset the scaling factor and use a scaled
      // gradient step
      ++cg->reset;

      f = fm1;
      gnorm2 = gnorm2m1;
      info = X->CopyFrom(Xm1); CHKERRQ(info);
      info = G->CopyFrom(Gm1); CHKERRQ(info);

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }
      else {
        delta = 2.0 / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // The gradient step cannot contain a NaN, so this test is not needed.
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }

      // This may be incorrect; the line search has values for stepmax and
      // stepmin that should be reset.
      step = delta;
      info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);

      if (status) {
        // Line search failed again; switch to an unscaled gradient step

        f = fm1;
        gnorm2 = gnorm2m1;
        info = X->CopyFrom(Xm1); CHKERRQ(info);
        info = G->CopyFrom(Gm1); CHKERRQ(info);

        delta = 1.0;

        info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

        // The gradient step cannot contain a NaN, so this test is not needed.
        // info = D->Norm2(&dnorm); CHKERRQ(info);
        // if (TaoInfOrNaN(dnorm)) {
        //   SETERRQ(1, "Direction generated Not-a-Number");
        // }

        // This may be incorrect; the line search has values for stepmax and
        // stepmin that should be reset.
        step = delta;
        info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
        if (status) {
          // Even the steepest descent direction did not produce a new
          // point; restore the previous iterate and stop here
          f = fm1;
          gnorm2 = gnorm2m1;
          info = X->CopyFrom(Xm1); CHKERRQ(info);
          info = G->CopyFrom(Gm1); CHKERRQ(info);
          step = 0.0;
        }
      }
    }

    // Compute the gradient norm at the new point
    info = G->Norm2(&gnorm); CHKERRQ(info);
    if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
      SETERRQ(1, "User provided compute function generated Inf or NaN");
    }
    gnorm2 = gnorm*gnorm;

    // Check for termination
    info = TaoMonitor(tao, ++iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
    if (reason != TAO_CONTINUE_ITERATING) {
      break;
    }
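
    // Added commentary: the restart test below is a Powell-style
    // criterion: if |g_{k+1}^T g_k| >= eta * ||g_{k+1}||^2, successive
    // gradients are far from orthogonal, suggesting the conjugacy of
    // the directions has degraded; beta = 0 then restarts the method
    // with a steepest descent step.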

    // Check for restart condition
    info = G->Dot(Gm1, &ginner); CHKERRQ(info);
    if (fabs(ginner) >= cg->eta * gnorm2) {
      // Gradients far from orthogonal; use steepest descent direction
      beta = 0.0;
    }
    else {
      // Gradients close to orthogonal; use conjugate gradient formula

      switch (cg->cg_type) {
      case CG_FletcherReeves:
        beta = gnorm2 / gnorm2m1;
        break;

      case CG_PolakRibiere:
        beta = (gnorm2 - ginner) / gnorm2m1;
        break;

      case CG_PolakRibierePlus:
        beta = TaoMax((gnorm2 - ginner) / gnorm2m1, 0.0);
        break;

      case CG_HestenesStiefel:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = (gnorm2 - ginner) / (gd - gm1d);
        break;

      case CG_DaiYuan:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = gnorm2 / (gd - gm1d);
        break;

      default:
        beta = 0.0;
        break;
      }
    }

    // Compute the new direction: d = -g + beta*d
    info = D->Axpby(-1.0, G, beta); CHKERRQ(info);

    // Update the initial steplength choice (reset to 1, clamped to the
    // [delta_min, delta_max] safeguards)
    delta = 1.0;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */

static int TaoSetUp_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X;
  int info;

  TaoFunctionBegin;

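  // Work vectors: X2 holds the previous iterate, G1 and G2 the current
  // and previous gradients, D the search direction, and W scratch space
  // for the line search (see their use in TaoSolve_CG above).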
  info = TaoGetSolution(tao, &X); CHKERRQ(info);
  info = X->Clone(&cg->X2); CHKERRQ(info);
  info = X->Clone(&cg->G1); CHKERRQ(info);
  info = X->Clone(&cg->G2); CHKERRQ(info);
  info = X->Clone(&cg->D); CHKERRQ(info);
  info = X->Clone(&cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, cg->G1); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, cg->D); CHKERRQ(info);

  info = TaoCheckFG(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */

static int TaoDestroy_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;

  info = TaoVecDestroy(cg->X2); CHKERRQ(info);
  info = TaoVecDestroy(cg->G1); CHKERRQ(info);
  info = TaoVecDestroy(cg->G2); CHKERRQ(info);
  info = TaoVecDestroy(cg->D); CHKERRQ(info);
  info = TaoVecDestroy(cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, 0); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, 0); CHKERRQ(info);

  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/

static int TaoSetOptions_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;
  info = TaoOptionsHead("Nonlinear Conjugate Gradient method for unconstrained optimization"); CHKERRQ(info);

  info = TaoOptionDouble("-tao_cg_eta", "restart tolerance", "", cg->eta, &cg->eta, 0); CHKERRQ(info);
  info = TaoOptionList("-tao_cg_type", "cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_min", "minimum delta value", "", cg->delta_min, &cg->delta_min, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_max", "maximum delta value", "", cg->delta_max, &cg->delta_max, 0); CHKERRQ(info);

  info = TaoLineSearchSetFromOptions(tao); CHKERRQ(info);
  info = TaoOptionsTail(); CHKERRQ(info);
  TaoFunctionReturn(0);
}
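
/*
   Added commentary: the options registered above can be set from the
   command line of a program using this solver, e.g. (with a
   hypothetical executable name):

       ./myprog -tao_cg_type prp -tao_cg_eta 0.1 -tao_cg_delta_max 100

   where the -tao_cg_type value must be one of the strings in CG_Table.
*/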

/*------------------------------------------------------------*/

static int TaoView_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;
  info = TaoPrintInt(tao, " CG Type: %d\n", cg->cg_type); CHKERRQ(info);
  info = TaoPrintInt(tao, " Gradient steps: %d\n", cg->grad); CHKERRQ(info);
  info = TaoPrintInt(tao, " Reset steps: %d\n", cg->reset); CHKERRQ(info);
  info = TaoLineSearchView(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/

int TaoCreate_CG(TAO_SOLVER tao)
{
  TAO_CG *cg;
  int info;

  TaoFunctionBegin;

  info = TaoNew(TAO_CG, &cg); CHKERRQ(info);
  info = PetscLogObjectMemory(tao, sizeof(TAO_CG)); CHKERRQ(info);

  info = TaoSetTaoSolveRoutine(tao, TaoSolve_CG, (void *)cg); CHKERRQ(info);
  info = TaoSetTaoSetUpDownRoutines(tao, TaoSetUp_CG, TaoDestroy_CG); CHKERRQ(info);
  info = TaoSetTaoOptionsRoutine(tao, TaoSetOptions_CG); CHKERRQ(info);
  info = TaoSetTaoViewRoutine(tao, TaoView_CG); CHKERRQ(info);

  info = TaoSetMaximumIterates(tao, 2000); CHKERRQ(info);
  info = TaoSetMaximumFunctionEvaluations(tao, 4000); CHKERRQ(info);
  info = TaoSetTolerances(tao, 1e-4, 1e-4, 0, 0); CHKERRQ(info);

  cg->eta = 0.1;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;

  cg->cg_type = CG_PolakRibierePlus;

  // Note: nondefault values should be used for the nonlinear conjugate
  // gradient method.  In particular, gtol should be less than 0.5; the
  // value used in Nocedal and Wright is 0.10.  We use the default values
  // for the linesearch because it seems to work better.
  info = TaoCreateMoreThuenteLineSearch(tao, 0, 0); CHKERRQ(info);
  TaoFunctionReturn(0);
}
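
/*
   Added commentary: a minimal usage sketch, assuming the classic
   standalone TAO driver API of this era; the exact creation call and
   the "tao_cg" method string are assumptions and may differ between
   TAO versions:

       TAO_SOLVER tao;
       info = TaoCreate(MPI_COMM_WORLD, "tao_cg", &tao); CHKERRQ(info);
       // ... register the application (solution vector and
       //     function/gradient evaluation) with the solver ...
       info = TaoSolve(tao); CHKERRQ(info);
       info = TaoDestroy(tao); CHKERRQ(info);
*/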