Actual source code: cg.c

/*$Id$*/

#include "cg.h"

#define CG_FletcherReeves       0
#define CG_PolakRibiere         1
#define CG_PolakRibierePlus     2
#define CG_HestenesStiefel      3
#define CG_DaiYuan              4
#define CG_Types                5
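
// These constants select the formula used to compute the CG update
// parameter beta; the short strings in CG_Table below are the names
// accepted by the -tao_cg_type option.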

static const char *CG_Table[CG_Types] = {
  "fr", "pr", "prp", "hs", "dy"
};

#define TAO_ZER_SAFEGUARD        1e-8
#define TAO_INF_SAFEGUARD        1e+8

static int TaoSolve_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X, *Xm1 = cg->X2;
  TaoVec *G = cg->G1, *Gm1 = cg->G2;
  TaoVec *D = cg->D, *W = cg->W;

  TaoTerminateReason reason;

  double f, f_full, fm1, gnorm, gnorm2, gnorm2m1, ginner, gd, gm1d;
  double beta, delta, step = 1.0;

  int info = 0;
  TaoInt status = 0, iter = 0;

  TaoFunctionBegin;

  // Get the vectors we will need
  info = TaoGetSolution(tao, &X); CHKERRQ(info);

  // Check the initial convergence criteria
  info = TaoComputeFunctionGradient(tao, X, &f, G); CHKERRQ(info);
  info = G->Norm2(&gnorm); CHKERRQ(info);
  if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
    SETERRQ(1, "User-provided compute function generated Inf or NaN");
  }

  info = TaoMonitor(tao, iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
  if (reason != TAO_CONTINUE_ITERATING) {
    TaoFunctionReturn(0);
  }

  // Have not converged; initialize variables
  info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);
  gnorm2 = gnorm*gnorm;

  // Set the initial scaling for the first steplength
  if (f != 0.0) {
    delta = 2.0 * TaoAbsDouble(f) / gnorm2;
  }
  else {
    delta = 2.0 / gnorm2;
  }
  delta = TaoMax(delta, cg->delta_min);
  delta = TaoMin(delta, cg->delta_max);
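  // The choice delta = 2|f|/||g||^2 is a standard initial steplength
  // heuristic: for a quadratic model with minimum value near zero, the
  // minimizer along the steepest descent direction lies at roughly this
  // distance.  The value is safeguarded to lie in [delta_min, delta_max].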

  // Set counters for the gradient and reset steps
  cg->grad = 0;
  cg->reset = 0;

  while (1) {
    // Save the current function and gradient information
    fm1 = f;
    gnorm2m1 = gnorm2;
    info = Xm1->CopyFrom(X); CHKERRQ(info);
    info = Gm1->CopyFrom(G); CHKERRQ(info);

    info = D->Dot(G, &gd); CHKERRQ(info);
    if ((gd >= 0) || TaoInfOrNaN(gd)) {
      // The direction is not a descent direction or the directional
      // derivative is not a number; use the steepest descent direction
      ++cg->grad;

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
      }
      else {
        delta = 2.0 / gnorm2;
      }
      delta = TaoMax(delta, cg->delta_min);
      delta = TaoMin(delta, cg->delta_max);

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // The gradient step cannot contain a NaN, so this check is not needed.
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }
    }

    // Perform a line search along the direction for an improved point
    step = delta;
    info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);

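    // If the line search fails, the code below falls back in stages: first
    // retry with a rescaled gradient step, then with an unscaled gradient
    // step, and finally give up and keep the previous iterate (step = 0).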
    if (status) {
      // Line search failed; reset the scaling factors and retry
      // with a scaled gradient step
      ++cg->reset;

      f = fm1;
      gnorm2 = gnorm2m1;
      info = X->CopyFrom(Xm1); CHKERRQ(info);
      info = G->CopyFrom(Gm1); CHKERRQ(info);

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
      }
      else {
        delta = 2.0 / gnorm2;
      }
      delta = TaoMax(delta, cg->delta_min);
      delta = TaoMin(delta, cg->delta_max);

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // The gradient step cannot contain a NaN, so this check is not needed.
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }

      // This may be incorrect; the line search has stepmax and stepmin
      // values that should be reset.
      step = delta;
      info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);

      if (status) {
        // Line search failed again; switch to an unscaled gradient step

        f = fm1;
        gnorm2 = gnorm2m1;
        info = X->CopyFrom(Xm1); CHKERRQ(info);
        info = G->CopyFrom(Gm1); CHKERRQ(info);

        delta = 1.0;

        info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

        // The gradient step cannot contain a NaN, so this check is not needed.
        // info = D->Norm2(&dnorm); CHKERRQ(info);
        // if (TaoInfOrNaN(dnorm)) {
        //   SETERRQ(1, "Direction generated Not-a-Number");
        // }

        // This may be incorrect; the line search has stepmax and stepmin
        // values that should be reset.
        step = delta;
        info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
        if (status) {
          // The steepest descent direction did not produce a new iterate;
          // stop here

          f = fm1;
          gnorm2 = gnorm2m1;
          info = X->CopyFrom(Xm1); CHKERRQ(info);
          info = G->CopyFrom(Gm1); CHKERRQ(info);
          step = 0.0;
        }
      }
    }

    // Compute the gradient norm and check that the new function and
    // gradient values are finite
    info = G->Norm2(&gnorm); CHKERRQ(info);
    if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
      SETERRQ(1, "User-provided compute function generated Inf or NaN");
    }
    gnorm2 = gnorm*gnorm;

    // Check for termination
    info = TaoMonitor(tao, ++iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
    if (reason != TAO_CONTINUE_ITERATING) {
      break;
    }

    // Check for restart condition
    info = G->Dot(Gm1, &ginner); CHKERRQ(info);
    if (fabs(ginner) >= cg->eta * gnorm2) {
      // Gradients far from orthogonal; use steepest descent direction
      beta = 0.0;
    }
    else {
      // Gradients close to orthogonal; use conjugate gradient formula
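      //
      // With y = g_k - g_{k-1} denoting the gradient change, the cases in
      // the switch below compute
      //   fr : beta = ||g_k||^2 / ||g_{k-1}||^2          (Fletcher-Reeves)
      //   pr : beta = g_k'y / ||g_{k-1}||^2              (Polak-Ribiere)
      //   prp: beta = max(g_k'y / ||g_{k-1}||^2, 0)      (Polak-Ribiere plus)
      //   hs : beta = g_k'y / d'y                        (Hestenes-Stiefel)
      //   dy : beta = ||g_k||^2 / d'y                    (Dai-Yuan)
      // using ginner = g_k'g_{k-1} (so gnorm2 - ginner = g_k'y) and
      // gd - gm1d = d'y.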

      switch (cg->cg_type) {
      case CG_FletcherReeves:
        beta = gnorm2 / gnorm2m1;
        break;

      case CG_PolakRibiere:
        beta = (gnorm2 - ginner) / gnorm2m1;
        break;

      case CG_PolakRibierePlus:
        beta = TaoMax((gnorm2 - ginner) / gnorm2m1, 0.0);
        break;

      case CG_HestenesStiefel:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = (gnorm2 - ginner) / (gd - gm1d);
        break;

      case CG_DaiYuan:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = gnorm2 / (gd - gm1d);
        break;

      default:
        beta = 0.0;
        break;
      }
    }

    // Compute the new direction: d = -g + beta d
    info = D->Axpby(-1.0, G, beta); CHKERRQ(info);

    // Update the initial steplength choice for the next iteration
    delta = 1.0;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */
static int TaoSetUp_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X;
  int info;

  TaoFunctionBegin;

  info = TaoGetSolution(tao, &X); CHKERRQ(info);
  info = X->Clone(&cg->X2); CHKERRQ(info);
  info = X->Clone(&cg->G1); CHKERRQ(info);
  info = X->Clone(&cg->G2); CHKERRQ(info);
  info = X->Clone(&cg->D); CHKERRQ(info);
  info = X->Clone(&cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, cg->G1); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, cg->D); CHKERRQ(info);

  info = TaoCheckFG(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */
static int TaoDestroy_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;

  info = TaoVecDestroy(cg->X2); CHKERRQ(info);
  info = TaoVecDestroy(cg->G1); CHKERRQ(info);
  info = TaoVecDestroy(cg->G2); CHKERRQ(info);
  info = TaoVecDestroy(cg->D); CHKERRQ(info);
  info = TaoVecDestroy(cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, 0); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, 0); CHKERRQ(info);

  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/
static int TaoSetOptions_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;
  info = TaoOptionsHead("Nonlinear Conjugate Gradient method for unconstrained optimization"); CHKERRQ(info);

  info = TaoOptionDouble("-tao_cg_eta", "restart tolerance", "", cg->eta, &cg->eta, 0); CHKERRQ(info);
  info = TaoOptionList("-tao_cg_type", "cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_min", "minimum delta value", "", cg->delta_min, &cg->delta_min, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_max", "maximum delta value", "", cg->delta_max, &cg->delta_max, 0); CHKERRQ(info);

  info = TaoLineSearchSetFromOptions(tao); CHKERRQ(info);
  info = TaoOptionsTail(); CHKERRQ(info);
  TaoFunctionReturn(0);
}
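
/* Example runtime usage of the options registered above, e.g. on the
   command line:
     -tao_cg_type prp -tao_cg_eta 0.1 -tao_cg_delta_min 1e-7 -tao_cg_delta_max 100
   where -tao_cg_type accepts one of fr, pr, prp, hs, dy (see CG_Table). */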

/*------------------------------------------------------------*/
static int TaoView_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG   *cg = (TAO_CG *)solver;
  int      info;

  TaoFunctionBegin;
  info = TaoPrintInt(tao, "  CG Type: %d\n", cg->cg_type); CHKERRQ(info);
  info = TaoPrintInt(tao, "  Gradient steps: %d\n", cg->grad); CHKERRQ(info);
  info = TaoPrintInt(tao, "  Reset steps: %d\n", cg->reset); CHKERRQ(info);
  info = TaoLineSearchView(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/
int TaoCreate_CG(TAO_SOLVER tao)
{
  TAO_CG *cg;
  int info;

  TaoFunctionBegin;

  info = TaoNew(TAO_CG, &cg); CHKERRQ(info);
  info = PetscLogObjectMemory(tao, sizeof(TAO_CG)); CHKERRQ(info);

  info = TaoSetTaoSolveRoutine(tao, TaoSolve_CG, (void *)cg); CHKERRQ(info);
  info = TaoSetTaoSetUpDownRoutines(tao, TaoSetUp_CG, TaoDestroy_CG); CHKERRQ(info);
  info = TaoSetTaoOptionsRoutine(tao, TaoSetOptions_CG); CHKERRQ(info);
  info = TaoSetTaoViewRoutine(tao, TaoView_CG); CHKERRQ(info);

  info = TaoSetMaximumIterates(tao, 2000); CHKERRQ(info);
  info = TaoSetMaximumFunctionEvaluations(tao, 4000); CHKERRQ(info);
  info = TaoSetTolerances(tao, 1e-4, 1e-4, 0, 0); CHKERRQ(info);

  cg->eta = 0.1;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;

  cg->cg_type = CG_PolakRibierePlus;

  // Note: nondefault values should be used for the nonlinear conjugate
  // gradient method.  In particular, gtol should be less than 0.5; the
  // value used in Nocedal and Wright is 0.10.  We use the default values
  // for the linesearch because they seem to work better.
  info = TaoCreateMoreThuenteLineSearch(tao, 0, 0); CHKERRQ(info);
  TaoFunctionReturn(0);
}
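
/* A minimal driver sketch for using this solver.  The application-side
   calls below follow the classic TAO/PETSc pattern, but the exact names
   and signatures are assumptions that may differ between TAO releases;
   consult the TAO manual for your version.  FormFunctionGradient and
   user are hypothetical application-provided items.

     TAO_SOLVER      tao;
     TAO_APPLICATION app;

     TaoInitialize(&argc, &argv, (char *)0, 0);
     TaoApplicationCreate(PETSC_COMM_SELF, &app);
     TaoCreate(PETSC_COMM_SELF, "tao_cg", &tao);   // select this solver
     TaoAppSetObjectiveAndGradientRoutine(app, FormFunctionGradient, (void *)&user);
     TaoSetOptions(app, tao);                      // reads the -tao_cg_* options
     TaoSolveApplication(app, tao);
     TaoDestroy(tao);
     TaoAppDestroy(app);
     TaoFinalize();
*/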