Actual source code: pounders.c

  1: #include "pounders.h"

  5: static PetscErrorCode pounders_h(TaoSolver subtao, Vec v, Mat *H, Mat *Hpre, MatStructure *flag, void *ctx)
  6: {
  8:   *flag = SAME_NONZERO_PATTERN;
  9:   return(0);
 10: }
 13: static PetscErrorCode  pounders_fg(TaoSolver subtao, Vec x, PetscReal *f, Vec g, void *ctx)
 14: {
 15:   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)ctx;
 16:   PetscReal d1,d2;
 19:   /* g = A*x  (add b later)*/
 20:   MatMult(mfqP->Hs,x,g); 


 23:   /* f = 1/2 * x'*(Ax) + b'*x  */
 24:   VecDot(x,g,&d1); 
 25:   VecDot(mfqP->b,x,&d2); 
 26:   *f = 0.5 *d1 + d2;

 28:   /* now  g = g + b */
 29:   VecAXPY(g, 1.0, mfqP->b); 

 31:   return(0);
 32: }
 35: PetscErrorCode gqtwrap(TaoSolver tao,PetscReal *gnorm, PetscReal *qmin) 
 36: {
 38:     PetscReal atol=1.0e-10;
 39:     PetscInt info,its;
 40:     TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
 41:                                                                     

 44:     if (! mfqP->usegqt) {
 45:       Vec       x,xl,xu,pdel,ndel;
 46:       PetscReal maxval;
 47:       PetscInt i,j;
 48:       VecCreateSeqWithArray(PETSC_COMM_SELF,mfqP->n,mfqP->Xsubproblem,&x); 
 49:       VecCreateSeqWithArray(PETSC_COMM_SELF,mfqP->n,mfqP->Gres,&mfqP->b); 
 50:       VecCreateSeq(PETSC_COMM_SELF,mfqP->n,&xl); 
 51:       VecDuplicate(xl,&xu); 
 52:       VecDuplicate(xl,&pdel); 
 53:       VecDuplicate(xl,&ndel); 
 54:       VecSet(x,0.0); 
 55:       VecSet(ndel,-mfqP->delta); 
 56:       VecSet(pdel,mfqP->delta); 
 57:       for (i=0;i<mfqP->n;i++) {
 58:         for (j=i;j<mfqP->n;j++) {
 59:             mfqP->Hres[j+mfqP->n*i] = mfqP->Hres[mfqP->n*j+i];
 60:         }
 61:       }
 62:       MatCreateSeqDense(PETSC_COMM_SELF,mfqP->n,mfqP->n,mfqP->Hres,&mfqP->Hs); 
 63:       
 64:       TaoResetStatistics(mfqP->subtao); 
 65:       TaoSetInitialVector(mfqP->subtao,x); 
 66:       TaoSetHessianRoutine(mfqP->subtao,mfqP->Hs,mfqP->Hs,pounders_h,(void*)mfqP); 
 67:       TaoSetTolerances(mfqP->subtao,PETSC_NULL,PETSC_NULL,*gnorm,*gnorm,PETSC_NULL); 
 68:       /* enforce bound constraints -- experimental */
 69:       if (tao->XU && tao->XL) {
 70:         VecCopy(tao->XU,xu); 
 71:         VecAXPY(xu,-1.0,tao->solution); 
 72:         VecScale(xu,1.0/mfqP->delta); 
 73:         VecCopy(tao->XL,xl); 
 74:         VecAXPY(xl,-1.0,tao->solution); 
 75:         VecScale(xl,1.0/mfqP->delta); 
 76:         
 77:         VecPointwiseMin(xu,xu,pdel); 
 78:         VecPointwiseMax(xl,xl,ndel); 
 79:       } else {
 80:         VecCopy(pdel,xu); 
 81:         VecCopy(ndel,xl); 
 82:       }
 83:       /* Make sure xu > xl */
 84:       VecCopy(x,pdel); 
 85:       VecAXPY(pdel,-1.0,xu);  
 86:       VecMax(pdel,PETSC_NULL,&maxval); 
 87:       if (maxval > 1e-10) {
 88:         SETERRQ(PETSC_COMM_WORLD,1,"upper bound < lower bound in subproblem");
 89:       }
 90:       /* Make sure xu > tao->solution > xl */
 91:       VecCopy(xl,pdel); 
 92:       VecAXPY(pdel,-1.0,x);  
 93:       VecMax(pdel,PETSC_NULL,&maxval); 
 94:       if (maxval > 1e-10) {
 95:         SETERRQ(PETSC_COMM_WORLD,1,"initial guess < lower bound in subproblem");
 96:       }

 98:       VecCopy(x,pdel); 
 99:       VecAXPY(pdel,-1.0,xu);  
100:       VecMax(pdel,PETSC_NULL,&maxval); 
101:       if (maxval > 1e-10) {
102:         SETERRQ(PETSC_COMM_WORLD,1,"initial guess > upper bound in subproblem");
103:       }

105:       TaoSetVariableBounds(mfqP->subtao,xl,xu); 
106:       TaoSolve(mfqP->subtao); 
107:       TaoGetSolutionStatus(mfqP->subtao,PETSC_NULL,qmin,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL); 
108:       VecDestroy(&x); 
109:       VecDestroy(&xl); 
110:       VecDestroy(&xu); 
111:       VecDestroy(&pdel); 
112:       VecDestroy(&ndel); 
113:       VecDestroy(&mfqP->b); 
114:       MatDestroy(&mfqP->Hs); 
115:       

117:     } else {
118:       gqt(mfqP->n,mfqP->Hres,mfqP->n,mfqP->Gres,1.0,mfqP->gqt_rtol,atol,
119:         mfqP->gqt_maxits,gnorm,qmin,mfqP->Xsubproblem,&info,&its,
120:         mfqP->work,mfqP->work2, mfqP->work3);
121:       /*dgqt_(&mfqP->n,mfqP->Hres,&mfqP->n,mfqP->Gres,&one,&mfqP->gqt_rtol,&atol,
122:         &mfqP->gqt_maxits,gnorm,qmin,mfqP->Xsubproblem,&info,&its,
123:         mfqP->work,mfqP->work2, mfqP->work3);*/
124:     }
125:     *qmin *= -1;
126:     return(0);
127: }


132: PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) {
133: /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
134:     PetscInt i,j,k;
135:     PetscReal sqrt2 = PetscSqrtReal(2.0);
137:     j=0;

139:     for (i=0;i<n;i++) {
140:         phi[j] = 0.5 * x[i]*x[i];
141:         j++;
142:         for (k=i+1;k<n;k++) {
143:             phi[j]  = x[i]*x[k]/sqrt2;
144:             j++;
145:         }
146:         
147:     }

149:     return(0);
150: }

154: PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) {
155: /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
156:    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
157:    for j=1,...,m and with a Hessian matrix of least Frobenius norm */

159:     /* NB --we are ignoring c */
160:     PetscInt i,j,k,num,np = mfqP->nmodelpoints;
161:     PetscReal one = 1.0,zero=0.0,negone=-1.0;
162:     PetscBLASInt blasnpmax = mfqP->npmax;
163:     PetscBLASInt blasnplus1 = mfqP->n+1;
164:     PetscBLASInt blasnp = np;
165:     PetscBLASInt blasint = mfqP->n*(mfqP->n+1) / 2;
166:     PetscBLASInt blasint2 = np - mfqP->n-1;
167:     PetscBLASInt blasinfo,ione=1;
168:     PetscReal sqrt2 = PetscSqrtReal(2.0);
169:         

172:     for (i=0;i<mfqP->n*mfqP->m;i++) {
173:         mfqP->Gdel[i] = 0;
174:     }
175:     for (i=0;i<mfqP->n*mfqP->n*mfqP->m;i++) {
176:         mfqP->Hdel[i] = 0;
177:     }

179:     /* Let Ltmp = (L'*L) */
180:     BLASgemm_("T","N",&blasint2,&blasint2,&blasint,&one,&mfqP->L[(mfqP->n+1)*blasint],&blasint,&mfqP->L[(mfqP->n+1)*blasint],&blasint,&zero,mfqP->L_tmp,&blasint);
181:     
182:     /* factor Ltmp */
183:     LAPACKpotrf_("L",&blasint2,mfqP->L_tmp,&blasint,&blasinfo);
184:     if (blasinfo != 0) {
185:         SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine potrf returned with value %D\n",blasinfo);
186:     }

188:     /* factor M */
189:     LAPACKgetrf_(&blasnplus1,&blasnpmax,mfqP->M,&blasnplus1,mfqP->npmaxiwork,&blasinfo);
190:     if (blasinfo != 0) {
191:         SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine getrf returned with value %D\n",blasinfo);
192:     }
193:     
194:     for (k=0;k<mfqP->m;k++) {
195:         /* Solve L'*L*Omega = Z' * RESk*/
196:         BLASgemv_("T",&blasnp,&blasint2,&one,mfqP->Z,&blasnpmax,&mfqP->RES[mfqP->npmax*k],&ione,&zero,mfqP->omega,&ione);

198:         LAPACKpotrs_("L",&blasint2,&ione,mfqP->L_tmp,&blasint,mfqP->omega,&blasint2,&blasinfo);
199:         if (blasinfo != 0) {
200:             SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine potrs returned with value %D\n",blasinfo);
201:         }
202:         
203:         
204:         /* Beta = L*Omega */
205:         BLASgemv_("N",&blasint,&blasint2,&one,&mfqP->L[(mfqP->n+1)*blasint],&blasint,mfqP->omega,&ione,&zero,mfqP->beta,&ione);
206:         
207:         /* solve M'*Alpha = RESk - N'*Beta */
208:         BLASgemv_("T",&blasint,&blasnp,&negone,mfqP->N,&blasint,mfqP->beta,&ione,&one,&mfqP->RES[mfqP->npmax*k],&ione);
209:         LAPACKgetrs_("T",&blasnplus1,&ione,mfqP->M,&blasnplus1,mfqP->npmaxiwork,&mfqP->RES[mfqP->npmax*k],&blasnplus1,&blasinfo);
210:         if (blasinfo != 0) {
211:             SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine getrs returned with value %D\n",blasinfo);
212:         }

214:         /* Gdel(:,k) = Alpha(2:n+1) */
215:         for (i=0;i<mfqP->n;i++) {
216:             mfqP->Gdel[i + mfqP->n*k] = mfqP->RES[mfqP->npmax*k + i+1];
217:         }
218:         
219:         /* Set Hdels */
220:         num=0;
221:         for (i=0;i<mfqP->n;i++) {
222:             /* H[i,i,k] = Beta(num) */
223:             mfqP->Hdel[(i*mfqP->n + i)*mfqP->m + k] = mfqP->beta[num];
224:             num++;
225:             for (j=i+1;j<mfqP->n;j++) {
226:                 /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
227:                 mfqP->Hdel[(j*mfqP->n + i)*mfqP->m + k] = mfqP->beta[num]/sqrt2;
228:                 mfqP->Hdel[(i*mfqP->n + j)*mfqP->m + k] = mfqP->beta[num]/sqrt2;
229:                 num++;
230:             }
231:         }
232:     }
233:     
234:     return(0);
235: }

239: PetscErrorCode morepoints(TAO_POUNDERS *mfqP) {
240:     /* Assumes mfqP->model_indices[0]  is minimum index
241:        Finishes adding points to mfqP->model_indices (up to npmax)
242:        Computes L,Z,M,N
243:        np is actual number of points in model (should equal npmax?) */
244:     PetscInt point,i,j,offset;
245:     PetscInt reject;
246:     PetscBLASInt blasn=mfqP->n,blasnpmax=mfqP->npmax,blasnplus1=mfqP->n+1,blasinfo,blasnpmax_x_5=mfqP->npmax*5,blasint,blasint2,blasnp,blasmaxmn;
247:     PetscReal *x,normd;

251:     for (i=0;i<mfqP->n+1;i++) {
252:         VecGetArray(mfqP->Xhist[mfqP->model_indices[i]],&x); 
253:         mfqP->M[(mfqP->n+1)*i] = 1.0;
254:         for (j=0;j<mfqP->n;j++) {
255:             mfqP->M[j+1+((mfqP->n+1)*i)] = (x[j]  - mfqP->xmin[j]) / mfqP->delta;
256:         }
257:         VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]],&x); 
258:         phi2eval(&mfqP->M[1+((mfqP->n+1)*i)],mfqP->n,&mfqP->N[mfqP->n*(mfqP->n+1)/2 * i]); 
259:         


262:     }

264:     /* Now we add points until we have npmax starting with the most recent ones */
265:     point = mfqP->nHist-1;
266:     mfqP->nmodelpoints = mfqP->n+1;
267:     
268:     while (mfqP->nmodelpoints < mfqP->npmax && point>=0) {
269:         /* Reject any points already in the model */
270:         reject = 0;
271:         for (j=0;j<mfqP->n+1;j++) {
272:             if (point == mfqP->model_indices[j]) {
273:                 reject = 1;
274:                 break;
275:             }
276:         }

278:         /* Reject if norm(d) >c2 */
279:         if (!reject) {
280:             VecCopy(mfqP->Xhist[point],mfqP->workxvec); 
281:             VecAXPY(mfqP->workxvec,-1.0,mfqP->Xhist[mfqP->minindex]); 
282:             VecNorm(mfqP->workxvec,NORM_2,&normd); 
283:             normd /= mfqP->delta;
284:             if (normd > mfqP->c2) {
285:                 reject =1;
286:             }
287:         }
288:         if (reject)
289:         {
290:             point--;
291:             continue;
292:         }

294:         
295:         VecGetArray(mfqP->Xhist[point],&x); 
296:         mfqP->M[(mfqP->n+1)*mfqP->nmodelpoints] = 1.0;
297:         for (j=0;j<mfqP->n;j++) {
298:             mfqP->M[j+1+((mfqP->n+1)*mfqP->nmodelpoints)] = (x[j]  - mfqP->xmin[j]) / mfqP->delta;
299:         }

301:         VecRestoreArray(mfqP->Xhist[point],&x); 
302:         phi2eval(&mfqP->M[1+(mfqP->n+1)*mfqP->nmodelpoints],mfqP->n,&mfqP->N[mfqP->n*(mfqP->n+1)/2 * (mfqP->nmodelpoints)]); 

304:         /* Update QR factorization */

306:         /* Copy M' to Q_tmp */
307:         for (i=0;i<mfqP->n+1;i++) {
308:             for (j=0;j<mfqP->npmax;j++) {
309:                 mfqP->Q_tmp[j+mfqP->npmax*i] = mfqP->M[i+(mfqP->n+1)*j];
310:             }
311:         }
312:         blasnp = mfqP->nmodelpoints+1;
313:         /* Q_tmp,R = qr(M') */
314:         blasmaxmn=PetscMax(mfqP->m,mfqP->n+1);
315:         LAPACKgeqrf_(&blasnp,&blasnplus1,mfqP->Q_tmp,&blasnpmax,mfqP->tau_tmp,mfqP->mwork,&blasmaxmn,&blasinfo);

317:         if (blasinfo != 0) {
318:             SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine geqrf returned with value %D\n",blasinfo);
319:         }
320:         
321:         /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
322:         /* L = N*Qtmp */
323:         blasint2 = mfqP->n * (mfqP->n+1) / 2;
324:         /* Copy N to L_tmp */
325:         for (i=0;i<mfqP->n*(mfqP->n+1)/2 * mfqP->npmax;i++) {
326:             mfqP->L_tmp[i]= mfqP->N[i];
327:         }
328:         
329:         /* Copy L_save to L_tmp */

331:         /* L_tmp = N*Qtmp' */
332:         LAPACKormqr_("R","N",&blasint2,&blasnp,&blasnplus1,mfqP->Q_tmp,&blasnpmax,mfqP->tau_tmp,
333:                      mfqP->L_tmp,&blasint2,mfqP->npmaxwork,&blasnpmax_x_5,&blasinfo);

335:         if (blasinfo != 0) {
336:             SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine ormqr returned with value %D\n",blasinfo);
337:         }

339:         /* Copy L_tmp to L_save */
340:         for (i=0;i<mfqP->npmax * mfqP->n*(mfqP->n+1)/2;i++) {
341:             mfqP->L_save[i] = mfqP->L_tmp[i];
342:         }
343:         
344:         /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
345:         blasint = mfqP->nmodelpoints - mfqP->n;
346:         LAPACKgesvd_("N","N",&blasint2,&blasint,&mfqP->L_tmp[(mfqP->n+1)*blasint2],&blasint2,
347:                      mfqP->beta,mfqP->work,&blasn,mfqP->work,&blasn,mfqP->npmaxwork,&blasnpmax_x_5,
348:                      &blasinfo);
349:         if (blasinfo != 0) {
350:             SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine gesvd returned with value %D\n",blasinfo);
351:         }

353:         if (mfqP->beta[PetscMin(blasint,blasint2)-1] > mfqP->theta2) {
354:             /* accept point */
355:             mfqP->model_indices[mfqP->nmodelpoints] = point;
356:             /* Copy Q_tmp to Q */
357:             for (i=0;i<mfqP->npmax* mfqP->npmax;i++) {
358:                 mfqP->Q[i] = mfqP->Q_tmp[i];
359:             }
360:             for (i=0;i<mfqP->npmax;i++){
361:                 mfqP->tau[i] = mfqP->tau_tmp[i]; 
362:             }
363:             mfqP->nmodelpoints++;
364:             blasnp = mfqP->nmodelpoints+1;

366:             /* Copy L_save to L */
367:             for (i=0;i<mfqP->npmax * mfqP->n*(mfqP->n+1)/2;i++) {
368:                 mfqP->L[i] = mfqP->L_save[i];
369:             }
370:         }
371:         point--;

373:     }    


376:     blasnp = mfqP->nmodelpoints;
377:     /* Copy Q(:,n+2:np) to Z */
378:     /* First set Q_tmp to I */
379:     for (i=0;i<mfqP->npmax*mfqP->npmax;i++) {
380:         mfqP->Q_tmp[i] = 0.0;
381:     }
382:     for (i=0;i<mfqP->npmax;i++) {
383:         mfqP->Q_tmp[i + mfqP->npmax*i] = 1.0;
384:     }

386:     /* Q_tmp = I * Q */
387:     LAPACKormqr_("R","N",&blasnp,&blasnp,&blasnplus1,mfqP->Q,&blasnpmax,mfqP->tau,mfqP->Q_tmp,&blasnpmax,mfqP->npmaxwork,&blasnpmax_x_5,&blasinfo);

389:     if (blasinfo != 0) {
390:         SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine ormqr returned with value %D\n",blasinfo);
391:     }

393:     /* Copy Q_tmp(:,n+2:np) to Z) */
394:     offset = mfqP->npmax * (mfqP->n+1);
395:     for (i=offset;i<mfqP->npmax*mfqP->npmax;i++) {
396:         mfqP->Z[i-offset] = mfqP->Q_tmp[i];
397:     }
398:     return(0);
399: }

403: /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
404: PetscErrorCode addpoint(TaoSolver tao, TAO_POUNDERS *mfqP, PetscInt index) 
405: {
408:   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
409:     
410:   VecDuplicate(mfqP->Xhist[0],&mfqP->Xhist[mfqP->nHist]); 
411:   VecSetValues(mfqP->Xhist[mfqP->nHist],mfqP->n,mfqP->indices,&mfqP->Q_tmp[index*mfqP->npmax],INSERT_VALUES); 
412:   VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]); 
413:   VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]); 
414:   VecAYPX(mfqP->Xhist[mfqP->nHist],mfqP->delta,mfqP->Xhist[mfqP->minindex]); 

416:   /* Project into feasible region */
417:   if (tao->XU && tao->XL) {
418:     VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU,
419:                      mfqP->Xhist[mfqP->nHist]); 
420:   }

422:   /* Compute value of new vector */
423:   VecDuplicate(mfqP->Fhist[0],&mfqP->Fhist[mfqP->nHist]); 
424:   CHKMEMQ;
425:   TaoComputeSeparableObjective(tao,mfqP->Xhist[mfqP->nHist],mfqP->Fhist[mfqP->nHist]); 
426:   VecNorm(mfqP->Fhist[mfqP->nHist],NORM_2,&mfqP->Fres[mfqP->nHist]); 
427:   if (PetscIsInfOrNanReal(mfqP->Fres[mfqP->nHist])) {
428:     SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
429:   }
430:   mfqP->Fres[mfqP->nHist]*=mfqP->Fres[mfqP->nHist];

432:   /* Add new vector to model */
433:   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
434:   mfqP->nmodelpoints++;
435:   mfqP->nHist++;

437:   
438:   return(0);
439: }

443: PetscErrorCode modelimprove(TaoSolver tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) {
444:   /* modeld = Q(:,np+1:n)' */
446:   PetscInt i,j,minindex=0;
447:   PetscReal dp,half=0.5,one=1.0,minvalue=TAO_INFINITY;
448:   PetscBLASInt blasn=mfqP->n,  blasnpmax = mfqP->npmax, blask,info;
449:   PetscBLASInt blas1=1,blasnpmax_x_5 = mfqP->npmax*5;
450:   blask = mfqP->nmodelpoints;

452:    
453:   /* Qtmp = I(n x n) */
454:   for (i=0;i<mfqP->n;i++) {
455:     for (j=0;j<mfqP->n;j++) {
456:       mfqP->Q_tmp[i + mfqP->npmax*j] = 0.0;
457:     }
458:   }
459:   for (j=0;j<mfqP->n;j++) {
460:     mfqP->Q_tmp[j + mfqP->npmax*j] = 1.0;
461:   }
462:   

464:   /* Qtmp = Q * I */
465:   LAPACKormqr_("R","N",&blasn,&blasn,&blask,mfqP->Q,&blasnpmax,
466:                mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork,&blasnpmax_x_5, &info);
467:   
468:   for (i=mfqP->nmodelpoints;i<mfqP->n;i++) {
469:     dp = BLASdot_(&blasn,&mfqP->Q_tmp[i*mfqP->npmax],&blas1,mfqP->Gres,&blas1);
470:     if (dp>0.0) { /* Model says use the other direction! */
471:       for (j=0;j<mfqP->n;j++) {
472:         mfqP->Q_tmp[i*mfqP->npmax+j] *= -1;
473:       }
474:     }
475:     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
476:     for (j=0;j<mfqP->n;j++) {
477:       mfqP->work2[j] = mfqP->Gres[j];
478:     }
479:     BLASgemv_("N",&blasn,&blasn,&half,mfqP->Hres,&blasn,
480:               &mfqP->Q_tmp[i*mfqP->npmax], &blas1, &one, mfqP->work2,&blas1);
481:     mfqP->work[i] = BLASdot_(&blasn,&mfqP->Q_tmp[i*mfqP->npmax],&blas1,
482:                              mfqP->work2,&blas1);
483:     if (i==mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
484:       minindex=i;
485:       minvalue = mfqP->work[i];
486:     }
487:     
488:     if (addallpoints != 0) {
489:         addpoint(tao,mfqP,i); 
490:     }

492:   }
493:   
494:   if (!addallpoints) {
495:       addpoint(tao,mfqP,minindex); 
496:   }
497:   return(0);
498: }


503: PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, 
504:                                 PetscReal c) {
505:     PetscInt i,j;
506:     PetscBLASInt blasm=mfqP->m,blasj,blask,blasn=mfqP->n,ione=1,info;
507:     PetscBLASInt blasnpmax = mfqP->npmax,blasmaxmn;
508:     PetscReal proj,normd;
509:     PetscReal *x;

513:     for (i=mfqP->nHist-1;i>=0;i--) {
514:         VecGetArray(mfqP->Xhist[i],&x); 
515:         for (j=0;j<mfqP->n;j++) {
516:             mfqP->work[j] = (x[j] - xmin[j])/mfqP->delta;
517:         }
518:         VecRestoreArray(mfqP->Xhist[i],&x); 
519:         BLAScopy_(&blasn,mfqP->work,&ione,mfqP->work2,&ione);
520:         normd = BLASnrm2_(&blasn,mfqP->work,&ione);
521:         if (normd <= c*c) {
522:           blasj=(mfqP->n - mfqP->nmodelpoints);
523:             if (!mfqP->q_is_I) {
524:                 /* project D onto null */
525:                 blask=(mfqP->nmodelpoints);
526:                 LAPACKormqr_("R","N",&ione,&blasn,&blask,mfqP->Q,&blasnpmax,mfqP->tau,
527:                              mfqP->work2,&ione,mfqP->mwork,&blasm,&info);
528:                 
529:                 if (info < 0) {
530:                     SETERRQ1(PETSC_COMM_SELF,1,"ormqr returned value %D\n",info);
531:                 }
532:             }
533:             proj = BLASnrm2_(&blasj,&mfqP->work2[mfqP->nmodelpoints],&ione);

535:             if (proj >= mfqP->theta1) { /* add this index to model */
536:                 mfqP->model_indices[mfqP->nmodelpoints]=i;
537:                 mfqP->nmodelpoints++;
538:                 BLAScopy_(&blasn,mfqP->work,&ione,&mfqP->Q_tmp[mfqP->npmax*(mfqP->nmodelpoints-1)],&ione);
539:                 blask=mfqP->npmax*(mfqP->nmodelpoints);
540:                 BLAScopy_(&blask,mfqP->Q_tmp,&ione,mfqP->Q,&ione);
541:                 blask = mfqP->nmodelpoints;
542:                 blasmaxmn = PetscMax(mfqP->m,mfqP->n);
543:                 LAPACKgeqrf_(&blasn,&blask,mfqP->Q,&blasnpmax,mfqP->tau,mfqP->mwork,
544:                              &blasmaxmn,&info);
545:                 mfqP->q_is_I = 0;
546:                 if (info < 0) {
547:                     SETERRQ1(PETSC_COMM_SELF,1,"geqrf returned value %D\n",info);
548:                 }

550:                     
551:             }
552:             if (mfqP->nmodelpoints == mfqP->n)  {
553:                 break;
554:             }
555:         }                
556:     }
557:     
558:     return(0);
559: }
562: static PetscErrorCode TaoSolve_POUNDERS(TaoSolver tao)
563: {
564:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

566:   PetscInt i,ii,j,k,l,iter=0;
567:   PetscReal step=1.0;
568:   TaoSolverTerminationReason reason = TAO_CONTINUE_ITERATING;

570:   PetscInt low,high;
571:   PetscReal minnorm;
572:   PetscReal *x,*f,*fmin,*xmint;
573:   PetscReal cres,deltaold;
574:   PetscReal gnorm;
575:   PetscBLASInt info,ione=1,iblas;
576:   PetscBool valid;
577:   PetscReal mdec, rho, normxsp;
578:   PetscReal one=1.0,zero=0.0,ratio;
579:   PetscBLASInt blasm,blasn,blasnpmax,blasn2;
581:   
582:   
583:   /* n = # of parameters 
584:      m = dimension (components) of function  */
585:   
587:   
588:   if (tao->XL && tao->XU) {
589:     /* Check x0 <= XU */
590:     PetscReal val;
591:     VecCopy(tao->solution,mfqP->Xhist[0]); 
592:     VecAXPY(mfqP->Xhist[0],-1.0,tao->XU); 
593:     VecMax(mfqP->Xhist[0],PETSC_NULL,&val); 
594:     if (val > 1e-10) {
595:       SETERRQ(PETSC_COMM_WORLD,1,"X0 > upper bound");
596:     }
597:     
598:     /* Check x0 >= xl */
599:     VecCopy(tao->XL,mfqP->Xhist[0]); 
600:     VecAXPY(mfqP->Xhist[0],-1.0,tao->solution); 
601:     VecMax(mfqP->Xhist[0],PETSC_NULL,&val); 
602:     if (val > 1e-10) {
603:       SETERRQ(PETSC_COMM_WORLD,1,"X0 < lower bound");
604:     }
605:     /* Check x0 + delta < XU  -- should be able to get around this eventually */

607:     VecSet(mfqP->Xhist[0],mfqP->delta); 
608:     VecAXPY(mfqP->Xhist[0],1.0,tao->solution); 
609:     VecAXPY(mfqP->Xhist[0],-1.0,tao->XU); 
610:     VecMax(mfqP->Xhist[0],PETSC_NULL,&val); 
611:     if (val > 1e-10) {
612:       SETERRQ(PETSC_COMM_WORLD,1,"X0 + delta > upper bound");
613:     }
614:     
615:   }

617:   CHKMEMQ;
618:   blasm = mfqP->m; blasn=mfqP->n; blasnpmax = mfqP->npmax;
619:   for (i=0;i<mfqP->n*mfqP->n*mfqP->m;i++) mfqP->H[i]=0;
620:   
621:   VecCopy(tao->solution,mfqP->Xhist[0]); 
622:   CHKMEMQ;
623:   TaoComputeSeparableObjective(tao,tao->solution,mfqP->Fhist[0]); 
624:   
625:   VecNorm(mfqP->Fhist[0],NORM_2,&mfqP->Fres[0]); 
626:   if (PetscIsInfOrNanReal(mfqP->Fres[0])) {
627:     SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
628:   }
629:   mfqP->Fres[0]*=mfqP->Fres[0];
630:   mfqP->minindex = 0;
631:   minnorm = mfqP->Fres[0];
632:   VecGetOwnershipRange(mfqP->Xhist[0],&low,&high); 
633:   for (i=1;i<mfqP->n+1;i++) {
634:       VecCopy(tao->solution,mfqP->Xhist[i]); 
635:       if (i-1 >= low && i-1 < high) {
636:           VecGetArray(mfqP->Xhist[i],&x); 
637:           x[i-1-low] += mfqP->delta;
638:           VecRestoreArray(mfqP->Xhist[i],&x); 
639:       }
640:       CHKMEMQ;
641:       TaoComputeSeparableObjective(tao,mfqP->Xhist[i],mfqP->Fhist[i]); 
642:       VecNorm(mfqP->Fhist[i],NORM_2,&mfqP->Fres[i]); 
643:       if (PetscIsInfOrNanReal(mfqP->Fres[i])) {
644:         SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
645:       }
646:       mfqP->Fres[i]*=mfqP->Fres[i];
647:       if (mfqP->Fres[i] < minnorm) {
648:           mfqP->minindex = i;
649:           minnorm = mfqP->Fres[i];
650:       }
651:   }

653:   VecCopy(mfqP->Xhist[mfqP->minindex],tao->solution); 
654:   VecCopy(mfqP->Fhist[mfqP->minindex],tao->sep_objective); 
655:   /* Gather mpi vecs to one big local vec */

657:   

659:   /* Begin serial code */

661:   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
662:   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
663:   /* (Column oriented for blas calls) */
664:   ii=0;

666:   if (mfqP->mpisize == 1) {
667:       VecGetArray(mfqP->Xhist[mfqP->minindex],&xmint); 
668:       for (i=0;i<mfqP->n;i++) mfqP->xmin[i] = xmint[i]; 
669:       VecRestoreArray(mfqP->Xhist[mfqP->minindex],&xmint); 
670:       VecGetArray(mfqP->Fhist[mfqP->minindex],&fmin); 
671:       for (i=0;i<mfqP->n+1;i++) {
672:           if (i == mfqP->minindex) continue;

674:           VecGetArray(mfqP->Xhist[i],&x); 
675:           for (j=0;j<mfqP->n;j++) {
676:               mfqP->Disp[ii+mfqP->npmax*j] = (x[j] - mfqP->xmin[j])/mfqP->delta;
677:           }
678:           VecRestoreArray(mfqP->Xhist[i],&x); 

680:           VecGetArray(mfqP->Fhist[i],&f); 
681:           for (j=0;j<mfqP->m;j++) {
682:               mfqP->Fdiff[ii+mfqP->n*j] = f[j] - fmin[j];
683:           }
684:           VecRestoreArray(mfqP->Fhist[i],&f); 
685:           mfqP->model_indices[ii++] = i;

687:       }
688:       for (j=0;j<mfqP->m;j++) {
689:           mfqP->C[j] = fmin[j];
690:       }
691:       VecRestoreArray(mfqP->Fhist[mfqP->minindex],&fmin); 

693:   } else {
694:       VecScatterBegin(mfqP->scatterx,mfqP->Xhist[mfqP->minindex],mfqP->localxmin,INSERT_VALUES,SCATTER_FORWARD); 
695:       VecScatterEnd(mfqP->scatterx,mfqP->Xhist[mfqP->minindex],mfqP->localxmin,INSERT_VALUES,SCATTER_FORWARD); 
696:       VecGetArray(mfqP->localxmin,&xmint); 
697:       for (i=0;i<mfqP->n;i++) mfqP->xmin[i] = xmint[i];
698:       VecRestoreArray(mfqP->localxmin,&mfqP->xmin); 



702:       VecScatterBegin(mfqP->scatterf,mfqP->Fhist[mfqP->minindex],mfqP->localfmin,INSERT_VALUES,SCATTER_FORWARD); 
703:       VecScatterEnd(mfqP->scatterf,mfqP->Fhist[mfqP->minindex],mfqP->localfmin,INSERT_VALUES,SCATTER_FORWARD); 
704:       VecGetArray(mfqP->localfmin,&fmin); 
705:       for (i=0;i<mfqP->n+1;i++) {
706:           if (i == mfqP->minindex) continue;
707:                                  
708:           mfqP->model_indices[ii++] = i;
709:           VecScatterBegin(mfqP->scatterx,mfqP->Xhist[ii],mfqP->localx,
710:                                  INSERT_VALUES, SCATTER_FORWARD); 
711:           VecScatterEnd(mfqP->scatterx,mfqP->Xhist[ii],mfqP->localx,
712:                                  INSERT_VALUES, SCATTER_FORWARD); 
713:           VecGetArray(mfqP->localx,&x); 
714:           for (j=0;j<mfqP->n;j++) {
715:               mfqP->Disp[i+mfqP->npmax*j] = (x[j] - mfqP->xmin[j])/mfqP->delta;
716:           }
717:           VecRestoreArray(mfqP->localx,&x); 

719:           
720:           VecScatterBegin(mfqP->scatterf,mfqP->Fhist[ii],mfqP->localf,
721:                                  INSERT_VALUES, SCATTER_FORWARD); 
722:           VecScatterEnd(mfqP->scatterf,mfqP->Fhist[ii],mfqP->localf,
723:                                  INSERT_VALUES, SCATTER_FORWARD); 
724:           VecGetArray(mfqP->localf,&f); 
725:           for (j=0;j<mfqP->m;j++) {
726:               mfqP->Fdiff[i*mfqP->n+j] = f[j] - fmin[j];

728:           }
729:           VecRestoreArray(mfqP->localf,&f); 
730:       }
731:       for (j=0;j<mfqP->m;j++) {
732:           mfqP->C[j] = fmin[j];
733:       }
734:           
735:       VecRestoreArray(mfqP->localfmin,&fmin); 
736:   
737:   }

739:           
740:   /* Determine the initial quadratic models */
741:   
742:   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
743:   /* D (nxn) Fdiff (nxm)  => G (nxm) */
744:   blasn2 = blasn;
745:   LAPACKgesv_(&blasn,&blasm,mfqP->Disp,&blasnpmax,mfqP->iwork,mfqP->Fdiff,&blasn2,&info);
746:   PetscInfo1(tao,"gesv returned %D\n",info); 

748:   cres = minnorm;
749:   /* Gres = G*F(xkin,1:m)'  G (nxm)   Fk (m)   */
750:   BLASgemv_("N",&blasn,&blasm,&one,mfqP->Fdiff,&blasn,mfqP->C,&ione,&zero,mfqP->Gres,&ione);

752:   /*  Hres = G*G'  */
753:   BLASgemm_("N","T",&blasn,&blasn,&blasm,&one,mfqP->Fdiff, &blasn,mfqP->Fdiff,&blasn,&zero,mfqP->Hres,&blasn);

755:   valid = PETSC_TRUE;

757:   VecSetValues(tao->gradient,mfqP->n,mfqP->indices,mfqP->Gres,INSERT_VALUES); 
758:   VecAssemblyBegin(tao->gradient);
759:   VecAssemblyEnd(tao->gradient);
760:   VecNorm(tao->gradient,NORM_2,&gnorm); 
761:   gnorm *= mfqP->delta;
762:   VecCopy(mfqP->Xhist[mfqP->minindex],tao->solution); 
763:   TaoMonitor(tao, iter, minnorm, gnorm, 0.0, step, &reason); 
764:   mfqP->nHist = mfqP->n+1;
765:   mfqP->nmodelpoints = mfqP->n+1;

767:   while (reason == TAO_CONTINUE_ITERATING) {

769:     iter++;
770:     PetscReal gnm = 10;
771:     /* Solve the subproblem min{Q(s): ||s|| <= delta} */
772:     gqtwrap(tao,&gnm,&mdec); 
773:     /* Evaluate the function at the new point */

775:     for (i=0;i<mfqP->n;i++) {
776:         mfqP->work[i] = mfqP->Xsubproblem[i]*mfqP->delta + mfqP->xmin[i];
777:     }
778:     VecDuplicate(tao->solution,&mfqP->Xhist[mfqP->nHist]); 
779:     VecDuplicate(tao->sep_objective,&mfqP->Fhist[mfqP->nHist]); 
780:     VecSetValues(mfqP->Xhist[mfqP->nHist],mfqP->n,mfqP->indices,mfqP->work,INSERT_VALUES); 
781:     VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]); 
782:     VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]); 

784:     TaoComputeSeparableObjective(tao,mfqP->Xhist[mfqP->nHist],mfqP->Fhist[mfqP->nHist]); 
785:     VecNorm(mfqP->Fhist[mfqP->nHist],NORM_2,&mfqP->Fres[mfqP->nHist]); 
786:     if (PetscIsInfOrNanReal(mfqP->Fres[mfqP->nHist])) {
787:       SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
788:     }
789:     mfqP->Fres[mfqP->nHist]*=mfqP->Fres[mfqP->nHist];
790:     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
791:     mfqP->nHist++;

793:     /* Update the center */
794:     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid==PETSC_TRUE)) {
795:         /* Update model to reflect new base point */
796:         for (i=0;i<mfqP->n;i++) {
797:             mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i])/mfqP->delta;
798:         }
799:         for (j=0;j<mfqP->m;j++) {
800:             /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
801:                G(:,j) = G(:,j) + H(:,:,j)*work' */
802:             for (k=0;k<mfqP->n;k++) {
803:                 mfqP->work2[k]=0.0;
804:                 for (l=0;l<mfqP->n;l++) {
805:                     mfqP->work2[k]+=mfqP->H[j + mfqP->m*(k + l*mfqP->n)]*mfqP->work[l];
806:                 }
807:             }
808:             for (i=0;i<mfqP->n;i++) {
809:                 mfqP->C[j]+=mfqP->work[i]*(mfqP->Fdiff[i + mfqP->n* j] + 0.5*mfqP->work2[i]);
810:                 mfqP->Fdiff[i+mfqP->n*j] +=mfqP-> work2[i];
811:             }
812:         }
813:         /* Cres += work*Gres + .5*work*Hres*work';
814:            Gres += Hres*work'; */

816:         BLASgemv_("N",&blasn,&blasn,&one,mfqP->Hres,&blasn,mfqP->work,&ione,
817:                   &zero,mfqP->work2,&ione);
818:         for (i=0;j<mfqP->n;j++) {
819:             cres += mfqP->work[i]*(mfqP->Gres[i]  + 0.5*mfqP->work2[i]);
820:             mfqP->Gres[i] += mfqP->work2[i];
821:         }
822:         mfqP->minindex = mfqP->nHist-1;
823:         minnorm = mfqP->Fres[mfqP->minindex];
824:         /* Change current center */
825:         VecGetArray(mfqP->Xhist[mfqP->minindex],&xmint); 
826:         for (i=0;i<mfqP->n;i++) {
827:             mfqP->xmin[i] = xmint[i];
828:         }
829:         VecRestoreArray(mfqP->Xhist[mfqP->minindex],&xmint); 


832:     }

834:     /* Evaluate at a model-improving point if necessary */
835:     if (valid == PETSC_FALSE) {
836:         mfqP->q_is_I = 1;
837:         affpoints(mfqP,mfqP->xmin,mfqP->c1); 
838:         if (mfqP->nmodelpoints < mfqP->n) {
839:           PetscInfo(tao,"Model not valid -- model-improving");
840:           modelimprove(tao,mfqP,1); 
841:         }
842:     }
843:     

845:     /* Update the trust region radius */
846:     deltaold = mfqP->delta;
847:     normxsp = 0;
848:     for (i=0;i<mfqP->n;i++) {
849:         normxsp += mfqP->Xsubproblem[i]*mfqP->Xsubproblem[i];
850:     }
851:     normxsp = PetscSqrtReal(normxsp);
852:     if (rho >= mfqP->eta1 && normxsp > 0.5*mfqP->delta) {
853:         mfqP->delta = PetscMin(mfqP->delta*mfqP->gamma1,mfqP->deltamax); 
854:     } else if (valid == PETSC_TRUE) {
855:         mfqP->delta = PetscMax(mfqP->delta*mfqP->gamma0,mfqP->deltamin);
856:     }

858:     /* Compute the next interpolation set */
859:     mfqP->q_is_I = 1;
860:     mfqP->nmodelpoints=0;
861:     affpoints(mfqP,mfqP->xmin,mfqP->c1); 
862:     if (mfqP->nmodelpoints == mfqP->n) {
863:       valid = PETSC_TRUE;
864:     } else {
865:       valid = PETSC_FALSE;
866:       affpoints(mfqP,mfqP->xmin,mfqP->c2); 
867:       if (mfqP->n > mfqP->nmodelpoints) {
868:         PetscInfo(tao,"Model not valid -- adding geometry points");
869:         modelimprove(tao,mfqP,mfqP->n - mfqP->nmodelpoints); 
870:       }
871:     }
872:     for (i=mfqP->nmodelpoints;i>0;i--) {
873:         mfqP->model_indices[i] = mfqP->model_indices[i-1];
874:     }
875:     mfqP->nmodelpoints++;
876:     mfqP->model_indices[0] = mfqP->minindex;
877:     morepoints(mfqP); 
878:     if (mfqP->nmodelpoints - mfqP->n - 1 == 0) {
879:       PetscInfo(tao,"Cannot add enough model points");
880:       reason = TAO_DIVERGED_LS_FAILURE;
881:       tao->reason = TAO_DIVERGED_LS_FAILURE;
882:       continue;
883:     }
884:     for (i=0;i<mfqP->nmodelpoints;i++) {
885:         VecGetArray(mfqP->Xhist[mfqP->model_indices[i]],&x); 
886:         for (j=0;j<mfqP->n;j++) {
887:             mfqP->Disp[i + mfqP->npmax*j] = (x[j]  - mfqP->xmin[j]) / deltaold;
888:         }
889:         VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]],&x); 
890:         VecGetArray(mfqP->Fhist[mfqP->model_indices[i]],&f); 
891:         for (j=0;j<mfqP->m;j++) {
892:             for (k=0;k<mfqP->n;k++)  {
893:                 mfqP->work[k]=0.0;
894:                 for (l=0;l<mfqP->n;l++) {
895:                     mfqP->work[k] += mfqP->H[j + mfqP->m*(k + mfqP->n*l)] * mfqP->Disp[i + mfqP->npmax*l];
896:                 }
897:             }
898:             mfqP->RES[j*mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn,&mfqP->Fdiff[j*mfqP->n],&ione,&mfqP->Disp[i],&blasnpmax) - 0.5*BLASdot_(&blasn,mfqP->work,&ione,&mfqP->Disp[i],&blasnpmax) + f[j];
899:         }
900:         VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]],&f); 
901:     }


904:     /* Update the quadratic model */
905:     getquadpounders(mfqP); 
906:     VecGetArray(mfqP->Fhist[mfqP->minindex],&fmin); 
907:     BLAScopy_(&blasm,fmin,&ione,mfqP->C,&ione);
908:     /* G = G*(delta/deltaold) + Gdel */
909:     ratio = mfqP->delta/deltaold;
910:     iblas = blasm*blasn;
911:     BLASscal_(&iblas,&ratio,mfqP->Fdiff,&ione);
912:     BLASaxpy_(&iblas,&one,mfqP->Gdel,&ione,mfqP->Fdiff,&ione);
913:     /* H = H*(delta/deltaold) + Hdel */
914:     iblas = blasm*blasn*blasn;
915:     ratio *= ratio;
916:     BLASscal_(&iblas,&ratio,mfqP->H,&ione);
917:     BLASaxpy_(&iblas,&one,mfqP->Hdel,&ione,mfqP->H,&ione);


920:     /* Get residuals */
921:     cres = mfqP->Fres[mfqP->minindex];
922:     /* Gres = G*F(xkin,1:m)' */
923:     BLASgemv_("N",&blasn,&blasm,&one,mfqP->Fdiff,&blasn,mfqP->C,&ione,&zero,mfqP->Gres,&ione);
924:     /* Hres = sum i=1..m {F(xkin,i)*H(:,:,i)}   + G*G' */
925:     BLASgemm_("N","T",&blasn,&blasn,&blasm,&one,mfqP->Fdiff,&blasn,mfqP->Fdiff,&blasn,
926:               &zero,mfqP->Hres,&blasn);

928:     iblas = mfqP->n*mfqP->n;

930:     for (j=0;j<mfqP->m;j++) { 
931:         BLASaxpy_(&iblas,&fmin[j],&mfqP->H[j],&blasm,mfqP->Hres,&ione);
932:     }
933:     
934:     /* Export solution and gradient residual to TAO */
935:     VecCopy(mfqP->Xhist[mfqP->minindex],tao->solution); 
936:     VecSetValues(tao->gradient,mfqP->n,mfqP->indices,mfqP->Gres,INSERT_VALUES); 
937:     VecAssemblyBegin(tao->gradient);
938:     VecAssemblyEnd(tao->gradient);
939:     VecNorm(tao->gradient,NORM_2,&gnorm); 
940:     gnorm *= mfqP->delta;
941:     /*  final criticality test */
942:     TaoMonitor(tao, iter, minnorm, gnorm, 0.0, step, &reason); 
943:   }
944:   return(0);
945: }

949: static PetscErrorCode TaoSetUp_POUNDERS(TaoSolver tao)
950: {
951:     TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
952:     PetscInt i;
953:     IS isfloc,isfglob,isxloc,isxglob;


958:     if (!tao->gradient) {VecDuplicate(tao->solution,&tao->gradient);   }
959:     if (!tao->stepdirection) {VecDuplicate(tao->solution,&tao->stepdirection);   }
960:     VecGetSize(tao->solution,&mfqP->n); 
961:     VecGetSize(tao->sep_objective,&mfqP->m); 
962:     mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
963:     if (mfqP->npmax == PETSC_DEFAULT) {
964:       mfqP->npmax = 2*mfqP->n + 1;
965:     }
966:     mfqP->npmax = PetscMin((mfqP->n+1)*(mfqP->n+2)/2,mfqP->npmax); 
967:     mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n+2);

969:     PetscMalloc((tao->max_funcs+10)*sizeof(Vec),&mfqP->Xhist); 
970:     PetscMalloc((tao->max_funcs+10)*sizeof(Vec),&mfqP->Fhist); 
971:     for (i=0;i<mfqP->n +1;i++) {
972:         VecDuplicate(tao->solution,&mfqP->Xhist[i]); 
973:         VecDuplicate(tao->sep_objective,&mfqP->Fhist[i]); 
974:     }
975:     VecDuplicate(tao->solution,&mfqP->workxvec); 
976:     mfqP->nHist = 0;

978:     PetscMalloc((tao->max_funcs+10)*sizeof(PetscReal),&mfqP->Fres); 
979:     PetscMalloc(mfqP->npmax*mfqP->m*sizeof(PetscReal),&mfqP->RES); 
980:     PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->work); 
981:     PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->work2); 
982:     PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->work3); 
983:     PetscMalloc(PetscMax(mfqP->m,mfqP->n+1)*sizeof(PetscReal),&mfqP->mwork); 
984:     PetscMalloc((mfqP->npmax - mfqP->n - 1)*sizeof(PetscReal),&mfqP->omega); 
985:     PetscMalloc((mfqP->n * (mfqP->n+1) / 2)*sizeof(PetscReal),&mfqP->beta); 
986:     PetscMalloc((mfqP->n + 1) *sizeof(PetscReal),&mfqP->alpha); 

988:     PetscMalloc(mfqP->n*mfqP->n*mfqP->m*sizeof(PetscReal),&mfqP->H); 
989:     PetscMalloc(mfqP->npmax*mfqP->npmax*sizeof(PetscReal),&mfqP->Q); 
990:     PetscMalloc(mfqP->npmax*mfqP->npmax*sizeof(PetscReal),&mfqP->Q_tmp); 
991:     PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->L); 
992:     PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->L_tmp); 
993:     PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->L_save); 
994:     PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->N); 
995:     PetscMalloc(mfqP->npmax * (mfqP->n+1) * sizeof(PetscReal),&mfqP->M); 
996:     PetscMalloc(mfqP->npmax * (mfqP->npmax - mfqP->n - 1) * sizeof(PetscReal), &mfqP->Z); 
997:     PetscMalloc(mfqP->npmax*sizeof(PetscReal),&mfqP->tau); 
998:     PetscMalloc(mfqP->npmax*sizeof(PetscReal),&mfqP->tau_tmp); 
999:     PetscMalloc(5*mfqP->npmax*sizeof(PetscReal),&mfqP->npmaxwork); 
1000:     PetscMalloc(5*mfqP->npmax*sizeof(PetscBLASInt),&mfqP->npmaxiwork); 
1001:     PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->xmin); 
1002:     PetscMalloc(mfqP->m*sizeof(PetscReal),&mfqP->C); 
1003:     PetscMalloc(mfqP->m*mfqP->n*sizeof(PetscReal),&mfqP->Fdiff); 
1004:     PetscMalloc(mfqP->npmax*mfqP->n*sizeof(PetscReal),&mfqP->Disp); 
1005:     PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->Gres); 
1006:     PetscMalloc(mfqP->n*mfqP->n*sizeof(PetscReal),&mfqP->Hres); 
1007:     PetscMalloc(mfqP->n*mfqP->n*sizeof(PetscReal),&mfqP->Gpoints); 
1008:     PetscMalloc(mfqP->npmax*sizeof(PetscInt),&mfqP->model_indices); 
1009:     PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->Xsubproblem); 
1010:     PetscMalloc(mfqP->m*mfqP->n*sizeof(PetscReal),&mfqP->Gdel); 
1011:     PetscMalloc(mfqP->n*mfqP->n*mfqP->m*sizeof(PetscReal), &mfqP->Hdel); 
1012:     PetscMalloc(PetscMax(mfqP->m,mfqP->n)*sizeof(PetscInt),&mfqP->indices); 
1013:     PetscMalloc(mfqP->n*sizeof(PetscBLASInt),&mfqP->iwork); 
1014:     for (i=0;i<PetscMax(mfqP->m,mfqP->n);i++) {
1015:         mfqP->indices[i] = i;
1016:     }
1017:   MPI_Comm_size(((PetscObject)tao)->comm,&mfqP->mpisize); 
1018:   if (mfqP->mpisize > 1) {
1019:       VecCreateSeq(PETSC_COMM_SELF,mfqP->n,&mfqP->localx); 
1020:       VecCreateSeq(PETSC_COMM_SELF,mfqP->n,&mfqP->localxmin); 
1021:       VecCreateSeq(PETSC_COMM_SELF,mfqP->m,&mfqP->localf); 
1022:       VecCreateSeq(PETSC_COMM_SELF,mfqP->m,&mfqP->localfmin); 
1023:       ISCreateStride(MPI_COMM_SELF,mfqP->n,0,1,&isxloc); 
1024:       ISCreateStride(MPI_COMM_SELF,mfqP->n,0,1,&isxglob); 
1025:       ISCreateStride(MPI_COMM_SELF,mfqP->m,0,1,&isfloc); 
1026:       ISCreateStride(MPI_COMM_SELF,mfqP->m,0,1,&isfglob); 
1027:       

1029:       VecScatterCreate(tao->solution,isxglob,mfqP->localx,isxloc,&mfqP->scatterx); 
1030:       VecScatterCreate(tao->sep_objective,isfglob,mfqP->localf,isfloc,&mfqP->scatterf); 

1032:       ISDestroy(&isxloc); 
1033:       ISDestroy(&isxglob); 
1034:       ISDestroy(&isfloc); 
1035:       ISDestroy(&isfglob); 

1037:   }

1039:   if (!mfqP->usegqt) {
1040:       KSP       ksp;
1041:       PC        pc;
1042:       TaoCreate(PETSC_COMM_SELF,&mfqP->subtao); 
1043:       TaoSetType(mfqP->subtao,"tao_bqpip"); 
1044:       TaoSetOptionsPrefix(mfqP->subtao,"pounders_subsolver_"); 
1045:       TaoSetObjectiveAndGradientRoutine(mfqP->subtao,pounders_fg,(void*)mfqP); 
1046:       TaoSetMaximumIterations(mfqP->subtao,mfqP->gqt_maxits); 
1047:       TaoGetKSP(mfqP->subtao,&ksp); 
1048:       KSPGetPC(ksp,&pc); 
1049:       PCSetType(pc,PCNONE); 
1050:       TaoSetFromOptions(mfqP->subtao); 
1051:   }    
1052:   return(0);
1053: }

1057: static PetscErrorCode TaoDestroy_POUNDERS(TaoSolver tao)
1058: {
1059:   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
1060:   PetscInt i;
1062:   

1065:   if (!mfqP->usegqt) {
1066:     TaoDestroy(&mfqP->subtao); 
1067:   }
1068:   PetscFree(mfqP->Fres); 
1069:   PetscFree(mfqP->RES); 
1070:   PetscFree(mfqP->work); 
1071:   PetscFree(mfqP->work2); 
1072:   PetscFree(mfqP->work3); 
1073:   PetscFree(mfqP->mwork); 
1074:   PetscFree(mfqP->omega); 
1075:   PetscFree(mfqP->beta); 
1076:   PetscFree(mfqP->alpha); 
1077:   PetscFree(mfqP->H); 
1078:   PetscFree(mfqP->Q); 
1079:   PetscFree(mfqP->Q_tmp); 
1080:   PetscFree(mfqP->L); 
1081:   PetscFree(mfqP->L_tmp); 
1082:   PetscFree(mfqP->L_save); 
1083:   PetscFree(mfqP->N); 
1084:   PetscFree(mfqP->M); 
1085:   PetscFree(mfqP->Z); 
1086:   PetscFree(mfqP->tau); 
1087:   PetscFree(mfqP->tau_tmp); 
1088:   PetscFree(mfqP->npmaxwork); 
1089:   PetscFree(mfqP->npmaxiwork); 
1090:   PetscFree(mfqP->xmin); 
1091:   PetscFree(mfqP->C); 
1092:   PetscFree(mfqP->Fdiff); 
1093:   PetscFree(mfqP->Disp); 
1094:   PetscFree(mfqP->Gres); 
1095:   PetscFree(mfqP->Hres); 
1096:   PetscFree(mfqP->Gpoints); 
1097:   PetscFree(mfqP->model_indices); 
1098:   PetscFree(mfqP->Xsubproblem); 
1099:   PetscFree(mfqP->Gdel); 
1100:   PetscFree(mfqP->Hdel); 
1101:   PetscFree(mfqP->indices); 
1102:   PetscFree(mfqP->iwork); 
1103:   
1104:   for (i=0;i<mfqP->nHist;i++) {
1105:       VecDestroy(&mfqP->Xhist[i]); 
1106:       VecDestroy(&mfqP->Fhist[i]); 
1107:   }
1108:   if (mfqP->workxvec) {
1109:     VecDestroy(&mfqP->workxvec); 
1110:   }
1111:   PetscFree(mfqP->Xhist); 
1112:   PetscFree(mfqP->Fhist); 

1114:   if (mfqP->mpisize > 1) {
1115:       VecDestroy(&mfqP->localx);  
1116:       VecDestroy(&mfqP->localxmin);  
1117:       VecDestroy(&mfqP->localf);  
1118:       VecDestroy(&mfqP->localfmin);  
1119:   }



1123:   if (tao->data) {
1124:     PetscFree(tao->data); 
1125:   }
1126:   tao->data = PETSC_NULL;
1127:   return(0);
1128: }

1132: static PetscErrorCode TaoSetFromOptions_POUNDERS(TaoSolver tao)
1133: {
1134:   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;

1138:   PetscOptionsHead("POUNDERS method for least-squares optimization"); 
1139:   PetscOptionsReal("-tao_pounders_delta","initial delta","",mfqP->delta,&mfqP->delta,0); 
1140:   mfqP->npmax = PETSC_DEFAULT;
1141:   PetscOptionsInt("-tao_pounders_npmax","max number of points in model","",mfqP->npmax,&mfqP->npmax,0); 
1142:   mfqP->usegqt = PETSC_FALSE;
1143:   PetscOptionsBool("-tao_pounders_gqt","use gqt algorithm for subproblem","",mfqP->usegqt,&mfqP->usegqt,0); 
1144:   PetscOptionsTail(); 
1145:   return(0);
1146: }

1150: static PetscErrorCode TaoView_POUNDERS(TaoSolver tao, PetscViewer viewer)
1151: {
1152:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1153:   PetscBool isascii;


1158:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1159:   if (isascii) {
1160:     PetscViewerASCIIPushTab(viewer); 
1161:     if (mfqP->usegqt) {
1162:       PetscViewerASCIIPrintf(viewer, "Subproblem solver: gqt\n"); 
1163:     } else {
1164:       PetscViewerASCIIPrintf(viewer, "Subproblem solver: bqpip\n"); 
1165:     }      
1166:     PetscViewerASCIIPopTab(viewer); 
1167:   } else {
1168:     SETERRQ1(((PetscObject)tao)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for TAO NLS",((PetscObject)viewer)->type_name);
1169:   }
1170:   return(0);
1171: }

1176: PetscErrorCode TaoCreate_POUNDERS(TaoSolver tao)
1177: {
1178:   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
1180:   

1183:   tao->ops->setup = TaoSetUp_POUNDERS;
1184:   tao->ops->solve = TaoSolve_POUNDERS;
1185:   tao->ops->view = TaoView_POUNDERS;
1186:   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1187:   tao->ops->destroy = TaoDestroy_POUNDERS;


1190:   PetscNewLog(tao, TAO_POUNDERS, &mfqP); 
1191:   tao->data = (void*)mfqP;
1192:   tao->max_it = 2000;
1193:   tao->max_funcs = 4000;
1194:   tao->fatol = 1e-8;
1195:   tao->frtol = 1e-8;
1196:   mfqP->delta = 0.1;
1197:   mfqP->deltamax=1e3;
1198:   mfqP->deltamin=1e-6;
1199:   mfqP->c2 = 100.0;
1200:   mfqP->theta1=1e-5;
1201:   mfqP->theta2=1e-4;
1202:   mfqP->gamma0=0.5;
1203:   mfqP->gamma1=2.0;
1204:   mfqP->eta0 = 0.0;
1205:   mfqP->eta1 = 0.1;
1206:   mfqP->gqt_rtol = 0.001;
1207:   mfqP->gqt_maxits = 50;
1208:   mfqP->workxvec = 0;
1209:   return(0);
1210: }