Actual source code: elliptic.c

  1: #include "private/taosolver_impl.h"
  2: #include "src/pde_constrained/impls/lcl/lcl.h"
  3: #include "petsctime.h"


  6: /*T
  7:    Concepts: TAO - Solving a system of nonlinear equations, nonlinear ;east squares
  8:    Routines: TaoInitialize(); TaoFinalize(); 
  9:    Routines: TaoCreate();
 10:    Routines: TaoSetType(); 
 11:    Routines: TaoSetInitialVector();
 12:    Routines: TaoSetObjectiveRoutine();
 13:    Routines: TaoSetGradientRoutine();
 14:    Routines: TaoSetConstraintsRoutine();
 15:    Routines: TaoSetJacobianStateRoutine();
 16:    Routines: TaoSetJacobianDesignRoutine();
 17:    Routines: TaoSetStateDesignIS();
 18:    Routines: TaoSetFromOptions();
 19:    Routines: TaoSetHistory(); TaoGetHistory();
 20:    Routines: TaoSolve();
 21:    Routines: TaoGetTerminationReason(); TaoDestroy(); 
 22:    Processors: n
 23: T*/

 25: typedef struct {
 26:   PetscInt n; /* Number of total variables */
 27:   PetscInt m; /* Number of constraints */
 28:   PetscInt nstate;
 29:   PetscInt ndesign;
 30:   PetscInt mx; /* grid points in each direction */
 31:   PetscInt ns; /* Number of data samples (1<=ns<=8) 
 32:                   Currently only ns=1 is supported */
 33:   PetscInt ndata; /* Number of data points per sample */
 34:   IS s_is;
 35:   IS d_is;

 37:   VecScatter state_scatter;
 38:   VecScatter design_scatter;
 39:   VecScatter *yi_scatter, *di_scatter;
 40:   Vec suby,subq,subd;
 41:   Mat Js,Jd,JsPrec,JsInv,JsBlock;

 43:   PetscReal alpha; /* Regularization parameter */
 44:   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
 45:   PetscReal noise; /* Amount of noise to add to data */
 46:   PetscReal *ones;
 47:   Mat Q;
 48:   Mat MQ; 
 49:   Mat L;

 51:   Mat Grad;
 52:   Mat Av,Avwork;
 53:   Mat Div, Divwork;
 54:   Mat DSG;
 55:   Mat Diag,Ones;
 56:   

 58:   Vec q;
 59:   Vec ur; /* reference */

 61:   Vec d;
 62:   Vec dwork;

 64:   Vec x; /* super vec of y,u */

 66:   Vec y; /* state variables */
 67:   Vec ywork;

 69:   Vec ytrue;

 71:   Vec u; /* design variables */
 72:   Vec uwork;

 74:   Vec utrue;
 75:  
 76:   Vec js_diag;
 77:   
 78:   Vec c; /* constraint vector */
 79:   Vec cwork;
 80:   
 81:   Vec lwork;
 82:   Vec S;
 83:   Vec Swork,Twork,Sdiag,Ywork;
 84:   Vec Av_u;

 86:   KSP solver;
 87:   PC prec;

 89:   PetscReal tola,tolb,tolc,told;
 90:   PetscInt ksp_its;
 91:   PetscInt ksp_its_initial;
 92:   PetscInt stages[10];
 93:   PetscBool use_ptap;
 94:   PetscBool use_lrc;
 95:   PetscReal tau[4];
 96:   TAO_LCL* lcl;

 98: } AppCtx;

100: PetscErrorCode FormFunction(TaoSolver, Vec, PetscReal*, void*);
101: PetscErrorCode FormGradient(TaoSolver, Vec, Vec, void*);
102: PetscErrorCode FormFunctionGradient(TaoSolver, Vec, PetscReal*, Vec, void*);
103: PetscErrorCode FormJacobianState(TaoSolver, Vec, Mat*, Mat*, Mat*, MatStructure*,void*);
104: PetscErrorCode FormJacobianDesign(TaoSolver, Vec, Mat*,void*);
105: PetscErrorCode FormConstraints(TaoSolver, Vec, Vec, void*);
106: PetscErrorCode FormHessian(TaoSolver, Vec, Mat*, Mat*, MatStructure*, void*);
107: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
108: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
109: PetscErrorCode EllipticInitialize(AppCtx *user);
110: PetscErrorCode EllipticDestroy(AppCtx *user);
111: PetscErrorCode EllipticMonitor(TaoSolver, void*);

113: PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
114: PetscErrorCode StateMatMult(Mat,Vec,Vec);

116: PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
117: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
118: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

120: PetscErrorCode QMatMult(Mat,Vec,Vec);
121: PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);

123: static  char help[]="";

127: int main(int argc, char **argv)
128: {
130:   Vec x0;
131:   
132:   TaoSolver tao;
133:   TaoSolverTerminationReason reason;
134:   AppCtx user;
135:   PetscBool flag,showtime;
136:   PetscInt ntests = 1;
137:   PetscLogDouble v1,v2;
138:   PetscInt i;


141:   PetscInitialize(&argc, &argv, (char*)0,help);
142:   TaoInitialize(&argc, &argv, (char*)0,help);
143:   showtime = PETSC_FALSE;
144:   PetscOptionsBool("-showtime","Display time elapsed","",showtime,&showtime,&flag); 
145:   user.mx = 8;
146:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,&flag); 
147:   user.ns = 6;
148:   PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,&flag); 
149:   user.ndata = 64;
150:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,&flag); 
151:   user.alpha = 0.1;
152:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,&flag); 
153:   user.beta = 0.00001;
154:   PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,&flag); 
155:   user.noise = 0.01;
156:   PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,&flag); 
157:   user.tau[0] = user.tau[1] = user.tau[2] = user.tau[3] = 1.0e-4;
158:   PetscOptionsReal("-tola","Tolerance for first forward solve","",user.tau[0],&user.tau[0],&flag); 
159:   PetscOptionsReal("-tolb","Tolerance for first adjoint solve","",user.tau[1],&user.tau[1],&flag); 
160:   PetscOptionsReal("-tolc","Tolerance for second forward solve","",user.tau[2],&user.tau[2],&flag); 
161:   PetscOptionsReal("-told","Tolerance for second adjoint solve","",user.tau[3],&user.tau[3],&flag); 

163:   PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",PETSC_FALSE,&user.use_ptap,&flag); 
164:   PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",PETSC_FALSE,&user.use_lrc,&flag); 

166:   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
167:   user.nstate =  user.m;
168:   user.ndesign = user.mx*user.mx*user.mx;
169:   user.n = user.nstate + user.ndesign; /* number of variables */




174:   /* Create TAO solver and set desired solution method */
175:   TaoCreate(PETSC_COMM_WORLD,&tao); 
176:   TaoSetType(tao,"tao_lcl"); 
177:   user.lcl = (TAO_LCL*)(tao->data); 

179:   /* Set up initial vectors and matrices */
180:   EllipticInitialize(&user); 


183:   Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter); 
184:   VecDuplicate(user.x,&x0); 
185:   VecCopy(user.x,x0); 
186:   


189:   /* Set solution vector with an initial guess */
190:   TaoSetInitialVector(tao,user.x); 
191:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user); 
192:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user); 
193:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user); 
194:  
195:   TaoSetJacobianStateRoutine(tao, user.Js, PETSC_NULL, user.JsInv, FormJacobianState, (void *)&user);  
196:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user); 

198:   TaoSetStateDesignIS(tao,user.s_is,user.d_is); 
199:   TaoSetFromOptions(tao); 
200:   


203:   /* SOLVE THE APPLICATION */
204:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,&flag); 
205:   PetscLogStageRegister("Trials",&user.stages[1]); 
206:   PetscLogStagePush(user.stages[1]); 
207:   for (i=0; i<ntests; i++){
208:     PetscGetTime(&v1); 
209:     TaoSolve(tao);  
210:     PetscGetTime(&v2); 
211:     if (showtime) {
212:       PetscPrintf(PETSC_COMM_WORLD,"Elapsed time = %G\n",v2-v1); 
213:     }
214:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its); 
215:     VecCopy(x0,user.x); 
216:   }
217:   PetscLogStagePop(); 
218:   PetscBarrier((PetscObject)user.x); 
219:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
220:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);

222:   TaoGetTerminationReason(tao,&reason); 

224:   if (reason < 0)
225:   {
226:     PetscPrintf(MPI_COMM_WORLD, "TAO failed to converge.\n");
227:   }
228:   else
229:   {
230:     PetscPrintf(MPI_COMM_WORLD, "Optimization terminated with status %D.\n", reason);
231:   }


234:   /* Free TAO data structures */
235:   TaoDestroy(&tao); 

237:   /* Free PETSc data structures */
238:   VecDestroy(&x0); 

240:   EllipticDestroy(&user); 

242:   /* Finalize TAO, PETSc */
243:   TaoFinalize();
244:   PetscFinalize(); 

246:   return 0;
247: }
248: /* ------------------------------------------------------------------- */
251: /* 
252:    dwork = Qy - d  
253:    lwork = L*(u-ur)
254:    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
255: */
256: PetscErrorCode FormFunction(TaoSolver tao,Vec X,PetscReal *f,void *ptr)
257: {
259:   PetscReal d1=0,d2=0;
260:   AppCtx *user = (AppCtx*)ptr;
262:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
263:   MatMult(user->MQ,user->y,user->dwork); 
264:   VecAXPY(user->dwork,-1.0,user->d); 
265:   VecDot(user->dwork,user->dwork,&d1); 

267:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
268:   MatMult(user->L,user->uwork,user->lwork); 
269:   VecDot(user->lwork,user->lwork,&d2); 
270:   
271:   *f = 0.5 * (d1 + user->alpha*d2); 
272:   return(0);
273: }

275: /* ------------------------------------------------------------------- */
278: /*  
279:     state: g_s = Q' *(Qy - d)
280:     design: g_d = alpha*L'*L*(u-ur)
281: */
282: PetscErrorCode FormGradient(TaoSolver tao,Vec X,Vec G,void *ptr)
283: {
285:   AppCtx *user = (AppCtx*)ptr;
287:   CHKMEMQ;
288:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
289:   MatMult(user->MQ,user->y,user->dwork); 
290:   VecAXPY(user->dwork,-1.0,user->d); 

292:   MatMultTranspose(user->MQ,user->dwork,user->ywork); 
293:   
294:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
295:   MatMult(user->L,user->uwork,user->lwork); 
296:   MatMultTranspose(user->L,user->lwork,user->uwork); 
297:   VecScale(user->uwork, user->alpha); 

299:                       
300:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
301:   CHKMEMQ;
302:   return(0);
303: }

307: PetscErrorCode FormFunctionGradient(TaoSolver tao, Vec X, PetscReal *f, Vec G, void *ptr)
308: {
310:   PetscReal d1,d2;
311:   AppCtx *user = (AppCtx*)ptr;
313:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 

315:   MatMult(user->MQ,user->y,user->dwork); 
316:   VecAXPY(user->dwork,-1.0,user->d); 
317:   VecDot(user->dwork,user->dwork,&d1); 
318:   MatMultTranspose(user->MQ,user->dwork,user->ywork); 

320:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
321:   MatMult(user->L,user->uwork,user->lwork); 
322:   VecDot(user->lwork,user->lwork,&d2); 
323:   MatMultTranspose(user->L,user->lwork,user->uwork); 
324:   VecScale(user->uwork, user->alpha); 
325:   *f = 0.5 * (d1 + user->alpha*d2); 

327:   
328:   
329:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
330:   return(0);

332: }


335: /* ------------------------------------------------------------------- */
338: /* A 
339: MatShell object
340: */
341: PetscErrorCode FormJacobianState(TaoSolver tao, Vec X, Mat *J, Mat* JPre, Mat* JInv, MatStructure* flag, void *ptr)
342: {
344:   AppCtx *user = (AppCtx*)ptr;

347:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
348:   /* DSG = Div * (1/Av_u) * Grad */
349:   VecSet(user->uwork,0); 
350:   VecAXPY(user->uwork,-1.0,user->u); 
351:   VecExp(user->uwork); 
352:   MatMult(user->Av,user->uwork,user->Av_u); 
353:   VecCopy(user->Av_u,user->Swork);  
354:   VecReciprocal(user->Swork); 
355:   if (user->use_ptap) {
356:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES); 
357:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
358:   } else {
359:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
360:     MatDiagonalScale(user->Divwork,PETSC_NULL,user->Swork); 
361:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
362:   }
363:   *flag = SAME_NONZERO_PATTERN;
364:   return(0);
365: }
366: /* ------------------------------------------------------------------- */
369: /* B */
370: PetscErrorCode FormJacobianDesign(TaoSolver tao, Vec X, Mat *J, void *ptr)
371: {
373:   AppCtx *user = (AppCtx*)ptr;

376:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
377:   *J = user->Jd;

379:   return(0);
380: }

384: PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y) 
385: {
387:   PetscReal sum;
388:   void *ptr;
389:   AppCtx *user;
391:   MatShellGetContext(J_shell,&ptr); 
392:   user = (AppCtx*)ptr;
393:   MatMult(user->DSG,X,Y);
394:   VecSum(X,&sum); 
395:   sum /= user->ndesign;
396:   VecShift(Y,sum); 
397:   return(0);
398: }



404: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) 
405: {
407:   void *ptr;
408:   PetscInt i;
409:   AppCtx *user;
411:   MatShellGetContext(J_shell,&ptr); 
412:   user = (AppCtx*)ptr;
413:   if (user->ns == 1) {
414:     MatMult(user->JsBlock,X,Y); 
415:   } else {
416:     for (i=0;i<user->ns;i++) {
417:       Scatter(X,user->subq,user->yi_scatter[i],0,0); 
418:       Scatter(Y,user->suby,user->yi_scatter[i],0,0); 
419:       MatMult(user->JsBlock,user->subq,user->suby); 
420:       Gather(X,user->subq,user->yi_scatter[i],0,0); 
421:       Gather(Y,user->suby,user->yi_scatter[i],0,0); 

423:     }
424:   }
425:   return(0);
426: }

430: PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y) 
431: {
433:   PetscInt its,i;
434:   PetscReal tau;
435:   void *ptr;
436:   AppCtx *user;
438:   MatShellGetContext(J_shell,&ptr); 
439:   user = (AppCtx*)ptr;
440:   KSPSetOperators(user->solver,user->JsBlock,user->DSG,SAME_NONZERO_PATTERN); 

442:   if (Y == user->ytrue) {
443:     KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
444:   } else if (user->lcl) {
445:     tau = user->tau[user->lcl->solve_type];
446:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
447:   }

449:   if (user->ns == 1) {
450:     KSPSolve(user->solver,X,Y); 
451:     KSPGetIterationNumber(user->solver,&its); 
452:     user->ksp_its+=its;
453:   } else {
454:     for (i=0;i<user->ns;i++) {
455:       Scatter(X,user->subq,user->yi_scatter[i],0,0); 
456:       Scatter(Y,user->suby,user->yi_scatter[i],0,0); 
457:       KSPSolve(user->solver,user->subq,user->suby); 
458:       KSPGetIterationNumber(user->solver,&its); 
459:       user->ksp_its+=its;
460:       Gather(X,user->subq,user->yi_scatter[i],0,0); 
461:       Gather(Y,user->suby,user->yi_scatter[i],0,0); 
462:     }    
463:   }
464:   

466:   return(0);
467: }
470: PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
471: {
473:   void *ptr;
474:   AppCtx *user;
475:   PetscInt i;
477:   MatShellGetContext(J_shell,&ptr); 
478:   user = (AppCtx*)ptr;
479:   if (user->ns == 1) {
480:     MatMult(user->Q,X,Y); 
481:   } else {
482:     for (i=0;i<user->ns;i++) {
483:       Scatter(X,user->subq,user->yi_scatter[i],0,0); 
484:       Scatter(Y,user->subd,user->di_scatter[i],0,0); 
485:       MatMult(user->Q,user->subq,user->subd); 
486:       Gather(X,user->subq,user->yi_scatter[i],0,0); 
487:       Gather(Y,user->subd,user->di_scatter[i],0,0); 
488:     }
489:   }
490:     
491:   return(0);

493: }

497: PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
498: {
500:   void *ptr;
501:   AppCtx *user;
502:   PetscInt i;
504:   MatShellGetContext(J_shell,&ptr); 
505:   user = (AppCtx*)ptr;
506:   if (user->ns == 1) {
507:       MatMultTranspose(user->Q,X,Y); 
508:   } else {
509:     for (i=0;i<user->ns;i++) {
510:       Scatter(X,user->subd,user->di_scatter[i],0,0); 
511:       Scatter(Y,user->suby,user->yi_scatter[i],0,0); 
512:       MatMultTranspose(user->Q,user->subd,user->suby); 
513:       Gather(X,user->subd,user->di_scatter[i],0,0); 
514:       Gather(Y,user->suby,user->yi_scatter[i],0,0); 
515:     }
516:   }
517:   return(0);

519: }

523: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) 
524: {
526:   void *ptr;
527:   PetscInt i;
528:   AppCtx *user;
530:   MatShellGetContext(J_shell,&ptr); 
531:   user = (AppCtx*)ptr;

533:   /* sdiag(1./v) */ 
534:   VecSet(user->uwork,0); 
535:   VecAXPY(user->uwork,-1.0,user->u); 
536:   VecExp(user->uwork);   

538:   /* sdiag(1./((Av*(1./v)).^2)) */
539:   MatMult(user->Av,user->uwork,user->Swork); 
540:   VecPointwiseMult(user->Swork,user->Swork,user->Swork); 
541:   VecReciprocal(user->Swork);  

543:   /* (Av * (sdiag(1./v) * b)) */ 
544:   VecPointwiseMult(user->uwork,user->uwork,X); 
545:   MatMult(user->Av,user->uwork,user->Twork); 

547:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
548:   VecPointwiseMult(user->Swork,user->Twork,user->Swork);  

550:   if (user->ns == 1) {
551:     /* (sdiag(Grad*y(:,i)) */
552:     MatMult(user->Grad,user->y,user->Twork); 
553:   
554:     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
555:     VecPointwiseMult(user->Swork,user->Twork,user->Swork);  
556:     MatMultTranspose(user->Grad,user->Swork,Y); 
557:   } else {
558:     for (i=0;i<user->ns;i++) {
559:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0); 
560:       Scatter(Y,user->subq,user->yi_scatter[i],0,0); 
561:   
562:       MatMult(user->Grad,user->suby,user->Twork); 
563:       VecPointwiseMult(user->Twork,user->Twork,user->Swork); 
564:       MatMultTranspose(user->Grad,user->Twork,user->subq); 
565:       Gather(user->y,user->suby,user->yi_scatter[i],0,0); 
566:       Gather(Y,user->subq,user->yi_scatter[i],0,0); 


569:     }
570:   }
571:   return(0);
572: }

576: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
577: {
579:   void *ptr;
580:   PetscInt i;
581:   AppCtx *user;
583:   MatShellGetContext(J_shell,&ptr); 
584:   user = (AppCtx*)ptr;

586:   VecZeroEntries(Y); 

588:   /* Sdiag = 1./((Av*(1./v)).^2) */
589:   VecSet(user->uwork,0); 
590:   VecAXPY(user->uwork,-1.0,user->u); 
591:   VecExp(user->uwork); 
592:   MatMult(user->Av,user->uwork,user->Swork); 
593:   VecPointwiseMult(user->Sdiag,user->Swork,user->Swork); 
594:   VecReciprocal(user->Sdiag); 
595:   
596:   for (i=0;i<user->ns;i++) {
597:     Scatter(X,user->subq,user->yi_scatter[i],0,0); 
598:     Scatter(user->y,user->suby,user->yi_scatter[i],0,0); 

600:     /* Swork = (Div' * b(:,i)) */
601:     MatMult(user->Grad,user->subq,user->Swork); 

603:     /* Twork = Grad*y(:,i) */
604:     MatMult(user->Grad,user->suby,user->Twork); 

606:     /* Twork = sdiag(Twork) * Swork */
607:     VecPointwiseMult(user->Twork,user->Swork,user->Twork); 
608:   

610:     /* Swork = pointwisemult(Sdiag,Twork) */
611:     VecPointwiseMult(user->Swork,user->Twork,user->Sdiag); 

613:     /* Ywork = Av' * Swork */
614:     MatMultTranspose(user->Av,user->Swork,user->Ywork); 
615:   
616:     /* Ywork = pointwisemult(uwork,Ywork) */
617:     VecPointwiseMult(user->Ywork,user->uwork,user->Ywork); 
618:     
619:     VecAXPY(Y,1.0,user->Ywork); 
620:     
621:     Gather(X,user->subq,user->yi_scatter[i],0,0); 
622:     Gather(user->y,user->suby,user->yi_scatter[i],0,0); 
623:   }  

625:   return(0);
626: }

630: PetscErrorCode FormConstraints(TaoSolver tao, Vec X, Vec C, void *ptr)
631: {
632:    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
634:    PetscReal sum;
635:    PetscInt i;
636:    AppCtx *user = (AppCtx*)ptr;
638:    
639:    Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
640:    
641:    if (user->ns == 1) {
642:      MatMult(user->Grad,user->y,user->Swork);  
643:      VecPointwiseDivide(user->Swork,user->Swork,user->Av_u); 
644:      MatMultTranspose(user->Grad,user->Swork,C);  
645:  
646:      VecSum(user->y,&sum); 
647:      sum /= user->ndesign;
648:      VecShift(C,sum); 

650:    } else {
651:      for (i=0;i<user->ns;i++) {
652:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0); 
653:       Scatter(C,user->subq,user->yi_scatter[i],0,0); 
654:       MatMult(user->Grad,user->suby,user->Swork);  
655:       VecPointwiseDivide(user->Swork,user->Swork,user->Av_u); 
656:       MatMultTranspose(user->Grad,user->Swork,user->subq);  
657:  

659:       VecSum(user->suby,&sum); 
660:       sum /= user->ndesign;
661:       VecShift(user->subq,sum); 

663:       Gather(user->y,user->suby,user->yi_scatter[i],0,0); 
664:       Gather(C,user->subq,user->yi_scatter[i],0,0); 
665:      }
666:    }
667:    VecAXPY(C,-1.0,user->q); 
668:    return(0);
669: }

673: PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
674: {
677:   VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD); 
678:   VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD); 
679:   if (sub2) {
680:     VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD); 
681:     VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD); 
682:   }
683:   return(0);
684: }

688: PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
689: {
692:   VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE); 
693:   VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE); 
694:   if (sub2) {
695:     VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE); 
696:     VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE); 
697:   }
698:   return(0);
699: }
700:   
701:     
704: PetscErrorCode EllipticInitialize(AppCtx *user)
705: {
707:   PetscInt m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
708:   Vec XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
709:   PetscReal *x,*y,*z;
710:   PetscReal h,meanut;
711:   PetscScalar PI = 3.141592653589793238,hinv,neg_hinv,half = 0.5,sqrt_beta;
712:   PetscInt im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
713:   IS is_alldesign,is_allstate;
714:   IS is_from_d;
715:   IS is_from_y;
716:   PetscInt lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
717:   const PetscInt *ranges, *subranges;
718:   PetscMPIInt mpisize,mpirank;
719:   PetscReal xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
720:   PetscScalar v,vx,vy,vz;
721:   PetscInt offset,subindex,subvec,nrank,kk;
722:   /* Data locations */
723:   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,     
724:                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,     
725:                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,     
726:                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,     
727:                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,     
728:                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,     
729:                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,     
730:                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
731:   
732:   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,     
733:                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,     
734:                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,     
735:                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,     
736:                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,     
737:                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,     
738:                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,     
739:                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
740:   
741:   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,     
742:                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,     
743:                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,     
744:                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,     
745:                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,     
746:                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,     
747:                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,     
748:                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};

751:   MPI_Comm_size(PETSC_COMM_WORLD,&mpisize); 
752:   MPI_Comm_rank(PETSC_COMM_WORLD,&mpirank); 
753:   PetscLogStageRegister("Elliptic Setup",&user->stages[0]); 
754:   PetscLogStagePush(user->stages[0]); 

756:   /* Create u,y,c,x */
757:   VecCreate(PETSC_COMM_WORLD,&user->u); 
758:   VecCreate(PETSC_COMM_WORLD,&user->y); 
759:   VecCreate(PETSC_COMM_WORLD,&user->c); 
760:   VecSetSizes(user->u,PETSC_DECIDE,user->ndesign); 
761:   VecSetFromOptions(user->u); 
762:   VecGetLocalSize(user->u,&ysubnlocal); 
763:   VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate); 
764:   VecSetSizes(user->c,ysubnlocal*user->ns,user->m); 
765:   VecSetFromOptions(user->y); 
766:   VecSetFromOptions(user->c); 

768:   /* 
769:      *******************************
770:      Create scatters for x <-> y,u
771:      *******************************

773:      If the state vector y and design vector u are partitioned as 
774:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
775:      then the solution vector x is organized as
776:      [y_1; u_1; y_2; u_2; ...; y_np; u_np]. 
777:      The index sets user->s_is and user->d_is correspond to the indices of the
778:      state and design variables owned by the current processor.
779:   */
780:   VecCreate(PETSC_COMM_WORLD,&user->x); 

782:   VecGetOwnershipRange(user->y,&lo,&hi); 
783:   VecGetOwnershipRange(user->u,&lo2,&hi2);  

785:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate); 
786:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is); 
787:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign); 
788:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is); 
789:   
790:   VecSetSizes(user->x,hi-lo+hi2-lo2,user->n); 
791:   VecSetFromOptions(user->x); 

793:   VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter); 
794:   VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter); 
795:   ISDestroy(&is_alldesign); 
796:   ISDestroy(&is_allstate); 
797:   /* 
798:      *******************************
799:      Create scatter from y to y_1,y_2,...,y_ns 
800:      *******************************
801:   */
802:   PetscMalloc(user->ns*sizeof(VecScatter),&user->yi_scatter);
803:   VecDuplicate(user->u,&user->suby); 
804:   VecDuplicate(user->u,&user->subq); 

806:   VecGetOwnershipRange(user->y,&lo2,&hi2); 
807:   istart = 0;
808:   for (i=0; i<user->ns; i++){
809:     VecGetOwnershipRange(user->suby,&lo,&hi); 
810:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y); 
811:     VecScatterCreate(user->y,is_from_y,user->suby,PETSC_NULL,&user->yi_scatter[i]); 
812:     istart = istart + hi-lo;
813:     ISDestroy(&is_from_y); 
814:   }
815:   /* 
816:      *******************************
817:      Create scatter from d to d_1,d_2,...,d_ns 
818:      *******************************
819:   */
820:   VecCreate(PETSC_COMM_WORLD,&user->subd); 
821:   VecSetSizes(user->subd,PETSC_DECIDE,user->ndata); 
822:   VecSetFromOptions(user->subd); 
823:   VecCreate(PETSC_COMM_WORLD,&user->d); 
824:   VecGetLocalSize(user->subd,&dsubnlocal); 
825:   VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns); 
826:   VecSetFromOptions(user->d); 
827:   PetscMalloc(user->ns*sizeof(VecScatter),&user->di_scatter);

829:   VecGetOwnershipRange(user->d,&lo2,&hi2); 
830:   istart = 0;
831:   for (i=0; i<user->ns; i++){
832:     VecGetOwnershipRange(user->subd,&lo,&hi); 
833:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d); 
834:     VecScatterCreate(user->d,is_from_d,user->subd,PETSC_NULL,&user->di_scatter[i]); 
835:     istart = istart + hi-lo;
836:     ISDestroy(&is_from_d); 
837:   }
838:   
839:   PetscMalloc(user->mx*sizeof(PetscReal),&x); 
840:   PetscMalloc(user->mx*sizeof(PetscReal),&y); 
841:   PetscMalloc(user->mx*sizeof(PetscReal),&z); 

843:   user->ksp_its = 0;
844:   user->ksp_its_initial = 0;

846:   n = user->mx * user->mx * user->mx;
847:   m = 3 * user->mx * user->mx * (user->mx-1);
848:   sqrt_beta = PetscSqrtScalar(user->beta);


851:   VecCreate(PETSC_COMM_WORLD,&XX); 
852:   VecCreate(PETSC_COMM_WORLD,&user->q); 
853:   VecSetSizes(XX,ysubnlocal,n); 
854:   VecSetSizes(user->q,ysubnlocal*user->ns,user->m); 
855:   VecSetFromOptions(XX); 
856:   VecSetFromOptions(user->q); 

858:   VecDuplicate(XX,&YY); 
859:   VecDuplicate(XX,&ZZ); 
860:   VecDuplicate(XX,&XXwork); 
861:   VecDuplicate(XX,&YYwork); 
862:   VecDuplicate(XX,&ZZwork); 
863:   VecDuplicate(XX,&UTwork); 
864:   VecDuplicate(XX,&user->utrue); 

866:   /* map for striding q */
867:   /*
868:   PetscMalloc((mpisize+1)*sizeof(PetscInt),&ranges); 
869:   PetscMalloc((mpisize+1)*sizeof(PetscInt),&subranges);  */
870:   VecGetOwnershipRanges(user->q,&ranges); 
871:   VecGetOwnershipRanges(user->u,&subranges); 
872:   
873:   VecGetOwnershipRange(user->q,&lo2,&hi2); 
874:   VecGetOwnershipRange(user->u,&lo,&hi); 
875:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
876:   h = 1.0/user->mx;
877:   hinv = user->mx;
878:   neg_hinv = -hinv;

880:   VecGetOwnershipRange(XX,&istart,&iend); 
881:   for (linear_index=istart; linear_index<iend; linear_index++){
882:     i = linear_index % user->mx;
883:     j = ((linear_index-i)/user->mx) % user->mx;
884:     k = ((linear_index-i)/user->mx-j) / user->mx;
885:     vx = h*(i+0.5);
886:     vy = h*(j+0.5);
887:     vz = h*(k+0.5);        
888:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES); 
889:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES); 
890:     VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES); 
891:     for (is=0; is<2; is++){
892:       for (js=0; js<2; js++){
893:         for (ks=0; ks<2; ks++){
894:           ls = is*4 + js*2 + ks;
895:           if (ls<user->ns){
896:             l =ls*n + linear_index;
897:             /* remap */
898:             subindex = l%n;
899:             subvec = l/n;
900:             nrank=0;
901:             while (subindex >= subranges[nrank+1]) nrank++;
902:             offset = subindex - subranges[nrank];
903:             istart=0;
904:             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
905:             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
906:             l = istart+offset;
907:             v = 100*PetscSinScalar(2*PI*(vx+0.25*is))*sin(2*PI*(vy+0.25*js))*sin(2*PI*(vz+0.25*ks));
908:             VecSetValues(user->q,1,&l,&v,INSERT_VALUES); 
909:           }
910:         }
911:       }
912:     }
913:   }

915:   VecAssemblyBegin(XX); 
916:   VecAssemblyEnd(XX); 
917:   VecAssemblyBegin(YY); 
918:   VecAssemblyEnd(YY); 
919:   VecAssemblyBegin(ZZ); 
920:   VecAssemblyEnd(ZZ); 
921:   VecAssemblyBegin(user->q); 
922:   VecAssemblyEnd(user->q);   
923:   
924:   /* Compute true parameter function
925:      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
926:   VecCopy(XX,XXwork); 
927:   VecCopy(YY,YYwork); 
928:   VecCopy(ZZ,ZZwork); 

930:   VecShift(XXwork,-0.25); 
931:   VecShift(YYwork,-0.25); 
932:   VecShift(ZZwork,-0.25); 

934:   VecPointwiseMult(XXwork,XXwork,XXwork); 
935:   VecPointwiseMult(YYwork,YYwork,YYwork); 
936:   VecPointwiseMult(ZZwork,ZZwork,ZZwork); 

938:   VecCopy(XXwork,UTwork); 
939:   VecAXPY(UTwork,1.0,YYwork); 
940:   VecAXPY(UTwork,1.0,ZZwork); 
941:   VecScale(UTwork,-20.0); 
942:   VecExp(UTwork); 
943:   VecCopy(UTwork,user->utrue); 

945:   VecCopy(XX,XXwork); 
946:   VecCopy(YY,YYwork); 
947:   VecCopy(ZZ,ZZwork); 

949:   VecShift(XXwork,-0.75); 
950:   VecShift(YYwork,-0.75); 
951:   VecShift(ZZwork,-0.75); 

953:   VecPointwiseMult(XXwork,XXwork,XXwork); 
954:   VecPointwiseMult(YYwork,YYwork,YYwork); 
955:   VecPointwiseMult(ZZwork,ZZwork,ZZwork); 

957:   VecCopy(XXwork,UTwork); 
958:   VecAXPY(UTwork,1.0,YYwork); 
959:   VecAXPY(UTwork,1.0,ZZwork); 
960:   VecScale(UTwork,-20.0); 
961:   VecExp(UTwork); 

963:   VecAXPY(user->utrue,-1.0,UTwork); 

965:   VecDestroy(&XX); 
966:   VecDestroy(&YY); 
967:   VecDestroy(&ZZ); 
968:   VecDestroy(&XXwork); 
969:   VecDestroy(&YYwork); 
970:   VecDestroy(&ZZwork); 
971:   VecDestroy(&UTwork); 
972:  
973:   /* Initial guess and reference model */
974:   VecDuplicate(user->utrue,&user->ur); 
975:   VecSum(user->utrue,&meanut); 
976:   meanut = meanut / n;
977:   VecSet(user->ur,meanut); 
978:   VecCopy(user->ur,user->u); 

980:   /* Generate Grad matrix */
981:   MatCreate(PETSC_COMM_WORLD,&user->Grad); 
982:   MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n); 
983:   MatSetFromOptions(user->Grad); 
984:   MatMPIAIJSetPreallocation(user->Grad,2,PETSC_NULL,2,PETSC_NULL); 
985:   MatSeqAIJSetPreallocation(user->Grad,2,PETSC_NULL); 
986:   MatGetOwnershipRange(user->Grad,&istart,&iend);

988:   for (i=istart; i<iend; i++){
989:     if (i<m/3){
990:       iblock = i / (user->mx-1);
991:       j = iblock*user->mx + (i % (user->mx-1));
992:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
993:       j = j+1;
994:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
995:     }
996:     if (i>=m/3 && i<2*m/3){
997:       iblock = (i-m/3) / (user->mx*(user->mx-1));
998:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
999:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1000:       j = j + user->mx;
1001:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
1002:     }
1003:     if (i>=2*m/3){
1004:       j = i-2*m/3;
1005:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1006:       j = j + user->mx*user->mx;
1007:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
1008:     }
1009:   }

1011:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY); 
1012:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY); 



1016:   /* Generate arithmetic averaging matrix Av */
1017:   MatCreate(PETSC_COMM_WORLD,&user->Av); 
1018:   MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n); 
1019:   MatSetFromOptions(user->Av); 
1020:   MatMPIAIJSetPreallocation(user->Av,2,PETSC_NULL,2,PETSC_NULL); 
1021:   MatSeqAIJSetPreallocation(user->Av,2,PETSC_NULL); 
1022:   MatGetOwnershipRange(user->Av,&istart,&iend);

1024:   for (i=istart; i<iend; i++){
1025:     if (i<m/3){
1026:       iblock = i / (user->mx-1);
1027:       j = iblock*user->mx + (i % (user->mx-1));
1028:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1029:       j = j+1;
1030:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1031:     }
1032:     if (i>=m/3 && i<2*m/3){
1033:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1034:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1035:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1036:       j = j + user->mx;
1037:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1038:     }
1039:     if (i>=2*m/3){
1040:       j = i-2*m/3;
1041:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1042:       j = j + user->mx*user->mx;
1043:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1044:     }
1045:   }

1047:   MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY); 
1048:   MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY); 


1051:   MatCreate(PETSC_COMM_WORLD,&user->L); 
1052:   MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n); 
1053:   MatSetFromOptions(user->L); 
1054:   MatMPIAIJSetPreallocation(user->L,2,PETSC_NULL,2,PETSC_NULL); 
1055:   MatSeqAIJSetPreallocation(user->L,2,PETSC_NULL); 
1056:   MatGetOwnershipRange(user->L,&istart,&iend);

1058:   for (i=istart; i<iend; i++){
1059:     if (i<m/3){
1060:       iblock = i / (user->mx-1);
1061:       j = iblock*user->mx + (i % (user->mx-1));
1062:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1063:       j = j+1;
1064:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1065:     }
1066:     if (i>=m/3 && i<2*m/3){
1067:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1068:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1069:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1070:       j = j + user->mx;
1071:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1072:     }
1073:     if (i>=2*m/3 && i<m){
1074:       j = i-2*m/3;
1075:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1076:       j = j + user->mx*user->mx;
1077:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1078:     }
1079:     if (i>=m){
1080:       j = i - m;
1081:       MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES); 
1082:     }
1083:   }

1085:   MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY); 
1086:   MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY); 

1088:   MatScale(user->L,PetscPowScalar(h,1.5)); 

1090:   /* Generate Div matrix */
1091:   if (!user->use_ptap) {
1092:     /* Generate Div matrix */
1093:     MatCreate(PETSC_COMM_WORLD,&user->Div); 
1094:     MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m); 
1095:     MatSetFromOptions(user->Div); 
1096:     MatMPIAIJSetPreallocation(user->Div,4,PETSC_NULL,4,PETSC_NULL); 
1097:     MatSeqAIJSetPreallocation(user->Div,6,PETSC_NULL); 
1098:     MatGetOwnershipRange(user->Grad,&istart,&iend);

1100:     for (i=istart; i<iend; i++){
1101:       if (i<m/3){
1102:         iblock = i / (user->mx-1);
1103:         j = iblock*user->mx + (i % (user->mx-1));
1104:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES); 
1105:         j = j+1;
1106:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES); 
1107:       }
1108:       if (i>=m/3 && i<2*m/3){
1109:         iblock = (i-m/3) / (user->mx*(user->mx-1));
1110:         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1111:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES); 
1112:         j = j + user->mx;
1113:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES); 
1114:       }
1115:       if (i>=2*m/3){
1116:         j = i-2*m/3;
1117:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES); 
1118:         j = j + user->mx*user->mx;
1119:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES); 
1120:       }
1121:     }

1123:     MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY); 
1124:     MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY); 

1126:     MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork); 
1127:   } else {
1128:     MatCreate(PETSC_COMM_WORLD,&user->Diag); 
1129:     MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m);
1130:     MatSetFromOptions(user->Diag); 
1131:     MatMPIAIJSetPreallocation(user->Diag,1,PETSC_NULL,0,PETSC_NULL); 
1132:     MatSeqAIJSetPreallocation(user->Diag,1,PETSC_NULL); 
1133:   

1135:   }


1138:   /* Build work vectors and matrices */
1139:   VecCreate(PETSC_COMM_WORLD,&user->S); 
1140:   VecSetSizes(user->S, PETSC_DECIDE, m); 
1141:   VecSetFromOptions(user->S); 

1143:   VecCreate(PETSC_COMM_WORLD,&user->lwork); 
1144:   VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx);  
1145:   VecSetFromOptions(user->lwork); 

1147:   MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork); 


1150:   VecDuplicate(user->S,&user->Swork); 
1151:   VecDuplicate(user->S,&user->Sdiag); 
1152:   VecDuplicate(user->S,&user->Av_u); 
1153:   VecDuplicate(user->S,&user->Twork); 
1154:   VecDuplicate(user->y,&user->ywork); 
1155:   VecDuplicate(user->u,&user->Ywork); 
1156:   VecDuplicate(user->u,&user->uwork); 

1158:   VecDuplicate(user->u,&user->js_diag); 

1160:   VecDuplicate(user->c,&user->cwork); 
1161:   VecDuplicate(user->d,&user->dwork); 

1163:   
1164:   /* Create a matrix-free shell user->Jd for computing B*x */
1165:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd); 
1166:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult); 
1167:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose); 


1170:   /* Compute true state function ytrue given utrue */
1171:   VecDuplicate(user->y,&user->ytrue); 

1173:   /* First compute Av_u = Av*exp(-u) */
1174:   VecSet(user->uwork, 0); 
1175:   VecAXPY(user->uwork,-1.0,user->utrue); /* Note: user->utrue */
1176:   VecExp(user->uwork); 
1177:   MatMult(user->Av,user->uwork,user->Av_u); 

1179:   /* Next form DSG = Div*S*Grad */
1180:   VecCopy(user->Av_u,user->Swork);  
1181:   VecReciprocal(user->Swork); 
1182:   if (user->use_ptap) {
1183:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES); 
1184:     MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);
1185:   } else {
1186:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
1187:     MatDiagonalScale(user->Divwork,PETSC_NULL,user->Swork); 
1188:     MatMatMultSymbolic(user->Divwork,user->Grad,1.0,&user->DSG); 
1189:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
1190:   }
1191:     
1192:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE); 
1193:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 

1195:   if (user->use_lrc == PETSC_TRUE) {
1196:     v=PetscSqrtReal(1.0 /user->ndesign);
1197:     PetscMalloc(user->ndesign*sizeof(PetscReal),&user->ones); 
1198:     
1199:     for (i=0;i<user->ndesign;i++) {
1200:       user->ones[i]=v;
1201:     }
1202:     MatCreateMPIDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones); 
1203:     MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY); 
1204:     MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY); 
1205:     MatCreateLRC(user->DSG,user->Ones,user->Ones,&user->JsBlock); 
1206:   } else {
1207:     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1208:     MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock); 
1209:     MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult); 
1210:     MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult); 

1212:   }
1213:   MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE); 
1214:   MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 
1215:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js); 
1216:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult); 
1217:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult); 
1218:   MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE); 
1219:   MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 


1222:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv); 
1223:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult); 
1224:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult); 
1225:   MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE); 
1226:   MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 




1231:     
1232:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE); 
1233:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 
1234:   /* Now solve for ytrue */
1235:   KSPCreate(PETSC_COMM_WORLD,&user->solver); 
1236:   KSPSetFromOptions(user->solver); 


1239:   KSPSetOperators(user->solver,user->JsBlock,user->DSG,SAME_NONZERO_PATTERN); 
1240:   user->lcl->solve_type = LCL_FORWARD1;
1241:   MatMult(user->JsInv,user->q,user->ytrue); 
1242:   /* First compute Av_u = Av*exp(-u) */
1243:   VecSet(user->uwork,0);
1244:   VecAXPY(user->uwork,-1.0,user->u); /* Note: user->u */
1245:   VecExp(user->uwork); 
1246:   MatMult(user->Av,user->uwork,user->Av_u); 
1247:  
1248:   /* Next update DSG = Div*S*Grad  with user->u */
1249:   VecCopy(user->Av_u,user->Swork);  
1250:   VecReciprocal(user->Swork); 
1251:   if (user->use_ptap) {
1252:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES); 
1253:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
1254:   } else {
1255:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
1256:     MatDiagonalScale(user->Divwork,PETSC_NULL,user->Av_u); 
1257:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
1258:   }


1261:   /* Now solve for y */
1262:   user->lcl->solve_type = LCL_FORWARD1;
1263:   MatMult(user->JsInv,user->q,user->y); 

1265:   user->ksp_its_initial = user->ksp_its;
1266:   user->ksp_its = 0;
1267:   /* Construct projection matrix Q (blocks) */
1268:   MatCreate(PETSC_COMM_WORLD,&user->Q); 
1269:   MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign); 
1270:   MatSetFromOptions(user->Q); 
1271:   MatMPIAIJSetPreallocation(user->Q,8,PETSC_NULL,8,PETSC_NULL); 
1272:   MatSeqAIJSetPreallocation(user->Q,8,PETSC_NULL); 

1274:   for (i=0; i<user->mx; i++){
1275:     x[i] = h*(i+0.5);
1276:     y[i] = h*(i+0.5);
1277:     z[i] = h*(i+0.5);
1278:   }
1279:   
1280:   MatGetOwnershipRange(user->Q,&istart,&iend);

1282:   nx = user->mx; ny = user->mx; nz = user->mx;
1283:   for (i=istart; i<iend; i++){
1284:     
1285:     xri = xr[i];
1286:     im = 0;
1287:     xim = x[im];
1288:     while (xri>xim && im<nx){
1289:       im = im+1;
1290:       xim = x[im];
1291:     }
1292:     indx1 = im-1;
1293:     indx2 = im;
1294:     dx1 = xri - x[indx1];
1295:     dx2 = x[indx2] - xri;

1297:     yri = yr[i];
1298:     im = 0;
1299:     yim = y[im];
1300:     while (yri>yim && im<ny){
1301:       im = im+1;
1302:       yim = y[im];
1303:     }
1304:     indy1 = im-1;
1305:     indy2 = im;
1306:     dy1 = yri - y[indy1];
1307:     dy2 = y[indy2] - yri;
1308:     
1309:     zri = zr[i];
1310:     im = 0;
1311:     zim = z[im];
1312:     while (zri>zim && im<nz){
1313:       im = im+1;
1314:       zim = z[im];
1315:     }
1316:     indz1 = im-1;
1317:     indz2 = im;
1318:     dz1 = zri - z[indz1];
1319:     dz2 = z[indz2] - zri;

1321:     Dx = x[indx2] - x[indx1];
1322:     Dy = y[indy2] - y[indy1];
1323:     Dz = z[indz2] - z[indz1];

1325:     j = indx1 + indy1*nx + indz1*nx*ny;
1326:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1327:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1329:     j = indx1 + indy1*nx + indz2*nx*ny;
1330:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1331:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1333:     j = indx1 + indy2*nx + indz1*nx*ny;
1334:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1335:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1337:     j = indx1 + indy2*nx + indz2*nx*ny;
1338:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1339:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1341:     j = indx2 + indy1*nx + indz1*nx*ny;
1342:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1343:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1345:     j = indx2 + indy1*nx + indz2*nx*ny;
1346:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1347:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1349:     j = indx2 + indy2*nx + indz1*nx*ny;
1350:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1351:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1353:     j = indx2 + indy2*nx + indz2*nx*ny;
1354:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1355:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1357:   }

1359:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY); 
1360:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);   
1361:   /* Create MQ (composed of blocks of Q */
1362:   MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ); 
1363:   MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult); 
1364:   MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose); 


1367:   /* Add noise to the measurement data */
1368:   VecSet(user->ywork,1.0); 
1369:   VecAYPX(user->ywork,user->noise,user->ytrue); 
1370:   MatMult(user->MQ,user->ywork,user->d); 

1372:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1373:   PetscFree(x); 
1374:   PetscFree(y); 
1375:   PetscFree(z); 
1376:   PetscLogStagePop(); 

1378:   return(0);
1379: }

1383: PetscErrorCode EllipticDestroy(AppCtx *user)
1384: {
1386:   PetscInt i;

1389:   MatDestroy(&user->DSG); 
1390:   KSPDestroy(&user->solver); 
1391:   MatDestroy(&user->Q); 
1392:   MatDestroy(&user->MQ); 
1393:   if (!user->use_ptap) {
1394:     MatDestroy(&user->Div); 
1395:     MatDestroy(&user->Divwork); 
1396:   } else {
1397:     MatDestroy(&user->Diag); 
1398:   }
1399:   if (user->use_lrc) {
1400:     MatDestroy(&user->Ones); 
1401:   }

1403:   MatDestroy(&user->Grad); 
1404:   MatDestroy(&user->Av); 
1405:   MatDestroy(&user->Avwork); 
1406:   MatDestroy(&user->L); 
1407:   
1408:   MatDestroy(&user->Js); 
1409:   MatDestroy(&user->Jd); 
1410:   MatDestroy(&user->JsBlock); 
1411:   MatDestroy(&user->JsInv); 

1413:   VecDestroy(&user->x); 
1414:   VecDestroy(&user->u); 
1415:   VecDestroy(&user->uwork); 
1416:   VecDestroy(&user->utrue); 
1417:   VecDestroy(&user->y); 
1418:   VecDestroy(&user->ywork); 
1419:   VecDestroy(&user->ytrue); 
1420:   VecDestroy(&user->c); 
1421:   VecDestroy(&user->cwork); 
1422:   VecDestroy(&user->ur); 
1423:   VecDestroy(&user->q); 
1424:   VecDestroy(&user->d); 
1425:   VecDestroy(&user->dwork); 
1426:   VecDestroy(&user->lwork); 
1427:   VecDestroy(&user->S); 
1428:   VecDestroy(&user->Swork); 
1429:   VecDestroy(&user->Sdiag); 
1430:   VecDestroy(&user->Ywork); 
1431:   VecDestroy(&user->Twork); 
1432:   VecDestroy(&user->Av_u); 
1433:   VecDestroy(&user->js_diag); 
1434:   ISDestroy(&user->s_is); 
1435:   ISDestroy(&user->d_is); 
1436:   VecDestroy(&user->suby); 
1437:   VecDestroy(&user->subd); 
1438:   VecDestroy(&user->subq); 
1439:   VecScatterDestroy(&user->state_scatter); 
1440:   VecScatterDestroy(&user->design_scatter); 
1441:   for (i=0;i<user->ns;i++) {
1442:     VecScatterDestroy(&user->yi_scatter[i]); 
1443:     VecScatterDestroy(&user->di_scatter[i]); 
1444:   }
1445:   PetscFree(user->yi_scatter); 
1446:   PetscFree(user->di_scatter); 
1447:   if (user->use_lrc) {
1448:     PetscFree(user->ones); 
1449:     MatDestroy(&user->Ones); 
1450:   }

1452:   return(0);
1453: }

1457: PetscErrorCode EllipticMonitor(TaoSolver tao, void *ptr)
1458: {
1460:   Vec X;
1461:   PetscReal unorm,ynorm;
1462:   AppCtx *user = (AppCtx*)ptr;
1464:   TaoGetSolutionVector(tao,&X); 
1465:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
1466:   VecAXPY(user->ywork,-1.0,user->ytrue); 
1467:   VecAXPY(user->uwork,-1.0,user->utrue); 
1468:   VecNorm(user->uwork,NORM_2,&unorm); 
1469:   VecNorm(user->ywork,NORM_2,&ynorm); 
1470:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%G ||y-yt||=%G\n",unorm,ynorm); 
1471:   return(0);
1472: }