Actual source code: hyperbolic.c

  1: #include "taosolver.h"
  2: #include "src/pde_constrained/impls/lcl/lcl.h"
  3: #include "petsctime.h"

  5: /*T
  6:    Concepts: TAO - Solving a system of nonlinear equations, nonlinear ;east squares
  7:    Routines: TaoInitialize(); TaoFinalize(); 
  8:    Routines: TaoCreate();
  9:    Routines: TaoSetType(); 
 10:    Routines: TaoSetInitialVector();
 11:    Routines: TaoSetObjectiveRoutine();
 12:    Routines: TaoSetGradientRoutine();
 13:    Routines: TaoSetConstraintsRoutine();
 14:    Routines: TaoSetJacobianStateRoutine();
 15:    Routines: TaoSetJacobianDesignRoutine();
 16:    Routines: TaoSetStateDesignIS();
 17:    Routines: TaoSetFromOptions();
 18:    Routines: TaoSetHistory(); TaoGetHistory();
 19:    Routines: TaoSolve();
 20:    Routines: TaoGetTerminationReason(); TaoDestroy(); 
 21:    Processors: 1
 22: T*/

 24: typedef struct {
 25:   PetscInt n; /*  Number of variables */
 26:   PetscInt m; /*  Number of constraints */
 27:   PetscInt mx; /*  grid points in each direction */
 28:   PetscInt nt; /*  Number of time steps */
 29:   PetscInt ndata; /*  Number of data points per sample */
 30:   IS s_is;
 31:   IS d_is;
 32:   VecScatter state_scatter;
 33:   VecScatter design_scatter;
 34:   VecScatter *uxi_scatter,*uyi_scatter,*ux_scatter,*uy_scatter,*ui_scatter;
 35:   VecScatter *yi_scatter;

 37:   Mat Js,Jd,JsBlockPrec,JsInv,JsBlock;
 38:   PetscBool jformed,c_formed;

 40:   PetscReal alpha; /*  Regularization parameter */
 41:   PetscReal gamma;
 42:   PetscReal ht; /*  Time step */
 43:   PetscReal T; /*  Final time */
 44:   Mat Q,QT;
 45:   Mat L,LT;
 46:   Mat Div,Divwork,Divxy[2];
 47:   Mat Grad,Gradxy[2];
 48:   Mat M;
 49:   Mat *C,*Cwork;
 50:   /* Mat Hs,Hd,Hsd; */
 51:   Vec q;
 52:   Vec ur; /*  reference */

 54:   Vec d;
 55:   Vec dwork;

 57:   Vec y; /*  state variables */
 58:   Vec ywork;
 59:   /* Vec ysave; */
 60:   Vec ytrue;
 61:   Vec *yi,*yiwork,*ziwork;
 62:   Vec *uxi,*uyi,*uxiwork,*uyiwork,*ui,*uiwork;

 64:   Vec u; /*  design variables */
 65:   Vec uwork,vwork;
 66:   /* Vec usave; */
 67:   Vec utrue;
 68:  
 69:   Vec js_diag;
 70:   
 71:   Vec c; /*  constraint vector */
 72:   Vec cwork;
 73:   
 74:   Vec lwork;

 76:   KSP solver;
 77:   PC prec;
 78:   PetscInt block_index;

 80:   TAO_LCL *lcl;
 81:   PetscReal tau[4];
 82:   PetscInt ksp_its;
 83:   PetscInt ksp_its_initial;

 85: } AppCtx;


 88: PetscErrorCode FormFunction(TaoSolver, Vec, PetscReal*, void*);
 89: PetscErrorCode FormGradient(TaoSolver, Vec, Vec, void*);
 90: PetscErrorCode FormFunctionGradient(TaoSolver, Vec, PetscReal*, Vec, void*);
 91: PetscErrorCode FormJacobianState(TaoSolver, Vec, Mat*, Mat*, Mat*, MatStructure*,void*);
 92: PetscErrorCode FormJacobianDesign(TaoSolver, Vec, Mat*,void*);
 93: PetscErrorCode FormConstraints(TaoSolver, Vec, Vec, void*);
 94: PetscErrorCode FormHessian(TaoSolver, Vec, Mat*, Mat*, MatStructure*, void*);
 95: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 96: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 97: PetscErrorCode HyperbolicInitialize(AppCtx *user);
 98: PetscErrorCode HyperbolicDestroy(AppCtx *user);
 99: PetscErrorCode HyperbolicMonitor(TaoSolver, void*);

101: PetscErrorCode StateMatMult(Mat,Vec,Vec);
102: PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
103: PetscErrorCode StateMatBlockMultTranspose(Mat,Vec,Vec);
104: PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
105: PetscErrorCode StateMatGetDiagonal(Mat,Vec);
106: PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
107: PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
108: PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
109: PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);

111: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
112: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

114: PetscErrorCode Scatter_yi(Vec,Vec*,VecScatter*,PetscInt); /*  y to y1,y2,...,y_nt */
115: PetscErrorCode Gather_yi(Vec,Vec*,VecScatter*,PetscInt); 
116: PetscErrorCode Scatter_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
117: PetscErrorCode Gather_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt);

119: static  char help[]="";

123: int main(int argc, char **argv)
124: {
126:   Vec x,x0;
127:   
128:   TaoSolver tao;
129:   TaoSolverTerminationReason reason;
130:   AppCtx user;
131:   IS is_allstate,is_alldesign;
132:   PetscInt lo,hi,hi2,lo2,ksp_old;
133:   PetscBool flag,showtime;
134:   PetscInt ntests = 1;
135:   PetscLogDouble v1,v2;
136:   PetscInt i;
137:   int stages[1];
138:   

140:   PetscInitialize(&argc, &argv, (char*)0,help);
141:   TaoInitialize(&argc, &argv, (char*)0,help);

143:   showtime = PETSC_FALSE;
144:   PetscOptionsBool("-showtime","Display time elapsed","",showtime,&showtime,&flag); 
145:   user.mx = 32;
146:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,&flag); 
147:   user.nt = 16;
148:   PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,&flag); 
149:   user.ndata = 64;
150:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,&flag); 
151:   user.alpha = 10.0;
152:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,&flag); 
153:   user.T = 1.0/32.0;
154:   PetscOptionsReal("-Tfinal","Final time","",user.T,&user.T,&flag); 


157:   user.tau[0] = 1e-4;
158:   PetscOptionsReal("-tola","Tolerance for first forward solve","",user.tau[0],&user.tau[0],&flag); 
159:   user.tau[1] = 1e-4;
160:   PetscOptionsReal("-tolb","Tolerance for first adjoint solve","",user.tau[1],&user.tau[1],&flag); 
161:   user.tau[2] = 1e-4;
162:   PetscOptionsReal("-tolc","Tolerance for second forward solve","",user.tau[2],&user.tau[2],&flag); 
163:   user.tau[3] = 1e-4;
164:   PetscOptionsReal("-told","Tolerance for second adjoint solve","",user.tau[3],&user.tau[3],&flag); 

166:   user.m = user.mx*user.mx*user.nt; /*  number of constraints */
167:   user.n = user.mx*user.mx*3*user.nt; /*  number of variables */
168:   user.ht = user.T/user.nt; /*  Time step */
169:   user.gamma = user.T*user.ht / (user.mx*user.mx);

171:   VecCreate(PETSC_COMM_WORLD,&user.u); 
172:   VecCreate(PETSC_COMM_WORLD,&user.y); 
173:   VecCreate(PETSC_COMM_WORLD,&user.c); 
174:   VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m); 
175:   VecSetSizes(user.y,PETSC_DECIDE,user.m); 
176:   VecSetSizes(user.c,PETSC_DECIDE,user.m); 
177:   VecSetFromOptions(user.u); 
178:   VecSetFromOptions(user.y); 
179:   VecSetFromOptions(user.c); 

181:   /* Create scatters for reduced spaces.
182:      If the state vector y and design vector u are partitioned as 
183:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
184:      then the solution vector x is organized as
185:      [y_1; u_1; y_2; u_2; ...; y_np; u_np]. 
186:      The index sets user.s_is and user.d_is correspond to the indices of the
187:      state and design variables owned by the current processor.
188:   */
189:   VecCreate(PETSC_COMM_WORLD,&x); 

191:   VecGetOwnershipRange(user.y,&lo,&hi); 
192:   VecGetOwnershipRange(user.u,&lo2,&hi2);  

194:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate); 
195:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is); 

197:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign); 
198:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is); 

200:   VecSetSizes(x,hi-lo+hi2-lo2,user.n); 
201:   VecSetFromOptions(x); 

203:   VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter); 
204:   VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter); 
205:   ISDestroy(&is_alldesign); 
206:   ISDestroy(&is_allstate); 



210:   /* Create TAO solver and set desired solution method */
211:   TaoCreate(PETSC_COMM_WORLD,&tao); 
212:   TaoSetType(tao,"tao_lcl"); 
213:   user.lcl = (TAO_LCL*)(tao->data);

215:   /* Set up initial vectors and matrices */
216:   HyperbolicInitialize(&user); 

218:   Gather(x,user.y,user.state_scatter,user.u,user.design_scatter); 
219:   VecDuplicate(x,&x0); 
220:   VecCopy(x,x0); 

222:   /* Set solution vector with an initial guess */
223:   TaoSetInitialVector(tao,x); 
224:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user); 
225:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user); 
226:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user); 

228:   TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, (void *)&user);  

230:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user); 

232:   TaoSetFromOptions(tao); 
233:   TaoSetStateDesignIS(tao,user.s_is,user.d_is); 

235:   /* SOLVE THE APPLICATION */
236:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,&flag); 
237:   PetscLogStageRegister("Trials",&stages[0]); 
238:   PetscLogStagePush(stages[0]); 
239:   user.ksp_its_initial = user.ksp_its;
240:   ksp_old = user.ksp_its;
241:   for (i=0; i<ntests; i++){
242:     PetscGetTime(&v1); 
243:     TaoSolve(tao);  
244:     PetscGetTime(&v2); 
245:     if (showtime) {
246:       PetscPrintf(PETSC_COMM_WORLD,"Elapsed time = %G\n",v2-v1); 
247:     }
248:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old); 
249:     VecCopy(x0,x); 
250:     TaoSetInitialVector(tao,x); 
251:   }
252:   PetscLogStagePop(); 
253:   PetscBarrier((PetscObject)x); 
254:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
255:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);
256:   PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests);
257:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its);
258:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: ");
259:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests);

261:   TaoGetTerminationReason(tao,&reason); 

263:   if (reason < 0)
264:   {
265:     PetscPrintf(MPI_COMM_WORLD, "TAO failed to converge.\n");
266:   }
267:   else
268:   {
269:     PetscPrintf(MPI_COMM_WORLD, "Optimization terminated with status %D.\n", reason);
270:   }


273:   /* Free TAO data structures */
274:   TaoDestroy(&tao); 

276:   /* Free PETSc data structures */
277:   VecDestroy(&x); 
278:   VecDestroy(&x0); 
279:   HyperbolicDestroy(&user); 

281:   /* Finalize TAO, PETSc */
282:   TaoFinalize();
283:   PetscFinalize();

285:   return 0;
286: }
287: /* ------------------------------------------------------------------- */
290: /* 
291:    dwork = Qy - d  
292:    lwork = L*(u-ur).^2
293:    f = 1/2 * (dwork.dork + alpha*y.lwork)
294: */
295: PetscErrorCode FormFunction(TaoSolver tao,Vec X,PetscReal *f,void *ptr)
296: {
298:   PetscReal d1=0,d2=0;
299:   AppCtx *user = (AppCtx*)ptr;
301:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
302:   MatMult(user->Q,user->y,user->dwork); 
303:   VecAXPY(user->dwork,-1.0,user->d); 
304:   VecDot(user->dwork,user->dwork,&d1); 

306:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
307:   VecPointwiseMult(user->uwork,user->uwork,user->uwork); 
308:   MatMult(user->L,user->uwork,user->lwork); 
309:   VecDot(user->y,user->lwork,&d2); 
310:   
311:   *f = 0.5 * (d1 + user->alpha*d2); 
312:   return(0);
313: }

315: /* ------------------------------------------------------------------- */
318: /*  
319:     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
320:     design: g_d = alpha*(L'y).*(u-ur)
321: */
322: PetscErrorCode FormGradient(TaoSolver tao,Vec X,Vec G,void *ptr)
323: {
325:   AppCtx *user = (AppCtx*)ptr;
327:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
328:   MatMult(user->Q,user->y,user->dwork); 
329:   VecAXPY(user->dwork,-1.0,user->d); 

331:   MatMult(user->QT,user->dwork,user->ywork); 
332:   
333:   MatMult(user->LT,user->y,user->uwork); 
334:   VecWAXPY(user->vwork,-1.0,user->ur,user->u); 
335:   VecPointwiseMult(user->uwork,user->vwork,user->uwork); 
336:   VecScale(user->uwork,user->alpha); 

338:   VecPointwiseMult(user->vwork,user->vwork,user->vwork); 
339:   MatMult(user->L,user->vwork,user->lwork); 
340:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork); 
341:                       
342:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
343:   return(0);
344: }

348: PetscErrorCode FormFunctionGradient(TaoSolver tao, Vec X, PetscReal *f, Vec G, void *ptr)
349: {
351:   PetscReal d1,d2;
352:   AppCtx *user = (AppCtx*)ptr;

355:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
356:   MatMult(user->Q,user->y,user->dwork); 
357:   VecAXPY(user->dwork,-1.0,user->d); 

359:   MatMult(user->QT,user->dwork,user->ywork); 
360:   
361:   VecDot(user->dwork,user->dwork,&d1); 

363:   MatMult(user->LT,user->y,user->uwork); 
364:   VecWAXPY(user->vwork,-1.0,user->ur,user->u); 
365:   VecPointwiseMult(user->uwork,user->vwork,user->uwork); 
366:   VecScale(user->uwork,user->alpha); 

368:   VecPointwiseMult(user->vwork,user->vwork,user->vwork); 
369:   MatMult(user->L,user->vwork,user->lwork); 
370:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork); 

372:   VecDot(user->y,user->lwork,&d2); 

374:   *f = 0.5 * (d1 + user->alpha*d2);

376:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 

378:   return(0);

380: }


383: /* ------------------------------------------------------------------- */
386: /* A 
387: MatShell object
388: */
389: PetscErrorCode FormJacobianState(TaoSolver tao, Vec X, Mat *J, Mat* JPre, Mat* JInv, MatStructure* flag, void *ptr)
390: {
392:   PetscInt i;
393:   AppCtx *user = (AppCtx*)ptr;
395:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
396:   
397:   Scatter_yi(user->u,user->ui,user->ui_scatter,user->nt); 
398:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt); 
399:   for (i=0; i<user->nt; i++){
400:     MatCopy(user->Divxy[0],user->C[i],SAME_NONZERO_PATTERN); 
401:     MatCopy(user->Divxy[1],user->Cwork[i],SAME_NONZERO_PATTERN); 

403:     MatDiagonalScale(user->C[i],PETSC_NULL,user->uxi[i]); 
404:     MatDiagonalScale(user->Cwork[i],PETSC_NULL,user->uyi[i]); 
405:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN); 
406:     MatScale(user->C[i],user->ht); 
407:     MatShift(user->C[i],1.0); 
408:   }
409:     
410:   *JInv = user->JsInv;

412:   return(0);
413: }
414: /* ------------------------------------------------------------------- */
417: /* B */
418: PetscErrorCode FormJacobianDesign(TaoSolver tao, Vec X, Mat *J, void *ptr)
419: {
421:   AppCtx *user = (AppCtx*)ptr;

424:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 

426:   *J = user->Jd;

428:   return(0);
429: }



435: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) 
436: {
438:   PetscInt i;
439:   void *ptr;
440:   AppCtx *user;
442:   MatShellGetContext(J_shell,&ptr); 
443:   user = (AppCtx*)ptr;

445:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 
446:   
447:   user->block_index = 0;
448:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]); 

450:   for (i=1; i<user->nt; i++){
451:     user->block_index = i;
452:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
453:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]); 
454:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]); 
455:   }

457:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 

459:   return(0);
460: }

464: PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
465: {
467:   PetscInt i;
468:   void *ptr;
469:   AppCtx *user;
471:   MatShellGetContext(J_shell,&ptr); 
472:   user = (AppCtx*)ptr;

474:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 

476:   for (i=0; i<user->nt-1; i++){
477:     user->block_index = i;
478:     MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]); 
479:     MatMult(user->M,user->yi[i+1],user->ziwork[i+1]); 
480:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i+1]); 
481:   }

483:   i = user->nt-1;
484:   user->block_index = i;
485:   MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]); 

487:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 

489:   return(0);
490: }

494: PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y) 
495: {
497:   PetscInt i;
498:   void *ptr;
499:   AppCtx *user;
501:   MatShellGetContext(J_shell,&ptr); 
502:   user = (AppCtx*)ptr;
503:   
504:   i = user->block_index;
505:   VecPointwiseMult(user->uxiwork[i],X,user->uxi[i]); 
506:   VecPointwiseMult(user->uyiwork[i],X,user->uyi[i]); 
507:   Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
508:   MatMult(user->Div,user->uiwork[i],Y); 
509:   VecAYPX(Y,user->ht,X); 

511:   return(0);
512: }

516: PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y) 
517: {
519:   PetscInt i;
520:   void *ptr;
521:   AppCtx *user;
523:   MatShellGetContext(J_shell,&ptr); 
524:   user = (AppCtx*)ptr;
525:  
526:   i = user->block_index;
527:   MatMult(user->Grad,X,user->uiwork[i]); 
528:   Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
529:   VecPointwiseMult(user->uxiwork[i],user->uxi[i],user->uxiwork[i]); 
530:   VecPointwiseMult(user->uyiwork[i],user->uyi[i],user->uyiwork[i]); 
531:   VecWAXPY(Y,1.0,user->uxiwork[i],user->uyiwork[i]); 
532:   VecAYPX(Y,user->ht,X); 

534:   return(0);
535: }

539: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) 
540: {
542:   void *ptr;
543:   PetscInt i;
544:   AppCtx *user;
546:   MatShellGetContext(J_shell,&ptr); 
547:   user = (AppCtx*)ptr;

549:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt); 
550:   Scatter_uxi_uyi(X,user->uxiwork,user->uxi_scatter,user->uyiwork,user->uyi_scatter,user->nt);
551:   for (i=0; i<user->nt; i++){
552:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]); 
553:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]); 
554:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
555:     MatMult(user->Div,user->uiwork[i],user->ziwork[i]); 
556:     VecScale(user->ziwork[i],user->ht); 
557:   }
558:   Gather_yi(Y,user->ziwork,user->yi_scatter,user->nt); 

560:   return(0);
561: }

565: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
566: {
568:   void *ptr;
569:   PetscInt i;
570:   AppCtx *user;
572:   MatShellGetContext(J_shell,&ptr); 
573:   user = (AppCtx*)ptr;

575:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt); 
576:   Scatter_yi(X,user->yiwork,user->yi_scatter,user->nt); 
577:   for (i=0; i<user->nt; i++){
578:     MatMult(user->Grad,user->yiwork[i],user->uiwork[i]); 
579:     Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
580:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]); 
581:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]); 
582:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
583:     VecScale(user->uiwork[i],user->ht); 
584:   }
585:   Gather_yi(Y,user->uiwork,user->ui_scatter,user->nt); 

587:   return(0);
588: }

592: PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y) 
593: {
595:   PetscInt i;
596:   void *ptr;
597:   AppCtx *user;
599:   PCShellGetContext(PC_shell,&ptr); 
600:   user = (AppCtx*)ptr;

602:   i = user->block_index;
603:   if (user->c_formed) {
604:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y); 
605:   }
606:   else {
607:     printf("C not formed"); abort();
608:   }

610:   return(0);
611: }

615: PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y) 
616: {
618:   PetscInt i;
619:   void *ptr;
620:   AppCtx *user;
622:   PCShellGetContext(PC_shell,&ptr); 
623:   user = (AppCtx*)ptr;

625:   i = user->block_index;
626:   if (user->c_formed) {
627:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y); 
628:   }
629:   else {
630:     printf("C not formed"); abort();
631:   }

633:   return(0);
634: }

638: PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
639: {
641:   PetscReal tau;
642:   void *ptr;
643:   AppCtx *user;
644:   PetscInt its,i;
646:   MatShellGetContext(J_shell,&ptr); 
647:   user = (AppCtx*)ptr;

649:   if (Y == user->ytrue) {
650:     KSPSetTolerances(user->solver,1e-4,1e-20,PETSC_DEFAULT,PETSC_DEFAULT);
651:   } else if (user->lcl) {
652:     tau = user->tau[user->lcl->solve_type];
653:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
654:   }
655:     
656:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 
657:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt); 
658:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
659:   
660:   user->block_index = 0;
661:   KSPSolve(user->solver,user->yi[0],user->yiwork[0]);  

663:   KSPGetIterationNumber(user->solver,&its); 
664:   user->ksp_its = user->ksp_its + its;


667:   for (i=1; i<user->nt; i++){
668:     MatMult(user->M,user->yiwork[i-1],user->ziwork[i-1]);
669:     VecAXPY(user->yi[i],1.0,user->ziwork[i-1]); 
670:     user->block_index = i;
671:     KSPSolve(user->solver,user->yi[i],user->yiwork[i]); 

673:     KSPGetIterationNumber(user->solver,&its); 
674:     user->ksp_its = user->ksp_its + its;


677:   }

679:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 


682:   return(0);
683: }

687: PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
688: {
690:   void *ptr;
691:   AppCtx *user;
692:   PetscReal tau;
693:   PetscInt its,i;
695:   MatShellGetContext(J_shell,&ptr); 
696:   user = (AppCtx*)ptr;

698:   if (user->lcl) {
699:     tau = user->tau[user->lcl->solve_type];
700:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
701:   }
702:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 
703:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt); 
704:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
705:   
706:   i = user->nt - 1;
707:   user->block_index = i;
708:   KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);  

710:   KSPGetIterationNumber(user->solver,&its); 
711:   user->ksp_its = user->ksp_its + its;


714:   for (i=user->nt-2; i>=0; i--){
715:     MatMult(user->M,user->yiwork[i+1],user->ziwork[i+1]); 
716:     VecAXPY(user->yi[i],1.0,user->ziwork[i+1]); 
717:     user->block_index = i;
718:     KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]); 

720:     KSPGetIterationNumber(user->solver,&its); 
721:     user->ksp_its = user->ksp_its + its;

723:   
724:   }

726:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 


729:   return(0);
730: }

734: PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
735: {
737:   void *ptr;
738:   AppCtx *user;
740:   MatShellGetContext(J_shell,&ptr); 
741:   user = (AppCtx*)ptr;

743:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell); 
744:   MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult); 
745:   MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate); 
746:   MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose); 
747:   MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal); 
748:   

750:   

752:   return(0);
753: }

757: PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
758: {
760:   void *ptr;
761:   AppCtx *user;
763:   MatShellGetContext(J_shell,&ptr); 
764:   user = (AppCtx*)ptr;
765:    VecCopy(user->js_diag,X); 
766:   return(0);
767:   
768: }

772: PetscErrorCode FormConstraints(TaoSolver tao, Vec X, Vec C, void *ptr)
773: {
774:   /* con = Ay - q, A = [C(u1)  0     0     ...   0; 
775:                          -M  C(u2)   0     ...   0; 
776:                           0   -M   C(u3)   ...   0;
777:                                       ...         ;
778:                           0    ...      -M C(u_nt)] 
779:      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
781:   PetscInt i;
782:   AppCtx *user = (AppCtx*)ptr;
784:    
785:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
786:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt); 
787:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

789:   user->block_index = 0;
790:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]); 

792:   for (i=1; i<user->nt; i++){
793:     user->block_index = i;
794:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
795:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]); 
796:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);                     
797:   }

799:   Gather_yi(C,user->yiwork,user->yi_scatter,user->nt); 
800:   VecAXPY(C,-1.0,user->q); 

802:   return(0);
803: }


808: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
809: {
812:   VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD); 
813:   VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD); 

815:   VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD); 
816:   VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD); 
817:   return(0);
818: }

822: PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
823: {
825:   PetscInt i;
827:   for (i=0; i<nt; i++){
828:     VecScatterBegin(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD); 
829:     VecScatterEnd(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD); 
830:     VecScatterBegin(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD); 
831:     VecScatterEnd(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD); 
832:   }
833:   return(0);
834: }

838: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
839: {
842:   VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE); 
843:   VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE); 
844:   VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE); 
845:   VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE); 
846:   return(0);
847: }

851: PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
852: {
854:   PetscInt i;
856:   for (i=0; i<nt; i++){
857:     VecScatterBegin(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
858:     VecScatterEnd(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
859:     VecScatterBegin(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
860:     VecScatterEnd(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
861:   }
862:   return(0);
863: }

867: PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
868: {
870:   PetscInt i;
872:   for (i=0; i<nt; i++){
873:     VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD); 
874:     VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD); 
875:   }
876:   return(0);
877: }

881: PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
882: {
884:   PetscInt i;
886:   for (i=0; i<nt; i++){
887:     VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE); 
888:     VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE); 
889:   }
890:   return(0);
891: }
892:  
893:     
896: PetscErrorCode HyperbolicInitialize(AppCtx *user)
897: {
899:   PetscInt n,i,j,linear_index,istart,iend,iblock,lo,hi;
900:   Vec XX,YY,XXwork,YYwork,yi,uxi,ui,bc;
901:   PetscReal h,sum;
902:   PetscScalar hinv,neg_hinv,quarter=0.25,one=1.0,half_hinv,neg_half_hinv;
903:   PetscScalar vx,vy;
904:   IS is_from_y,is_to_yi,is_from_u,is_to_uxi,is_to_uyi;

907:   user->jformed = PETSC_FALSE;
908:   user->c_formed = PETSC_FALSE;

910:   user->ksp_its = 0;
911:   user->ksp_its_initial = 0;

913:   n = user->mx * user->mx;

915:   h = 1.0/user->mx;
916:   hinv = user->mx;
917:   neg_hinv = -hinv;
918:   half_hinv = hinv / 2.0;
919:   neg_half_hinv = neg_hinv / 2.0;

921:   /* Generate Grad matrix */
922:   MatCreate(PETSC_COMM_WORLD,&user->Grad); 
923:   MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,2*n,n); 
924:   MatSetFromOptions(user->Grad); 
925:   MatMPIAIJSetPreallocation(user->Grad,2,PETSC_NULL,2,PETSC_NULL); 
926:   MatSeqAIJSetPreallocation(user->Grad,2,PETSC_NULL); 
927:   MatGetOwnershipRange(user->Grad,&istart,&iend); 

929:   for (i=istart; i<iend; i++){
930:     if (i<n){
931:       iblock = i / user->mx;
932:       j = iblock*user->mx + ((i+user->mx-1) % user->mx);
933:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES); 
934:       j = iblock*user->mx + ((i+1) % user->mx);
935:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
936:     }
937:     if (i>=n){
938:       j = (i - user->mx) % n;
939:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES); 
940:       j = (j + 2*user->mx) % n;
941:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
942:     }
943:   }

945:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY); 
946:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY); 

948:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[0]); 
949:   MatSetSizes(user->Gradxy[0],PETSC_DECIDE,PETSC_DECIDE,n,n); 
950:   MatSetFromOptions(user->Gradxy[0]); 
951:   MatMPIAIJSetPreallocation(user->Gradxy[0],2,PETSC_NULL,2,PETSC_NULL); 
952:   MatSeqAIJSetPreallocation(user->Gradxy[0],2,PETSC_NULL); 
953:   MatGetOwnershipRange(user->Gradxy[0],&istart,&iend); 
954:   
955:   for (i=istart; i<iend; i++){
956:     iblock = i / user->mx;
957:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
958:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&half_hinv,INSERT_VALUES); 
959:     j = iblock*user->mx + ((i+1) % user->mx);
960:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
961:   }
962:   MatAssemblyBegin(user->Gradxy[0],MAT_FINAL_ASSEMBLY); 
963:   MatAssemblyEnd(user->Gradxy[0],MAT_FINAL_ASSEMBLY); 

965:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[1]); 
966:   MatSetSizes(user->Gradxy[1],PETSC_DECIDE,PETSC_DECIDE,n,n); 
967:   MatSetFromOptions(user->Gradxy[1]); 
968:   MatMPIAIJSetPreallocation(user->Gradxy[1],2,PETSC_NULL,2,PETSC_NULL); 
969:   MatSeqAIJSetPreallocation(user->Gradxy[1],2,PETSC_NULL); 
970:   MatGetOwnershipRange(user->Gradxy[1],&istart,&iend); 

972:   for (i=istart; i<iend; i++){
973:     j = (i + n - user->mx) % n;
974:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&half_hinv,INSERT_VALUES); 
975:     j = (j + 2*user->mx) % n;
976:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
977:   }
978:   MatAssemblyBegin(user->Gradxy[1],MAT_FINAL_ASSEMBLY); 
979:   MatAssemblyEnd(user->Gradxy[1],MAT_FINAL_ASSEMBLY); 

981:   /* Generate Div matrix */
982:   MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div);
983:   MatTranspose(user->Gradxy[0],MAT_INITIAL_MATRIX,&user->Divxy[0]); 
984:   MatTranspose(user->Gradxy[1],MAT_INITIAL_MATRIX,&user->Divxy[1]); 

986:   /* Off-diagonal averaging matrix */
987:   MatCreate(PETSC_COMM_WORLD,&user->M); 
988:   MatSetSizes(user->M,PETSC_DECIDE,PETSC_DECIDE,n,n); 
989:   MatSetFromOptions(user->M); 
990:   MatMPIAIJSetPreallocation(user->M,4,PETSC_NULL,4,PETSC_NULL); 
991:   MatSeqAIJSetPreallocation(user->M,4,PETSC_NULL); 
992:   MatGetOwnershipRange(user->M,&istart,&iend); 

994:   for (i=istart; i<iend; i++){
995:     /* kron(Id,Av) */
996:     iblock = i / user->mx;
997:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
998:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
999:     j = iblock*user->mx + ((i+1) % user->mx);
1000:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
1001:     
1002:     /* kron(Av,Id) */
1003:     j = (i + user->mx) % n;
1004:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
1005:     j = (i + n - user->mx) % n;
1006:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
1007:   }

1009:   MatAssemblyBegin(user->M,MAT_FINAL_ASSEMBLY); 
1010:   MatAssemblyEnd(user->M,MAT_FINAL_ASSEMBLY); 

1012:   /* Generate 2D grid */
1013:   VecCreate(PETSC_COMM_WORLD,&XX); 
1014:   VecCreate(PETSC_COMM_WORLD,&user->q); 
1015:   VecSetSizes(XX,PETSC_DECIDE,n); 
1016:   VecSetSizes(user->q,PETSC_DECIDE,n*user->nt); 
1017:   VecSetFromOptions(XX); 
1018:   VecSetFromOptions(user->q); 

1020:   VecDuplicate(XX,&YY); 
1021:   VecDuplicate(XX,&XXwork); 
1022:   VecDuplicate(XX,&YYwork); 
1023:   VecDuplicate(XX,&user->d); 
1024:   VecDuplicate(XX,&user->dwork); 

1026:   VecGetOwnershipRange(XX,&istart,&iend); 
1027:   for (linear_index=istart; linear_index<iend; linear_index++){
1028:     i = linear_index % user->mx;
1029:     j = (linear_index-i)/user->mx;
1030:     vx = h*(i+0.5); 
1031:     vy = h*(j+0.5);
1032:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES); 
1033:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES); 
1034:   }

1036:   VecAssemblyBegin(XX); 
1037:   VecAssemblyEnd(XX); 
1038:   VecAssemblyBegin(YY); 
1039:   VecAssemblyEnd(YY); 

1041:   /* Compute final density function yT
1042:      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2)) 
1043:      yT = yT / (h^2*sum(yT)) */
1044:   VecCopy(XX,XXwork); 
1045:   VecCopy(YY,YYwork); 

1047:   VecShift(XXwork,-0.25); 
1048:   VecShift(YYwork,-0.25); 

1050:   VecPointwiseMult(XXwork,XXwork,XXwork); 
1051:   VecPointwiseMult(YYwork,YYwork,YYwork); 

1053:   VecCopy(XXwork,user->dwork); 
1054:   VecAXPY(user->dwork,1.0,YYwork); 
1055:   VecScale(user->dwork,-30.0); 
1056:   VecExp(user->dwork); 
1057:   VecCopy(user->dwork,user->d); 

1059:   VecCopy(XX,XXwork); 
1060:   VecCopy(YY,YYwork); 

1062:   VecShift(XXwork,-0.75); 
1063:   VecShift(YYwork,-0.75); 

1065:   VecPointwiseMult(XXwork,XXwork,XXwork); 
1066:   VecPointwiseMult(YYwork,YYwork,YYwork); 

1068:   VecCopy(XXwork,user->dwork); 
1069:   VecAXPY(user->dwork,1.0,YYwork); 
1070:   VecScale(user->dwork,-30.0); 
1071:   VecExp(user->dwork); 

1073:   VecAXPY(user->d,1.0,user->dwork); 
1074:   VecShift(user->d,1.0); 
1075:   VecSum(user->d,&sum);   
1076:   VecScale(user->d,1.0/(h*h*sum)); 

1078:   /* Initial conditions of forward problem */
1079:   VecDuplicate(XX,&bc); 
1080:   VecCopy(XX,XXwork); 
1081:   VecCopy(YY,YYwork); 

1083:   VecShift(XXwork,-0.5); 
1084:   VecShift(YYwork,-0.5); 

1086:   VecPointwiseMult(XXwork,XXwork,XXwork); 
1087:   VecPointwiseMult(YYwork,YYwork,YYwork); 

1089:   VecWAXPY(bc,1.0,XXwork,YYwork); 
1090:   VecScale(bc,-50.0); 
1091:   VecExp(bc); 
1092:   VecShift(bc,1.0); 
1093:   VecSum(bc,&sum); 
1094:   VecScale(bc,1.0/(h*h*sum)); 

1096:   /* Create scatter from y to y_1,y_2,...,y_nt */
1097:   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
1098:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->yi_scatter);
1099:   VecCreate(PETSC_COMM_WORLD,&yi); 
1100:   VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx); 
1101:   VecSetFromOptions(yi); 
1102:   VecDuplicateVecs(yi,user->nt,&user->yi); 
1103:   VecDuplicateVecs(yi,user->nt,&user->yiwork); 
1104:   VecDuplicateVecs(yi,user->nt,&user->ziwork); 
1105:   for (i=0; i<user->nt; i++){
1106:     VecGetOwnershipRange(user->yi[i],&lo,&hi); 
1107:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi); 
1108:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+i*user->mx*user->mx,1,&is_from_y); 
1109:     VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]); 
1110:     ISDestroy(&is_to_yi); 
1111:     ISDestroy(&is_from_y); 
1112:   }

1114:   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
1115:   /*  TODO: reorder for better parallelism */
1116:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->uxi_scatter); 
1117:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->uyi_scatter); 
1118:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->ux_scatter); 
1119:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->uy_scatter); 
1120:   PetscMalloc(2*user->nt*user->mx*user->mx*sizeof(PetscInt),&user->ui_scatter); 
1121:   VecCreate(PETSC_COMM_WORLD,&uxi); 
1122:   VecCreate(PETSC_COMM_WORLD,&ui); 
1123:   VecSetSizes(uxi,PETSC_DECIDE,user->mx*user->mx); 
1124:   VecSetSizes(ui,PETSC_DECIDE,2*user->mx*user->mx); 
1125:   VecSetFromOptions(uxi); 
1126:   VecSetFromOptions(ui); 
1127:   VecDuplicateVecs(uxi,user->nt,&user->uxi); 
1128:   VecDuplicateVecs(uxi,user->nt,&user->uyi); 
1129:   VecDuplicateVecs(uxi,user->nt,&user->uxiwork); 
1130:   VecDuplicateVecs(uxi,user->nt,&user->uyiwork); 
1131:   VecDuplicateVecs(ui,user->nt,&user->ui); 
1132:   VecDuplicateVecs(ui,user->nt,&user->uiwork); 
1133:   for (i=0; i<user->nt; i++){
1134:     VecGetOwnershipRange(user->uxi[i],&lo,&hi); 
1135:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi); 
1136:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u); 
1137:     VecScatterCreate(user->u,is_from_u,user->uxi[i],is_to_uxi,&user->uxi_scatter[i]); 

1139:     ISDestroy(&is_to_uxi); 
1140:     ISDestroy(&is_from_u); 

1142:     VecGetOwnershipRange(user->uyi[i],&lo,&hi); 
1143:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi); 
1144:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+(2*i+1)*user->mx*user->mx,1,&is_from_u); 
1145:     VecScatterCreate(user->u,is_from_u,user->uyi[i],is_to_uyi,&user->uyi_scatter[i]); 

1147:     ISDestroy(&is_to_uyi); 
1148:     ISDestroy(&is_from_u); 

1150:     VecGetOwnershipRange(user->uxi[i],&lo,&hi); 
1151:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi); 
1152:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_from_u); 
1153:     VecScatterCreate(user->ui[i],is_from_u,user->uxi[i],is_to_uxi,&user->ux_scatter[i]);    

1155:     ISDestroy(&is_to_uxi); 
1156:     ISDestroy(&is_from_u); 

1158:     VecGetOwnershipRange(user->uyi[i],&lo,&hi); 
1159:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi); 
1160:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+user->mx*user->mx,1,&is_from_u); 
1161:     VecScatterCreate(user->ui[i],is_from_u,user->uyi[i],is_to_uyi,&user->uy_scatter[i]); 

1163:     ISDestroy(&is_to_uyi); 
1164:     ISDestroy(&is_from_u); 

1166:     VecGetOwnershipRange(user->ui[i],&lo,&hi); 
1167:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi); 
1168:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u); 
1169:     VecScatterCreate(user->u,is_from_u,user->ui[i],is_to_uxi,&user->ui_scatter[i]); 

1171:     ISDestroy(&is_to_uxi); 
1172:     ISDestroy(&is_from_u); 
1173:   }

1175:   /* RHS of forward problem */
1176:   MatMult(user->M,bc,user->yiwork[0]);  
1177:   for (i=1; i<user->nt; i++){
1178:     VecSet(user->yiwork[i],0.0); 
1179:   }
1180:   Gather_yi(user->q,user->yiwork,user->yi_scatter,user->nt); 

1182:   /* Compute true velocity field utrue */
1183:   VecDuplicate(user->u,&user->utrue); 
1184:   for (i=0; i<user->nt; i++){
1185:     VecCopy(YY,user->uxi[i]); 
1186:     VecScale(user->uxi[i],150.0*i*user->ht); 
1187:     VecCopy(XX,user->uyi[i]); 
1188:     VecShift(user->uyi[i],-10.0); 
1189:     VecScale(user->uyi[i],15.0*i*user->ht); 
1190:   }
1191:   Gather_uxi_uyi(user->utrue,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt); 
1192:  
1193:   /* Initial guess and reference model */
1194:   VecDuplicate(user->utrue,&user->ur); 
1195:   for (i=0; i<user->nt; i++){
1196:     VecCopy(XX,user->uxi[i]); 
1197:     VecShift(user->uxi[i],i*user->ht); 
1198:     VecCopy(YY,user->uyi[i]); 
1199:     VecShift(user->uyi[i],-i*user->ht); 
1200:   }
1201:   Gather_uxi_uyi(user->ur,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);   

1203:   /* Generate regularization matrix L */
1204:   MatCreate(PETSC_COMM_WORLD,&user->LT); 
1205:   MatSetSizes(user->LT,PETSC_DECIDE,PETSC_DECIDE,2*n*user->nt,n*user->nt); 
1206:   MatSetFromOptions(user->LT); 
1207:   MatMPIAIJSetPreallocation(user->LT,1,PETSC_NULL,1,PETSC_NULL); 
1208:   MatSeqAIJSetPreallocation(user->LT,1,PETSC_NULL); 
1209:   MatGetOwnershipRange(user->LT,&istart,&iend);

1211:   for (i=istart; i<iend; i++){
1212:     iblock = (i+n) / (2*n);
1213:     j = i - iblock*n;
1214:     MatSetValues(user->LT,1,&i,1,&j,&user->gamma,INSERT_VALUES); 
1215:   }

1217:   MatAssemblyBegin(user->LT,MAT_FINAL_ASSEMBLY); 
1218:   MatAssemblyEnd(user->LT,MAT_FINAL_ASSEMBLY); 

1220:   MatTranspose(user->LT,MAT_INITIAL_MATRIX,&user->L); 

1222:   /* Build work vectors and matrices */
1223:   VecCreate(PETSC_COMM_WORLD,&user->lwork); 
1224:   VecSetType(user->lwork,VECMPI); 
1225:   VecSetSizes(user->lwork,PETSC_DECIDE,user->m);  
1226:   VecSetFromOptions(user->lwork); 

1228:   MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork); 

1230:   VecDuplicate(user->y,&user->ywork); 
1231:   VecDuplicate(user->u,&user->uwork); 
1232:   VecDuplicate(user->u,&user->vwork); 
1233:   VecDuplicate(user->u,&user->js_diag); 
1234:   VecDuplicate(user->c,&user->cwork); 

1236:   /* Create matrix-free shell user->Js for computing A*x */
1237:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->Js); 
1238:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult); 
1239:   MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate); 
1240:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose); 
1241:   MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal); 

1243:   /* Diagonal blocks of user->Js */
1244:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlock); 
1245:   MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult); 
1246:   MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMultTranspose); 

1248:   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1249:      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1250:      This is an SOR preconditioner for user->JsBlock. */
1251:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlockPrec); 
1252:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult);  
1253:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMultTranspose); 
1254:   
1255:   /* Create a matrix-free shell user->Jd for computing B*x */
1256:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->n-user->m,user,&user->Jd); 
1257:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult); 
1258:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose); 

1260:   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1261:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsInv); 
1262:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult); 
1263:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult); 


1266:   /* Build matrices for SOR preconditioner */
1267:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt); 
1268:   MatShift(user->Divxy[0],0.0);  /*  Force C[i] and Divxy[0] to share same nonzero pattern */
1269:   MatAXPY(user->Divxy[0],0.0,user->Divxy[1],DIFFERENT_NONZERO_PATTERN); 
1270:   PetscMalloc(5*n*sizeof(PetscReal),&user->C);
1271:   PetscMalloc(2*n*sizeof(PetscReal),&user->Cwork);
1272:   for (i=0; i<user->nt; i++){
1273:     MatDuplicate(user->Divxy[0],MAT_COPY_VALUES,&user->C[i]); 
1274:     MatDuplicate(user->Divxy[1],MAT_COPY_VALUES,&user->Cwork[i]); 

1276:     MatDiagonalScale(user->C[i],PETSC_NULL,user->uxi[i]); 
1277:     MatDiagonalScale(user->Cwork[i],PETSC_NULL,user->uyi[i]); 
1278:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN); 
1279:     MatScale(user->C[i],user->ht); 
1280:     MatShift(user->C[i],1.0); 
1281:   }

1283:   /* Solver options and tolerances */
1284:   KSPCreate(PETSC_COMM_WORLD,&user->solver); 
1285:   KSPSetType(user->solver,KSPGMRES); 
1286:   KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec,DIFFERENT_NONZERO_PATTERN); 
1287:   KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE);  /*  TODO: why is true slower? */
1288:   KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500); 
1289:   /* KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500);  */
1290:   KSPGetPC(user->solver,&user->prec); 
1291:   PCSetType(user->prec,PCSHELL); 

1293:   PCShellSetApply(user->prec,StateMatBlockPrecMult); 
1294:   PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMultTranspose); 
1295:   PCShellSetContext(user->prec,user); 

1297:   /* Compute true state function yt given ut */
1298:   VecCreate(PETSC_COMM_WORLD,&user->ytrue); 
1299:   VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt); 
1300:   VecSetFromOptions(user->ytrue); 
1301:   user->c_formed = PETSC_TRUE; 
1302:   VecCopy(user->utrue,user->u); /*  Set u=utrue temporarily for StateMatInv */
1303:   VecSet(user->ytrue,0.0);  /*  Initial guess */
1304:   StateMatInvMult(user->Js,user->q,user->ytrue); 
1305:   VecCopy(user->ur,user->u); /*  Reset u=ur */

1307:   /* Initial guess y0 for state given u0 */
1308:   user->lcl->solve_type = LCL_FORWARD1;
1309:   StateMatInvMult(user->Js,user->q,user->y); 

1311:   /* Data discretization */
1312:   MatCreate(PETSC_COMM_WORLD,&user->Q); 
1313:   MatSetSizes(user->Q,PETSC_DECIDE,PETSC_DECIDE,user->mx*user->mx,user->m); 
1314:   MatSetFromOptions(user->Q); 
1315:   MatMPIAIJSetPreallocation(user->Q,0,PETSC_NULL,1,PETSC_NULL); 
1316:   MatSeqAIJSetPreallocation(user->Q,1,PETSC_NULL); 

1318:   MatGetOwnershipRange(user->Q,&istart,&iend); 

1320:   for (i=istart; i<iend; i++){
1321:     j = i + user->m - user->mx*user->mx;
1322:     MatSetValues(user->Q,1,&i,1,&j,&one,INSERT_VALUES); 
1323:   }

1325:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY); 
1326:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY); 

1328:   MatTranspose(user->Q,MAT_INITIAL_MATRIX,&user->QT); 

1330:   VecDestroy(&XX); 
1331:   VecDestroy(&YY); 
1332:   VecDestroy(&XXwork); 
1333:   VecDestroy(&YYwork); 
1334:   VecDestroy(&yi); 
1335:   VecDestroy(&uxi); 
1336:   VecDestroy(&ui); 
1337:   VecDestroy(&bc); 

1339:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1340:   KSPSetFromOptions(user->solver); 


1343:   return(0);
1344: }

1348: PetscErrorCode HyperbolicDestroy(AppCtx *user)
1349: {
1351:   PetscInt i;
1353:   MatDestroy(&user->Q); 
1354:   MatDestroy(&user->QT); 
1355:   MatDestroy(&user->Div); 
1356:   MatDestroy(&user->Divwork); 
1357:   MatDestroy(&user->Grad); 
1358:   MatDestroy(&user->L); 
1359:   MatDestroy(&user->LT); 
1360:   KSPDestroy(&user->solver); 
1361:   MatDestroy(&user->Js); 
1362:   MatDestroy(&user->Jd); 
1363:   MatDestroy(&user->JsBlockPrec); 
1364:   MatDestroy(&user->JsInv); 
1365:   MatDestroy(&user->JsBlock); 
1366:   MatDestroy(&user->Divxy[0]); 
1367:   MatDestroy(&user->Divxy[1]); 
1368:   MatDestroy(&user->Gradxy[0]); 
1369:   MatDestroy(&user->Gradxy[1]); 
1370:   MatDestroy(&user->M); 
1371:   for (i=0; i<user->nt; i++){
1372:     MatDestroy(&user->C[i]); 
1373:     MatDestroy(&user->Cwork[i]); 
1374:   }
1375:   PetscFree(user->C); 
1376:   PetscFree(user->Cwork); 
1377:   VecDestroy(&user->u); 
1378:   VecDestroy(&user->uwork); 
1379:   VecDestroy(&user->vwork); 
1380:   VecDestroy(&user->utrue); 
1381:   VecDestroy(&user->y); 
1382:   VecDestroy(&user->ywork); 
1383:   VecDestroy(&user->ytrue);  
1384:   VecDestroyVecs(user->nt,&user->yi); 
1385:   VecDestroyVecs(user->nt,&user->yiwork); 
1386:   VecDestroyVecs(user->nt,&user->ziwork); 
1387:   VecDestroyVecs(user->nt,&user->uxi); 
1388:   VecDestroyVecs(user->nt,&user->uyi); 
1389:   VecDestroyVecs(user->nt,&user->uxiwork); 
1390:   VecDestroyVecs(user->nt,&user->uyiwork); 
1391:   VecDestroyVecs(user->nt,&user->ui); 
1392:   VecDestroyVecs(user->nt,&user->uiwork); 
1393:   VecDestroy(&user->c); 
1394:   VecDestroy(&user->cwork); 
1395:   VecDestroy(&user->ur); 
1396:   VecDestroy(&user->q); 
1397:   VecDestroy(&user->d); 
1398:   VecDestroy(&user->dwork); 
1399:   VecDestroy(&user->lwork); 
1400:   VecDestroy(&user->js_diag); 
1401:   ISDestroy(&user->s_is); 
1402:   ISDestroy(&user->d_is); 
1403:   VecScatterDestroy(&user->state_scatter); 
1404:   VecScatterDestroy(&user->design_scatter); 
1405:   for (i=0; i<user->nt; i++){
1406:     VecScatterDestroy(&user->uxi_scatter[i]); 
1407:     VecScatterDestroy(&user->uyi_scatter[i]); 
1408:     VecScatterDestroy(&user->ux_scatter[i]); 
1409:     VecScatterDestroy(&user->uy_scatter[i]); 
1410:     VecScatterDestroy(&user->ui_scatter[i]); 
1411:     VecScatterDestroy(&user->yi_scatter[i]); 
1412:   }
1413:   PetscFree(user->uxi_scatter); 
1414:   PetscFree(user->uyi_scatter); 
1415:   PetscFree(user->ux_scatter); 
1416:   PetscFree(user->uy_scatter); 
1417:   PetscFree(user->ui_scatter); 
1418:   PetscFree(user->yi_scatter); 
1419:   return(0);
1420: }

1424: PetscErrorCode HyperbolicMonitor(TaoSolver tao, void *ptr)
1425: {
1427:   Vec X;
1428:   PetscReal unorm,ynorm;
1429:   AppCtx *user = (AppCtx*)ptr;
1431:   TaoGetSolutionVector(tao,&X); 
1432:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
1433:   VecAXPY(user->ywork,-1.0,user->ytrue); 
1434:   VecAXPY(user->uwork,-1.0,user->utrue); 
1435:   VecNorm(user->uwork,NORM_2,&unorm); 
1436:   VecNorm(user->ywork,NORM_2,&ynorm); 
1437:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%G ||y-yt||=%G\n",unorm,ynorm); 
1438:   return(0);
1439: }