Actual source code: chwirut2.c
1: /*
2: Include "tao.h" so that we can use TAO solvers. Note that this
3: file automatically includes libraries such as:
4: petsc.h - base PETSc routines petscvec.h - vectors
5: petscsys.h - sysem routines petscmat.h - matrices
6: petscis.h - index sets petscksp.h - Krylov subspace methods
7: petscviewer.h - viewers petscpc.h - preconditioners
9: */
11: #include "taosolver.h"
12: #include "mpi.h"
15: /*
16: Description: These data are the result of a NIST study involving
17: ultrasonic calibration. The response variable is
18: ultrasonic response, and the predictor variable is
19: metal distance.
21: Reference: Chwirut, D., NIST (197?).
22: Ultrasonic Reference Block Study.
23: */
27: static char help[]="Finds the nonlinear least-squares solution to the model \n\
28: y = exp[-b1*x]/(b2+b3*x) + e \n";
32: /* T
33: Concepts: TAO - Solving a system of nonlinear equations, nonlinear ;east squares
34: Routines: TaoInitialize(); TaoFinalize();
35: Routines: TaoCreate();
36: Routines: TaoSetType();
37: Routines: TaoSetSeparableObjectiveRoutine();
38: Routines: TaoSetMonitor();
39: Routines: TaoSetInitialVector();
40: Routines: TaoSetFromOptions();
41: Routines: TaoSolve();
42: Routines: TaoDestroy();
43: Processors: n
44: T*/
46: #define NOBSERVATIONS 214
47: #define NPARAMETERS 3
49: #define DIE_TAG 2000
50: #define IDLE_TAG 1000
53: /* User-defined application context */
54: typedef struct {
55: /* Working space */
56: PetscReal t[NOBSERVATIONS]; /* array of independent variables of observation */
57: PetscReal y[NOBSERVATIONS]; /* array of dependent variables */
58: PetscMPIInt size,rank;
59: } AppCtx;
61: /* User provided Routines */
62: PetscErrorCode InitializeData(AppCtx *user);
63: PetscErrorCode FormStartingPoint(Vec);
64: PetscErrorCode EvaluateFunction(TaoSolver, Vec, Vec, void *);
65: PetscErrorCode MyMonitor(TaoSolver, void*);
66: PetscErrorCode TaskWorker(AppCtx *user);
67: PetscErrorCode StopWorkers(AppCtx *user);
68: PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user);
70: /*--------------------------------------------------------------------*/
73: int main(int argc,char **argv)
74: {
76: Vec x, f; /* solution, function */
77: TaoSolver tao; /* TaoSolver solver context */
78: AppCtx user; /* user-defined work context */
80: /* Initialize TAO and PETSc */
81: PetscInitialize(&argc,&argv,(char *)0,help);
82: TaoInitialize(&argc,&argv,(char *)0,help);
84: MPI_Comm_size(MPI_COMM_WORLD,&user.size);
85: MPI_Comm_rank(MPI_COMM_WORLD,&user.rank);
86: InitializeData(&user);
88: /* Run optimization on rank 0 */
89: if (user.rank == 0) {
90: /* Allocate vectors */
91: VecCreateSeq(PETSC_COMM_SELF,NPARAMETERS,&x);
92: VecCreateSeq(PETSC_COMM_SELF,NOBSERVATIONS,&f);
94: /* TAO code begins here */
96: /* Create TAO solver and set desired solution method */
97: TaoCreate(PETSC_COMM_SELF,&tao);
98: TaoSetType(tao,"tao_pounders");
100: /* Set the function and Jacobian routines. */
101: FormStartingPoint(x);
102: TaoSetInitialVector(tao,x);
103: TaoSetSeparableObjectiveRoutine(tao,f,EvaluateFunction,(void*)&user);
104: TaoSetMonitor(tao,MyMonitor,&user,PETSC_NULL);
106:
107: /* Check for any TAO command line arguments */
108: TaoSetFromOptions(tao);
110: /* Perform the Solve */
111: TaoSolve(tao);
112:
113: /* Free TAO data structures */
114: TaoDestroy(&tao);
116: /* Free PETSc data structures */
117: VecDestroy(&x);
118: VecDestroy(&f);
119: StopWorkers(&user);
120: } else {
121: TaskWorker(&user);
122: }
124: /* Finalize TAO */
125: TaoFinalize();
126: PetscFinalize();
127:
128: return 0;
129: }
134: /*--------------------------------------------------------------------*/
137: PetscErrorCode EvaluateFunction(TaoSolver tao, Vec X, Vec F, void *ptr)
138: {
139: AppCtx *user = (AppCtx *)ptr;
140: PetscInt i;
141: PetscReal *x,*f;
145: VecGetArray(X,&x);
146: VecGetArray(F,&f);
149: if (user->size == 1) {
150: /* Single processor */
151: for (i=0;i<NOBSERVATIONS;i++) {
152: RunSimulation(x,i,&f[i],user);
153: }
154: } else {
155: /* Multiprocessor master */
156: PetscMPIInt tag;
157: PetscInt finishedtasks,next_task,checkedin;
158: PetscReal f_i;
159: MPI_Status status;
160:
161: next_task=0;
162: finishedtasks=0;
163: checkedin=0;
164:
165: while(finishedtasks < NOBSERVATIONS || checkedin < user->size-1) {
166: MPI_Recv(&f_i,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);
167: if (status.MPI_TAG == IDLE_TAG) {
168: checkedin++;
169: } else {
171: tag = status.MPI_TAG;
172: f[tag] = (PetscReal)f_i;
173: finishedtasks++;
174: }
176: if (next_task<NOBSERVATIONS) {
177: MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,next_task,PETSC_COMM_WORLD);
178: next_task++;
180: } else {
181: /* Send idle message */
182: MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,IDLE_TAG,PETSC_COMM_WORLD);
183: }
184: }
185: }
186:
187: VecRestoreArray(X,&x);
188: VecRestoreArray(F,&f);
189: PetscLogFlops(6*NOBSERVATIONS);
190: return(0);
191: }
193: /* ------------------------------------------------------------ */
196: PetscErrorCode FormStartingPoint(Vec X)
197: {
198: PetscReal *x;
200:
203: VecGetArray(X,&x);
205: x[0] = 0.15;
206: x[1] = 0.008;
207: x[2] = 0.010;
209: VecRestoreArray(X,&x);
211: return(0);
212: }
215: /* ---------------------------------------------------------------------- */
218: PetscErrorCode InitializeData(AppCtx *user)
219: {
220: PetscReal *t=user->t,*y=user->y;
221: PetscInt i=0;
226: y[i] = 92.9000; t[i++] = 0.5000;
227: y[i] = 78.7000; t[i++] = 0.6250;
228: y[i] = 64.2000; t[i++] = 0.7500;
229: y[i] = 64.9000; t[i++] = 0.8750;
230: y[i] = 57.1000; t[i++] = 1.0000;
231: y[i] = 43.3000; t[i++] = 1.2500;
232: y[i] = 31.1000; t[i++] = 1.7500;
233: y[i] = 23.6000; t[i++] = 2.2500;
234: y[i] = 31.0500; t[i++] = 1.7500;
235: y[i] = 23.7750; t[i++] = 2.2500;
236: y[i] = 17.7375; t[i++] = 2.7500;
237: y[i] = 13.8000; t[i++] = 3.2500;
238: y[i] = 11.5875; t[i++] = 3.7500;
239: y[i] = 9.4125; t[i++] = 4.2500;
240: y[i] = 7.7250; t[i++] = 4.7500;
241: y[i] = 7.3500; t[i++] = 5.2500;
242: y[i] = 8.0250; t[i++] = 5.7500;
243: y[i] = 90.6000; t[i++] = 0.5000;
244: y[i] = 76.9000; t[i++] = 0.6250;
245: y[i] = 71.6000; t[i++] = 0.7500;
246: y[i] = 63.6000; t[i++] = 0.8750;
247: y[i] = 54.0000; t[i++] = 1.0000;
248: y[i] = 39.2000; t[i++] = 1.2500;
249: y[i] = 29.3000; t[i++] = 1.7500;
250: y[i] = 21.4000; t[i++] = 2.2500;
251: y[i] = 29.1750; t[i++] = 1.7500;
252: y[i] = 22.1250; t[i++] = 2.2500;
253: y[i] = 17.5125; t[i++] = 2.7500;
254: y[i] = 14.2500; t[i++] = 3.2500;
255: y[i] = 9.4500; t[i++] = 3.7500;
256: y[i] = 9.1500; t[i++] = 4.2500;
257: y[i] = 7.9125; t[i++] = 4.7500;
258: y[i] = 8.4750; t[i++] = 5.2500;
259: y[i] = 6.1125; t[i++] = 5.7500;
260: y[i] = 80.0000; t[i++] = 0.5000;
261: y[i] = 79.0000; t[i++] = 0.6250;
262: y[i] = 63.8000; t[i++] = 0.7500;
263: y[i] = 57.2000; t[i++] = 0.8750;
264: y[i] = 53.2000; t[i++] = 1.0000;
265: y[i] = 42.5000; t[i++] = 1.2500;
266: y[i] = 26.8000; t[i++] = 1.7500;
267: y[i] = 20.4000; t[i++] = 2.2500;
268: y[i] = 26.8500; t[i++] = 1.7500;
269: y[i] = 21.0000; t[i++] = 2.2500;
270: y[i] = 16.4625; t[i++] = 2.7500;
271: y[i] = 12.5250; t[i++] = 3.2500;
272: y[i] = 10.5375; t[i++] = 3.7500;
273: y[i] = 8.5875; t[i++] = 4.2500;
274: y[i] = 7.1250; t[i++] = 4.7500;
275: y[i] = 6.1125; t[i++] = 5.2500;
276: y[i] = 5.9625; t[i++] = 5.7500;
277: y[i] = 74.1000; t[i++] = 0.5000;
278: y[i] = 67.3000; t[i++] = 0.6250;
279: y[i] = 60.8000; t[i++] = 0.7500;
280: y[i] = 55.5000; t[i++] = 0.8750;
281: y[i] = 50.3000; t[i++] = 1.0000;
282: y[i] = 41.0000; t[i++] = 1.2500;
283: y[i] = 29.4000; t[i++] = 1.7500;
284: y[i] = 20.4000; t[i++] = 2.2500;
285: y[i] = 29.3625; t[i++] = 1.7500;
286: y[i] = 21.1500; t[i++] = 2.2500;
287: y[i] = 16.7625; t[i++] = 2.7500;
288: y[i] = 13.2000; t[i++] = 3.2500;
289: y[i] = 10.8750; t[i++] = 3.7500;
290: y[i] = 8.1750; t[i++] = 4.2500;
291: y[i] = 7.3500; t[i++] = 4.7500;
292: y[i] = 5.9625; t[i++] = 5.2500;
293: y[i] = 5.6250; t[i++] = 5.7500;
294: y[i] = 81.5000; t[i++] = .5000;
295: y[i] = 62.4000; t[i++] = .7500;
296: y[i] = 32.5000; t[i++] = 1.5000;
297: y[i] = 12.4100; t[i++] = 3.0000;
298: y[i] = 13.1200; t[i++] = 3.0000;
299: y[i] = 15.5600; t[i++] = 3.0000;
300: y[i] = 5.6300; t[i++] = 6.0000;
301: y[i] = 78.0000; t[i++] = .5000;
302: y[i] = 59.9000; t[i++] = .7500;
303: y[i] = 33.2000; t[i++] = 1.5000;
304: y[i] = 13.8400; t[i++] = 3.0000;
305: y[i] = 12.7500; t[i++] = 3.0000;
306: y[i] = 14.6200; t[i++] = 3.0000;
307: y[i] = 3.9400; t[i++] = 6.0000;
308: y[i] = 76.8000; t[i++] = .5000;
309: y[i] = 61.0000; t[i++] = .7500;
310: y[i] = 32.9000; t[i++] = 1.5000;
311: y[i] = 13.8700; t[i++] = 3.0000;
312: y[i] = 11.8100; t[i++] = 3.0000;
313: y[i] = 13.3100; t[i++] = 3.0000;
314: y[i] = 5.4400; t[i++] = 6.0000;
315: y[i] = 78.0000; t[i++] = .5000;
316: y[i] = 63.5000; t[i++] = .7500;
317: y[i] = 33.8000; t[i++] = 1.5000;
318: y[i] = 12.5600; t[i++] = 3.0000;
319: y[i] = 5.6300; t[i++] = 6.0000;
320: y[i] = 12.7500; t[i++] = 3.0000;
321: y[i] = 13.1200; t[i++] = 3.0000;
322: y[i] = 5.4400; t[i++] = 6.0000;
323: y[i] = 76.8000; t[i++] = .5000;
324: y[i] = 60.0000; t[i++] = .7500;
325: y[i] = 47.8000; t[i++] = 1.0000;
326: y[i] = 32.0000; t[i++] = 1.5000;
327: y[i] = 22.2000; t[i++] = 2.0000;
328: y[i] = 22.5700; t[i++] = 2.0000;
329: y[i] = 18.8200; t[i++] = 2.5000;
330: y[i] = 13.9500; t[i++] = 3.0000;
331: y[i] = 11.2500; t[i++] = 4.0000;
332: y[i] = 9.0000; t[i++] = 5.0000;
333: y[i] = 6.6700; t[i++] = 6.0000;
334: y[i] = 75.8000; t[i++] = .5000;
335: y[i] = 62.0000; t[i++] = .7500;
336: y[i] = 48.8000; t[i++] = 1.0000;
337: y[i] = 35.2000; t[i++] = 1.5000;
338: y[i] = 20.0000; t[i++] = 2.0000;
339: y[i] = 20.3200; t[i++] = 2.0000;
340: y[i] = 19.3100; t[i++] = 2.5000;
341: y[i] = 12.7500; t[i++] = 3.0000;
342: y[i] = 10.4200; t[i++] = 4.0000;
343: y[i] = 7.3100; t[i++] = 5.0000;
344: y[i] = 7.4200; t[i++] = 6.0000;
345: y[i] = 70.5000; t[i++] = .5000;
346: y[i] = 59.5000; t[i++] = .7500;
347: y[i] = 48.5000; t[i++] = 1.0000;
348: y[i] = 35.8000; t[i++] = 1.5000;
349: y[i] = 21.0000; t[i++] = 2.0000;
350: y[i] = 21.6700; t[i++] = 2.0000;
351: y[i] = 21.0000; t[i++] = 2.5000;
352: y[i] = 15.6400; t[i++] = 3.0000;
353: y[i] = 8.1700; t[i++] = 4.0000;
354: y[i] = 8.5500; t[i++] = 5.0000;
355: y[i] = 10.1200; t[i++] = 6.0000;
356: y[i] = 78.0000; t[i++] = .5000;
357: y[i] = 66.0000; t[i++] = .6250;
358: y[i] = 62.0000; t[i++] = .7500;
359: y[i] = 58.0000; t[i++] = .8750;
360: y[i] = 47.7000; t[i++] = 1.0000;
361: y[i] = 37.8000; t[i++] = 1.2500;
362: y[i] = 20.2000; t[i++] = 2.2500;
363: y[i] = 21.0700; t[i++] = 2.2500;
364: y[i] = 13.8700; t[i++] = 2.7500;
365: y[i] = 9.6700; t[i++] = 3.2500;
366: y[i] = 7.7600; t[i++] = 3.7500;
367: y[i] = 5.4400; t[i++] = 4.2500;
368: y[i] = 4.8700; t[i++] = 4.7500;
369: y[i] = 4.0100; t[i++] = 5.2500;
370: y[i] = 3.7500; t[i++] = 5.7500;
371: y[i] = 24.1900; t[i++] = 3.0000;
372: y[i] = 25.7600; t[i++] = 3.0000;
373: y[i] = 18.0700; t[i++] = 3.0000;
374: y[i] = 11.8100; t[i++] = 3.0000;
375: y[i] = 12.0700; t[i++] = 3.0000;
376: y[i] = 16.1200; t[i++] = 3.0000;
377: y[i] = 70.8000; t[i++] = .5000;
378: y[i] = 54.7000; t[i++] = .7500;
379: y[i] = 48.0000; t[i++] = 1.0000;
380: y[i] = 39.8000; t[i++] = 1.5000;
381: y[i] = 29.8000; t[i++] = 2.0000;
382: y[i] = 23.7000; t[i++] = 2.5000;
383: y[i] = 29.6200; t[i++] = 2.0000;
384: y[i] = 23.8100; t[i++] = 2.5000;
385: y[i] = 17.7000; t[i++] = 3.0000;
386: y[i] = 11.5500; t[i++] = 4.0000;
387: y[i] = 12.0700; t[i++] = 5.0000;
388: y[i] = 8.7400; t[i++] = 6.0000;
389: y[i] = 80.7000; t[i++] = .5000;
390: y[i] = 61.3000; t[i++] = .7500;
391: y[i] = 47.5000; t[i++] = 1.0000;
392: y[i] = 29.0000; t[i++] = 1.5000;
393: y[i] = 24.0000; t[i++] = 2.0000;
394: y[i] = 17.7000; t[i++] = 2.5000;
395: y[i] = 24.5600; t[i++] = 2.0000;
396: y[i] = 18.6700; t[i++] = 2.5000;
397: y[i] = 16.2400; t[i++] = 3.0000;
398: y[i] = 8.7400; t[i++] = 4.0000;
399: y[i] = 7.8700; t[i++] = 5.0000;
400: y[i] = 8.5100; t[i++] = 6.0000;
401: y[i] = 66.7000; t[i++] = .5000;
402: y[i] = 59.2000; t[i++] = .7500;
403: y[i] = 40.8000; t[i++] = 1.0000;
404: y[i] = 30.7000; t[i++] = 1.5000;
405: y[i] = 25.7000; t[i++] = 2.0000;
406: y[i] = 16.3000; t[i++] = 2.5000;
407: y[i] = 25.9900; t[i++] = 2.0000;
408: y[i] = 16.9500; t[i++] = 2.5000;
409: y[i] = 13.3500; t[i++] = 3.0000;
410: y[i] = 8.6200; t[i++] = 4.0000;
411: y[i] = 7.2000; t[i++] = 5.0000;
412: y[i] = 6.6400; t[i++] = 6.0000;
413: y[i] = 13.6900; t[i++] = 3.0000;
414: y[i] = 81.0000; t[i++] = .5000;
415: y[i] = 64.5000; t[i++] = .7500;
416: y[i] = 35.5000; t[i++] = 1.5000;
417: y[i] = 13.3100; t[i++] = 3.0000;
418: y[i] = 4.8700; t[i++] = 6.0000;
419: y[i] = 12.9400; t[i++] = 3.0000;
420: y[i] = 5.0600; t[i++] = 6.0000;
421: y[i] = 15.1900; t[i++] = 3.0000;
422: y[i] = 14.6200; t[i++] = 3.0000;
423: y[i] = 15.6400; t[i++] = 3.0000;
424: y[i] = 25.5000; t[i++] = 1.7500;
425: y[i] = 25.9500; t[i++] = 1.7500;
426: y[i] = 81.7000; t[i++] = .5000;
427: y[i] = 61.6000; t[i++] = .7500;
428: y[i] = 29.8000; t[i++] = 1.7500;
429: y[i] = 29.8100; t[i++] = 1.7500;
430: y[i] = 17.1700; t[i++] = 2.7500;
431: y[i] = 10.3900; t[i++] = 3.7500;
432: y[i] = 28.4000; t[i++] = 1.7500;
433: y[i] = 28.6900; t[i++] = 1.7500;
434: y[i] = 81.3000; t[i++] = .5000;
435: y[i] = 60.9000; t[i++] = .7500;
436: y[i] = 16.6500; t[i++] = 2.7500;
437: y[i] = 10.0500; t[i++] = 3.7500;
438: y[i] = 28.9000; t[i++] = 1.7500;
439: y[i] = 28.9500; t[i++] = 1.7500;
441: return(0);
442: }
446: PetscErrorCode MyMonitor(TaoSolver tao, void *ptr)
447: {
448: PetscReal fc,gnorm;
449: PetscInt its;
450: PetscViewer viewer = PETSC_VIEWER_STDOUT_SELF;
455: TaoGetSolutionStatus(tao,&its,&fc,&gnorm,0,0,0);
456: ierr=PetscViewerASCIIPrintf(viewer,"iter = %3D,",its);
457: ierr=PetscViewerASCIIPrintf(viewer," Function value %G,",fc);
458: if (gnorm > 1.e-6) {
459: ierr=PetscViewerASCIIPrintf(viewer," Residual: %G \n",gnorm);
460: } else if (gnorm > 1.e-11) {
461: ierr=PetscViewerASCIIPrintf(viewer," Residual: < 1.0e-6 \n");
462: } else {
463: ierr=PetscViewerASCIIPrintf(viewer," Residual: < 1.0e-11 \n");
464: }
465:
466: return(0);
467: }
468:
471: PetscErrorCode TaskWorker(AppCtx *user)
472: {
473: PetscReal x[NPARAMETERS],f;
474: PetscMPIInt tag=IDLE_TAG;
475: PetscInt index;
476: MPI_Status status;
478:
479:
481: /* Send check-in message to master */
483: MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD);
484: while (tag != DIE_TAG) {
485: MPI_Recv(x,NPARAMETERS,MPIU_REAL,0,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);
486: tag = status.MPI_TAG;
487: if (tag == IDLE_TAG) {
488: MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD);
489: } else if (tag != DIE_TAG) {
490: index = (PetscInt)tag;
491: ierr=RunSimulation(x,index,&f,user);
492: ierr=MPI_Send(&f,1,MPIU_REAL,0,tag,PETSC_COMM_WORLD);
493: }
494: }
495: return(0);
496: }
500: PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user)
501: {
502: PetscReal *t = user->t;
503: PetscReal *y = user->y;
504: *f = y[i] - PetscExpScalar(-x[0]*t[i])/(x[1] + x[2]*t[i]);
505: return(0);
506: }
510: PetscErrorCode StopWorkers(AppCtx *user)
511: {
512: PetscInt checkedin;
513: MPI_Status status;
514: PetscReal f,x[NPARAMETERS];
518: checkedin=0;
519: while(checkedin < user->size-1) {
520: MPI_Recv(&f,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);
521: checkedin++;
522: MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,DIE_TAG,PETSC_COMM_WORLD);
523: }
524: return(0);
525: }