7 #include "../toolkits/toolkits.h"
8 #include "../classes/classes.h"
9 #include "../shared/shared.h"
10 #include "../modules/modules.h"
11 #include "../solutionsequences/solutionsequences.h"
13 #ifdef _HAVE_CODIPACK_
14 extern CoDi_global codi_global;
18 #if defined (_HAVE_M1QN3_) && defined(_HAVE_AD_)
20 extern "C" void *ctonbe_;
21 extern "C" void *ctcabe_;
22 extern "C" void *euclid_;
23 typedef void (*SimulFunc) (
long* indic,
long* n,
double* x,
double* pf,
double* g,
long [],
float [],
void* dzs);
24 extern "C" void m1qn3_ (
void f(
long* indic,
long* n,
double* x,
double* pf,
double* g,
long [],
float [],
void* dzs),
25 void **,
void **,
void **,
26 long *,
double [],
double *,
double[],
double*,
double *,
27 double *,
char [],
long *,
long *,
long *,
long *,
long *,
long *,
long [],
double [],
long *,
28 long *,
long *,
long [],
float [],
void* );
43 #if defined(_HAVE_ADOLC_)
59 setStoreManagerControl(reCast<IssmPDouble>(gcTriggerRatio),reCast<size_t>(gcTriggerMaxSize));
62 int skipFileDeletion=1;
65 trace_on(my_rank,keepTaylors,reCast<size_t>(obufsize),reCast<size_t>(lbufsize),reCast<size_t>(cbufsize),reCast<size_t>(tbufsize),skipFileDeletion);
67 #elif defined(_HAVE_CODIPACK_)
76 auto& tape_codi = IssmDouble::getGlobalTape();
77 tape_codi.setActive();
81 tape_codi.registerInput(y_t);
84 for(
int i = 0;i < codi_allocn;++i) {
102 void simul_stoptrace(){
104 #if defined(_HAVE_ADOLC_)
110 size_t tape_stats[15];
111 tapestats(my_rank,tape_stats);
113 int *sstats=
new int[7];
114 sstats[0]=tape_stats[NUM_OPERATIONS];
115 sstats[1]=tape_stats[OP_FILE_ACCESS];
116 sstats[2]=tape_stats[NUM_LOCATIONS];
117 sstats[3]=tape_stats[LOC_FILE_ACCESS];
118 sstats[4]=tape_stats[NUM_VALUES];
119 sstats[5]=tape_stats[VAL_FILE_ACCESS];
120 sstats[6]=tape_stats[TAY_STACK_SIZE];
122 if (my_rank==0) rstats=
new int[commSize*7];
126 int rOffset=(commSize/10)+1;
128 _printf_(
" "<<setw(offset)<<left<<
"#independents: " <<setw(12)<<right<<tape_stats[NUM_INDEPENDENTS] <<
"\n");
129 _printf_(
" "<<setw(offset)<<left<<
"#dependents: " <<setw(12)<<right<<tape_stats[NUM_DEPENDENTS] <<
"\n");
130 _printf_(
" "<<setw(offset)<<left<<
"max #live active variables: " <<setw(12)<<right<<tape_stats[NUM_MAX_LIVES] <<
"\n");
131 _printf_(
" operations: entry size "<<
sizeof(
unsigned char) <<
" Bytes \n");
132 _printf_(
" "<<setw(offset)<<left<<
" #entries in buffer (AutodiffObufsizeEnum) " <<setw(12)<<right<<tape_stats[OP_BUFFER_SIZE] <<
"\n");
133 for (
int r=0;r<commSize;++r)
134 _printf_(
" ["<<setw(rOffset)<<right<<r<<
"]"<<setw(offset-rOffset-4)<<left<<
" #entries total" <<setw(12)<<right<<rstats[r*7+0] << (rstats[r*7+1]?
" ->file":
"") <<
"\n");
135 _printf_(
" locations: entry size " <<
sizeof(locint) <<
" Bytes\n");
136 _printf_(
" "<<setw(offset)<<left<<
" #entries in buffer (AutodiffLbufsizeEnum) " <<setw(12)<<right<<tape_stats[LOC_BUFFER_SIZE] <<
"\n");
137 for (
int r=0;r<commSize;++r)
138 _printf_(
" ["<<setw(rOffset)<<right<<r<<
"]"<<setw(offset-rOffset-4)<<left<<
" #entries total" <<setw(12)<<right<<rstats[r*7+2] << (rstats[r*7+3]?
" ->file":
"") <<
"\n");
139 _printf_(
" constant values: entry size " <<
sizeof(
double) <<
" Bytes\n");
140 _printf_(
" "<<setw(offset)<<left<<
" #entries in buffer (AutodiffCbufsizeEnum) " <<setw(12)<<right<<tape_stats[VAL_BUFFER_SIZE] <<
"\n");
141 for (
int r=0;r<commSize;++r)
142 _printf_(
" ["<<setw(rOffset)<<right<<r<<
"]"<<setw(offset-rOffset-4)<<left<<
" #entries total" <<setw(12)<<right<<rstats[r*7+4] << (rstats[r*7+5]?
" ->file":
"") <<
"\n");
143 _printf_(
" Taylor stack: entry size " <<
sizeof(revreal) <<
" Bytes\n");
144 _printf_(
" "<<setw(offset)<<left<<
" #entries in buffer (AutodiffTbufsizeEnum) " <<setw(12)<<right<<tape_stats[TAY_BUFFER_SIZE] <<
"\n");
145 for (
int r=0;r<commSize;++r)
146 _printf_(
" ["<<setw(rOffset)<<right<<r<<
"]"<<setw(offset-rOffset-4)<<left<<
" #entries total" <<setw(12)<<right<<rstats[r*7+6] << (rstats[r*7+6]>tape_stats[TAY_BUFFER_SIZE]?
" ->file":
"") <<
"\n");
152 #ifdef _HAVE_CODIPACK_
153 #ifdef _AD_TAPE_ALLOC_
156 std::stringstream out_s;
157 IssmDouble::getGlobalTape().printStatistics(out_s);
158 _printf0_(
"CoDiPack Profiling::Tape Statistics :\n" << out_s.str());
162 #elif defined(_HAVE_CODIPACK_)
163 auto& tape_codi = IssmDouble::getGlobalTape();
164 tape_codi.setPassive();
169 tape_codi.printStatistics(std::cout);
170 codi_global.print(std::cout);
177 void simul_ad(
long* indic,
long* n,
double* X,
double* pf,
double* G,
long izs[1],
float rzs[1],
void* dzs){
183 m1qn3_struct *input_struct = (m1qn3_struct*)dzs;
186 int num_responses,num_controls,solution_type;
193 int JlistM = input_struct->M;
194 int JlistN = input_struct->N;
195 int* Jlisti = input_struct->i;
201 int *control_enum = NULL;
216 for (
int c=0;c<num_controls;c++){
217 for(
int i=0;i<numberofvertices*N[c];i++){
218 int index = N_add*numberofvertices+i;
219 X[index] = X[index]*reCast<double>(scaling_factors[c]);
220 if(X[index]>XU[index]) X[index]=XU[index];
221 if(X[index]<XL[index]) X[index]=XL[index];
235 #if defined(_HAVE_ADOLC_)
237 for(
int i=0;i<intn;i++){
241 #elif defined(_HAVE_CODIPACK_)
242 auto& tape_codi = IssmDouble::getGlobalTape();
243 codi_global.input_indices.clear();
245 for (
int i=0;i<intn;i++) {
247 tape_codi.registerInput(aX[i]);
248 codi_global.input_indices.push_back(aX[i].getGradientData());
257 xDelete<IssmDouble>(aX);
260 void (*solutioncore)(
FemModel*)=NULL;
275 DataSet *dependent_objects = NULL;
281 dependents=xNew<IssmPDouble>(num_dependents);
282 #if defined(_HAVE_CODIPACK_)
283 codi_global.output_indices.clear();
285 for(
int i=0;i<dependent_objects->
Size();i++){
291 #if defined(_HAVE_CODIPACK_)
292 tape_codi.registerOutput(output_value);
293 dependents[i] = output_value.getValue();
294 codi_global.output_indices.push_back(output_value.getGradientData());
296 #elif defined(_HAVE_ADOLC_)
297 output_value>>=dependents[i];
312 int num_independents=intn;
315 IssmPDouble *totalgradient=xNewZeroInit<IssmPDouble>(num_independents);
318 if(!(num_dependents*num_independents))
_error_(
"this is not allowed");
321 int num_dependents_old = num_dependents;
322 int num_independents_old = num_independents;
324 #if defined(_HAVE_ADOLC_)
328 num_independents = 0;
335 anEDF_for_solverx_p->dp_x=xNew<double>(anEDF_for_solverx_p->max_n);
336 anEDF_for_solverx_p->dp_y=xNew<double>(anEDF_for_solverx_p->max_m);
340 for(
int aDepIndex=0;aDepIndex<num_dependents_old;aDepIndex++){
343 aWeightVector=xNewZeroInit<IssmPDouble>(num_dependents);
344 if (my_rank==0) aWeightVector[aDepIndex]=1.;
347 weightVectorTimesJac=xNew<IssmPDouble>(num_independents);
351 anEDF_for_solverx_p->fos_reverse=EDF_fos_reverse_for_solverx;
354 anEDF_for_solverx_p->fos_reverse_iArr=fos_reverse_mumpsSolveEDF;
357 anEDF_for_solverx_p->dp_U=xNew<IssmPDouble>(anEDF_for_solverx_p->max_m);
358 anEDF_for_solverx_p->dp_Z=xNew<IssmPDouble>(anEDF_for_solverx_p->max_n);
361 fos_reverse(my_rank,num_dependents,num_independents, aWeightVector, weightVectorTimesJac );
364 if(my_rank==0)
for(
int i=0;i<num_independents;i++) {
365 totalgradient[i]+=weightVectorTimesJac[i];
373 #elif defined(_HAVE_CODIPACK_)
379 for(
int dep_index=0;dep_index<num_dependents_old;dep_index++){
383 if(dep_index<0 || dep_index>=num_dependents || codi_global.output_indices.size() <= dep_index){
384 _error_(
"index value for dependent index should be in [0,num_dependents-1]");
386 tape_codi.setGradient(codi_global.output_indices[dep_index],1.0);
388 tape_codi.evaluate();
391 weightVectorTimesJac=xNew<IssmPDouble>(num_independents);
392 auto in_size = codi_global.input_indices.size();
393 for(
size_t i = 0; i < in_size; ++i){
395 weightVectorTimesJac[i] = tape_codi.getGradient(codi_global.input_indices[i]);
397 if(my_rank==0)
for(
int i=0;i<num_independents;i++){
398 totalgradient[i]+=weightVectorTimesJac[i];
414 *pf = reCast<double>(J);
416 _printf0_(
"f(x) = "<<setw(12)<<setprecision(7)<<*pf<<
" | ");
419 for(
int i=0;i<num_responses;i++) Jlist[(*Jlisti)*JlistN+i] = reCast<IssmPDouble>(dependents[i]);
420 Jlist[(*Jlisti)*JlistN+num_responses] = reCast<IssmPDouble>(J);
427 for(
int i=0;i<num_responses;i++)
_printf0_(
" "<<setw(12)<<setprecision(7)<<Jlist[(*Jlisti)*JlistN+i]);
430 *Jlisti = (*Jlisti) +1;
437 for(
long i=0;i<num_independents_old;i++) G[i] = totalgradient[i];
442 for (
int c=0;c<num_controls;c++){
443 for(
int i=0;i<numberofvertices*N[c];i++){
444 int index = N_add*numberofvertices+i;
445 if(X[index]>=XU[index]) G[index]=0.;
446 if(X[index]<=XL[index]) G[index]=0.;
447 G[index] = G[index]*reCast<double>(scaling_factors[c]);
448 X[index] = X[index]/reCast<double>(scaling_factors[c]);
449 Gnorm += G[index]*G[index];
456 _printf0_(
" "<<setw(12)<<setprecision(7)<<Gnorm<<
" |");
457 for(
int i=0;i<num_responses;i++)
_printf0_(
" "<<setw(12)<<setprecision(7)<<Jlist[(*Jlisti)*JlistN+i]);
461 *Jlisti = (*Jlisti) +1;
464 xDelete<int>(control_enum);
466 xDelete<IssmDouble>(scaling_factors);
467 xDelete<IssmPDouble>(totalgradient);
475 int maxsteps,maxiter;
476 int intn,numberofvertices,num_controls,num_cost_functions,solution_type;
501 SimulFunc simul_ptr = &simul_ad;
502 void** prosca = &euclid_;
503 char normtype[] =
"dfn";
514 long niter = long(maxsteps);
515 long nsim = long(maxiter);
527 for (
int c=0;c<num_controls;c++){
528 for(
int i=0;i<numberofvertices*N[c];i++){
529 int index = N_add*numberofvertices+i;
530 X[index] = X[index]/reCast<IssmPDouble>(scaling_factors[c]);
537 long ndz = 4*n+m*(2*n+1);
538 double* dz = xNew<double>(ndz);
541 _printf0_(
"Cost function f(x) | Gradient norm |g(x)| | List of contributions\n");
542 _printf0_(
"____________________________________________________________________\n");
545 m1qn3_struct mystruct;
547 mystruct.M = maxiter;
548 mystruct.N = num_cost_functions+1;
549 mystruct.Jlist = xNewZeroInit<IssmPDouble>(mystruct.M*mystruct.N);
550 mystruct.i = xNewZeroInit<int>(1);
553 simul_ad(&indic,&n,X,&f,G,izs,rzs,(
void*)&mystruct);
558 m1qn3_(simul_ptr,prosca,&ctonbe_,&ctcabe_,
559 &n,X,&f,G,&dxmin,&df1,
560 >tol,normtype,&impres,&io,imode,&omode,&niter,&nsim,iz,dz,&ndz,
561 &reverse,&indic,izs,rzs,(
void*)&mystruct);
563 case 0:
_printf0_(
" Stop requested (indic = 0)\n");
break;
564 case 1:
_printf0_(
" Convergence reached (gradient satisfies stopping criterion)\n");
break;
565 case 2:
_printf0_(
" Bad initialization\n");
break;
566 case 3:
_printf0_(
" Line search failure\n");
break;
567 case 4:
_printf0_(
" Maximum number of iterations exceeded\n");
break;
568 case 5:
_printf0_(
" Maximum number of function calls exceeded\n");
break;
569 case 6:
_printf0_(
" stopped on dxmin during line search\n");
break;
570 case 7:
_printf0_(
" <g,d> > 0 or <y,s> <0\n");
break;
571 default:
_printf0_(
" Unknown end condition\n");
581 for (
int c=0;c<num_controls;c++){
582 for(
int i=0;i<numberofvertices*N[c];i++){
583 int index = N_add*numberofvertices+i;
584 X[index] = X[index]*reCast<double>(scaling_factors[c]);
585 if(X[index]>XU[index]) X[index]=XU[index];
586 if(X[index]<XL[index]) X[index]=XL[index];
595 for(
int i=0;i<intn;i++) {
596 aX[i] = reCast<IssmDouble>(X[i]);
597 aG[i] = reCast<IssmDouble>(G[i]);
611 for(
int i=0;i<num_controls;i++){
625 offset += N[i]*numberofvertices;
641 void (*solutioncore)(
FemModel*)=NULL;
651 xDelete<IssmDouble>(scaling_factors);
652 xDelete<IssmPDouble>(mystruct.Jlist);
653 xDelete<int>(mystruct.i);
658 #endif //_HAVE_M1QN3_