/*!\file: controladm1qn3_core.cpp
 * \brief: core of the control solution 
 */ 
#include <ctime>
#include <config.h>
#include "./cores.h"
#include "../toolkits/toolkits.h"
#include "../classes/classes.h"
#include "../shared/shared.h"
#include "../modules/modules.h"
#include "../solutionsequences/solutionsequences.h"

#ifdef _HAVE_CODIPACK_
extern CoDi_global codi_global;
#include <sstream> // for output of the CoDiPack tape
#endif

#if defined (_HAVE_M1QN3_) && defined(_HAVE_AD_)
/*m1qn3 prototypes {{{*/
extern "C" void *ctonbe_; // DIS mode : Conversion
extern "C" void *ctcabe_; // DIS mode : Conversion
extern "C" void *euclid_; // Scalar product
typedef void (*SimulFunc) (long* indic,long* n, double* x,double* pf,double* g,long [],float [],void* dzs);
extern "C" void m1qn3_ (void f(long* indic,long* n, double* x,double* pf,double* g,long [],float [],void* dzs),
			void **, void **, void **,
			long *, double [],double *, double[], double*, double *,
			double *, char [], long *, long *, long *, long *, long *, long *, long [],double [], long *,
			long *, long *, long [], float [],void* );

/*Use struct to provide arguments*/
typedef struct{
	FemModel   * femmodel;
	IssmPDouble* Jlist;
	int          M;
	int          N;
	int*         i;
} m1qn3_struct;
/*}}}*/

/*m1qm3 functions*/
void simul_starttrace(FemModel* femmodel){/*{{{*/

	#if defined(_HAVE_ADOLC_)
	/*Retrive ADOLC parameters*/
	IssmDouble gcTriggerRatio;
	IssmDouble gcTriggerMaxSize;
	IssmDouble obufsize;
	IssmDouble lbufsize;
	IssmDouble cbufsize;
	IssmDouble tbufsize;
	femmodel->parameters->FindParam(&gcTriggerRatio,AutodiffGcTriggerRatioEnum);
	femmodel->parameters->FindParam(&gcTriggerMaxSize,AutodiffGcTriggerMaxSizeEnum);
	femmodel->parameters->FindParam(&obufsize,AutodiffObufsizeEnum);
	femmodel->parameters->FindParam(&lbufsize,AutodiffLbufsizeEnum);
	femmodel->parameters->FindParam(&cbufsize,AutodiffCbufsizeEnum);
	femmodel->parameters->FindParam(&tbufsize,AutodiffTbufsizeEnum);

	/*Set garbage collection parameters: */
	setStoreManagerControl(reCast<IssmPDouble>(gcTriggerRatio),reCast<size_t>(gcTriggerMaxSize));

	/*Start trace: */
	int skipFileDeletion=1;
	int keepTaylors=1;
	int my_rank=IssmComm::GetRank();
	trace_on(my_rank,keepTaylors,reCast<size_t>(obufsize),reCast<size_t>(lbufsize),reCast<size_t>(cbufsize),reCast<size_t>(tbufsize),skipFileDeletion);

	#elif defined(_HAVE_CODIPACK_)

		//fprintf(stderr, "*** Codipack IoModel::StartTrace\n");
		/*
		 * FIXME codi
		 * - ADOL-C variant uses fine grained tracing with various arguments
		 * - ADOL-C variant sets a garbage collection parameter for its tape
		 * -> These parameters are not read for the CoDiPack ISSM version!
		 */
		auto& tape_codi = IssmDouble::getGlobalTape();
		tape_codi.setActive();
		#if _AD_TAPE_ALLOC_
		//alloc_profiler.Tag(StartInit, true);
		IssmDouble x_t(1.0), y_t(1.0);
		tape_codi.registerInput(y_t);
		int codi_allocn = 0;
		femmodel->parameters->FindParam(&codi_allocn,AutodiffTapeAllocEnum);
		for(int i = 0;i < codi_allocn;++i) {
			x_t = y_t * y_t;
		}
		/*
		std::stringstream out_s;
		IssmDouble::getGlobalTape().printStatistics(out_s);
		_printf0_("StartTrace::Tape Statistics	   : TapeAlloc count=[" << codi_allocn << "]\n" << out_s.str());
		*/
		tape_codi.reset();
		//alloc_profiler.Tag(FinishInit, true);
		#else
		tape_codi.reset();
		#endif

	#else
	_error_("not implemented");
	#endif
}/*}}}*/
void simul_stoptrace(){/*{{{*/

	#if defined(_HAVE_ADOLC_)
	trace_off();
	if(VerboseAutodiff()){ /*{{{*/

		#ifdef _HAVE_ADOLC_
		int my_rank=IssmComm::GetRank();
		size_t  tape_stats[15];
		tapestats(my_rank,tape_stats); //reading of tape statistics
		int commSize=IssmComm::GetSize();
		int *sstats=new int[7];
		sstats[0]=tape_stats[NUM_OPERATIONS];
		sstats[1]=tape_stats[OP_FILE_ACCESS];
		sstats[2]=tape_stats[NUM_LOCATIONS];
		sstats[3]=tape_stats[LOC_FILE_ACCESS];
		sstats[4]=tape_stats[NUM_VALUES];
		sstats[5]=tape_stats[VAL_FILE_ACCESS];
		sstats[6]=tape_stats[TAY_STACK_SIZE];
		int *rstats=NULL;
		if (my_rank==0) rstats=new int[commSize*7];
		ISSM_MPI_Gather(sstats,7,ISSM_MPI_INT,rstats,7,ISSM_MPI_INT,0,IssmComm::GetComm());
		if (my_rank==0) {
			int offset=50;
			int rOffset=(commSize/10)+1;
			_printf_("   ADOLC statistics: \n");
			_printf_("     "<<setw(offset)<<left<<"#independents: " <<setw(12)<<right<<tape_stats[NUM_INDEPENDENTS] << "\n");
			_printf_("     "<<setw(offset)<<left<<"#dependents: " <<setw(12)<<right<<tape_stats[NUM_DEPENDENTS] << "\n");
			_printf_("     "<<setw(offset)<<left<<"max #live active variables: " <<setw(12)<<right<<tape_stats[NUM_MAX_LIVES] << "\n");
			_printf_("     operations: entry size "<< sizeof(unsigned char) << " Bytes \n");
			_printf_("     "<<setw(offset)<<left<<"  #entries in buffer (AutodiffObufsizeEnum) " <<setw(12)<<right<<tape_stats[OP_BUFFER_SIZE] << "\n");
			for (int r=0;r<commSize;++r)
			 _printf_("       ["<<setw(rOffset)<<right<<r<<"]"<<setw(offset-rOffset-4)<<left<<" #entries total" <<setw(12)<<right<<rstats[r*7+0] << (rstats[r*7+1]?" ->file":"") << "\n");
			_printf_("     locations: entry size " << sizeof(locint) << " Bytes\n");
			_printf_("     "<<setw(offset)<<left<<"  #entries in buffer (AutodiffLbufsizeEnum) " <<setw(12)<<right<<tape_stats[LOC_BUFFER_SIZE] << "\n");
			for (int r=0;r<commSize;++r)
			 _printf_("       ["<<setw(rOffset)<<right<<r<<"]"<<setw(offset-rOffset-4)<<left<<" #entries total" <<setw(12)<<right<<rstats[r*7+2] << (rstats[r*7+3]?" ->file":"") << "\n");
			_printf_("     constant values: entry size " << sizeof(double) << " Bytes\n");
			_printf_("     "<<setw(offset)<<left<<"  #entries in buffer (AutodiffCbufsizeEnum) " <<setw(12)<<right<<tape_stats[VAL_BUFFER_SIZE] << "\n");
			for (int r=0;r<commSize;++r)
			 _printf_("       ["<<setw(rOffset)<<right<<r<<"]"<<setw(offset-rOffset-4)<<left<<" #entries total" <<setw(12)<<right<<rstats[r*7+4] << (rstats[r*7+5]?" ->file":"") << "\n");
			_printf_("     Taylor stack: entry size " << sizeof(revreal) << " Bytes\n");
			_printf_("     "<<setw(offset)<<left<<"  #entries in buffer (AutodiffTbufsizeEnum) " <<setw(12)<<right<<tape_stats[TAY_BUFFER_SIZE] << "\n");
			for (int r=0;r<commSize;++r)
			 _printf_("       ["<<setw(rOffset)<<right<<r<<"]"<<setw(offset-rOffset-4)<<left<<" #entries total" <<setw(12)<<right<<rstats[r*7+6] << (rstats[r*7+6]>tape_stats[TAY_BUFFER_SIZE]?" ->file":"") << "\n");
			delete []rstats;
		}
		delete [] sstats;
		#endif

		#ifdef _HAVE_CODIPACK_
		#ifdef _AD_TAPE_ALLOC_
		//_printf_("Allocation time  P(" << my_rank << "): " << alloc_profiler.DeltaTime(StartInit, FinishInit) << "\n");
		#endif
		std::stringstream out_s;
		IssmDouble::getGlobalTape().printStatistics(out_s);
		_printf0_("CoDiPack Profiling::Tape Statistics :\n" << out_s.str());
		#endif
	} /*}}}*/

	#elif defined(_HAVE_CODIPACK_)
	auto& tape_codi = IssmDouble::getGlobalTape();
	tape_codi.setPassive();
	if(VerboseAutodiff()){
		int my_rank=IssmComm::GetRank();
		if(my_rank == 0) {
			// FIXME codi "just because" for now
			tape_codi.printStatistics(std::cout);
			codi_global.print(std::cout);
		}
	}
	#else
	_error_("not implemented");
	#endif
}/*}}}*/
void simul_ad(long* indic,long* n,double* X,double* pf,double* G,long izs[1],float rzs[1],void* dzs){/*{{{*/

	/*Get rank*/
	int my_rank=IssmComm::GetRank();

	/*Recover Arguments*/
	m1qn3_struct *input_struct = (m1qn3_struct*)dzs;

	FemModel* femmodel = input_struct->femmodel;
	int num_responses,num_controls,solution_type;
	femmodel->parameters->FindParam(&solution_type,SolutionTypeEnum);

	/*In transient, we need to make sure we do not modify femmodel at each iteration, make a copy*/
	if(solution_type == TransientSolutionEnum) femmodel = input_struct->femmodel->copy();

	IssmPDouble*  Jlist        = input_struct->Jlist;
	int           JlistM       = input_struct->M;
	int           JlistN       = input_struct->N;
	int*          Jlisti       = input_struct->i;
	int           intn         = (int)*n;

	/*Recover some parameters*/
	IssmDouble *scaling_factors = NULL;
	int        *N               = NULL;
	int        *control_enum    = NULL;
	femmodel->parameters->FindParam(&num_responses,InversionNumCostFunctionsEnum);
	femmodel->parameters->FindParam(&num_controls,InversionNumControlParametersEnum);
	femmodel->parameters->FindParam(&scaling_factors,NULL,InversionControlScalingFactorsEnum);
	femmodel->parameters->FindParam(&N,NULL,ControlInputSizeNEnum);
	femmodel->parameters->FindParam(&control_enum,NULL,InversionControlParametersEnum);
	int numberofvertices=femmodel->vertices->NumberOfVertices();

	/*Constrain input vector and update controls*/
	double  *XL = NULL;
	double  *XU = NULL;
	GetPassiveVectorFromControlInputsx(&XL,femmodel->elements,femmodel->nodes,femmodel->vertices,femmodel->loads,femmodel->materials,femmodel->parameters,"lowerbound");
	GetPassiveVectorFromControlInputsx(&XU,femmodel->elements,femmodel->nodes,femmodel->vertices,femmodel->loads,femmodel->materials,femmodel->parameters,"upperbound");

	int N_add = 0;
	for (int c=0;c<num_controls;c++){
		for(int i=0;i<numberofvertices*N[c];i++){
			int index = N_add*numberofvertices+i;
			X[index] = X[index]*reCast<double>(scaling_factors[c]);
			if(X[index]>XU[index]) X[index]=XU[index];
			if(X[index]<XL[index]) X[index]=XL[index];
		}
		N_add+=N[c];
	}

	/*Start Tracing*/
	simul_starttrace(femmodel);
	/*Set X as our new control input and as INDEPENDENT*/
#ifdef _HAVE_AD_
	IssmDouble* aX=xNew<IssmDouble>(intn,"t");
#else
	IssmDouble* aX=xNew<IssmDouble>(intn);
#endif

	#if defined(_HAVE_ADOLC_)
	if(my_rank==0){
		for(int i=0;i<intn;i++){
			aX[i]<<=X[i];
		}
	}
	#elif defined(_HAVE_CODIPACK_)
	auto& tape_codi = IssmDouble::getGlobalTape();
	if(my_rank==0){
		for (int i=0;i<intn;i++) {
			aX[i]=X[i];
			tape_codi.registerInput(aX[i]);
			codi_global.input_indices.push_back(aX[i].getGradientData());
		}
	}
	#else
	_error_("not suppoted");
	#endif

	ISSM_MPI_Bcast(aX,intn,ISSM_MPI_DOUBLE,0,IssmComm::GetComm());
	SetControlInputsFromVectorx(femmodel,aX);
	xDelete<IssmDouble>(aX);

	/*Compute solution (forward)*/
	void (*solutioncore)(FemModel*)=NULL;
	CorePointerFromSolutionEnum(&solutioncore,femmodel->parameters,solution_type);
	solutioncore(femmodel);

	/*Reset the time to zero for next optimization*/
	if(solution_type==TransientSolutionEnum){
		IssmDouble restart_time;
		femmodel->parameters->FindParam(&restart_time,TimesteppingStartTimeEnum);
		femmodel->parameters->SetParam(restart_time,TimeEnum);

	}

	/*Get Dependents*/
	IssmDouble  output_value;
	int         num_dependents;
	IssmPDouble *dependents;
	DataSet*    dependent_objects=NULL;
	IssmDouble	J=0.;
	femmodel->parameters->FindParam(&num_dependents,AutodiffNumDependentsEnum);
	femmodel->parameters->FindParam(&dependent_objects,AutodiffDependentObjectsEnum);

	/*Go through our dependent variables, and compute the response:*/
	dependents=xNew<IssmPDouble>(num_dependents);
	for(int i=0;i<dependent_objects->Size();i++){
		DependentObject* dep=(DependentObject*)dependent_objects->GetObjectByOffset(i);
		if(solution_type==TransientSolutionEnum) output_value = dep->GetValue();
		if(solution_type!=TransientSolutionEnum) dep->Responsex(&output_value,femmodel);
		if(my_rank==0) {

			#if defined(_HAVE_CODIPACK_)
			tape_codi.registerOutput(output_value);
			dependents[i] = output_value.getValue();
			codi_global.output_indices.push_back(output_value.getGradientData());

			#elif defined(_HAVE_ADOLC_)
			output_value>>=dependents[i];

			#else
			_error_("not suppoted");
			#endif
			J+=output_value;
		}
	}

	/*Turning off trace tape*/
	simul_stoptrace();
	//time_t now = time(NULL);
	//if(my_rank==0) _printf_("\nTIME: "<<now<<"\n");

	/*intermediary: */
	int          num_independents=intn;
	IssmPDouble *aWeightVector=NULL;
	IssmPDouble *weightVectorTimesJac=NULL;
	IssmPDouble *totalgradient=xNewZeroInit<IssmPDouble>(num_independents);

	/*if no dependents, no point in running a driver: */
	if(!(num_dependents*num_independents)) _error_("this is not allowed");

	/*for adolc to run in parallel, we 0 out on rank~=0. But we still keep track of num_dependents:*/
	int num_dependents_old   = num_dependents;
	int num_independents_old = num_independents;
	if(my_rank!=0){
		num_dependents   = 0;
		num_independents = 0;
	}

	#if defined(_HAVE_ADOLC_)
	/*Get gradient for ADOLC {{{*/

	/*get the EDF pointer:*/
	ext_diff_fct *anEDF_for_solverx_p=xDynamicCast<GenericParam<Adolc_edf> * >(femmodel->parameters->FindParamObject(AdolcParamEnum))->GetParameterValue().myEDF_for_solverx_p;

	/* these are always needed regardless of the interpreter */
	anEDF_for_solverx_p->dp_x=xNew<double>(anEDF_for_solverx_p->max_n);
	anEDF_for_solverx_p->dp_y=xNew<double>(anEDF_for_solverx_p->max_m);

	/* Ok, now we are going to call the fos_reverse in a loop on the index, from 0 to num_dependents, so
	 * as to generate num_dependents gradients: */
	for(int aDepIndex=0;aDepIndex<num_dependents_old;aDepIndex++){

		/*initialize direction index in the weights vector: */
		aWeightVector=xNewZeroInit<IssmPDouble>(num_dependents);
		if (my_rank==0) aWeightVector[aDepIndex]=1.;

		/*initialize output gradient: */
		weightVectorTimesJac=xNew<IssmPDouble>(num_independents);

		/*set the forward method function pointer: */
		#ifdef _HAVE_GSL_
		anEDF_for_solverx_p->fos_reverse=EDF_fos_reverse_for_solverx;
		#endif
		#ifdef _HAVE_MUMPS_
		anEDF_for_solverx_p->fos_reverse_iArr=fos_reverse_mumpsSolveEDF;
		#endif

		anEDF_for_solverx_p->dp_U=xNew<IssmPDouble>(anEDF_for_solverx_p->max_m);
		anEDF_for_solverx_p->dp_Z=xNew<IssmPDouble>(anEDF_for_solverx_p->max_n);

		/*call driver: */
		fos_reverse(my_rank,num_dependents,num_independents, aWeightVector, weightVectorTimesJac );

		/*Add to totalgradient: */
		if(my_rank==0) for(int i=0;i<num_independents;i++) {
			totalgradient[i]+=weightVectorTimesJac[i];
		}

		/*free resources :*/
		xDelete(weightVectorTimesJac);
		xDelete(aWeightVector);
	}
	/*}}}*/
	#elif defined(_HAVE_CODIPACK_)
	/*Get gradient for CoDiPack{{{*/
	if(VerboseAutodiff())_printf0_("   CoDiPack fos_reverse\n");

	/* call the fos_reverse in a loop on the index, from 0 to num_dependents, so
	 * as to generate num_dependents gradients: */
	for(int aDepIndex=0;aDepIndex<num_dependents_old;aDepIndex++){

		/*initialize direction index in the weights vector: */
		aWeightVector=xNewZeroInit<IssmPDouble>(num_dependents);
		if(my_rank==0)  tape_codi.setGradient(codi_global.output_indices[aDepIndex],1.0);
		tape_codi.evaluate();

		/*Get gradient for this dependent */
		weightVectorTimesJac=xNew<IssmPDouble>(num_independents);
		auto in_size = codi_global.input_indices.size();
		for(size_t i = 0; i < in_size; ++i) {
			weightVectorTimesJac[i] = tape_codi.getGradient(codi_global.input_indices[i]);
		}
		if(my_rank==0) for(int i=0;i<num_independents;i++) {
			totalgradient[i]+=weightVectorTimesJac[i];
		}
		/*free resources :*/
		xDelete(weightVectorTimesJac);
		xDelete(aWeightVector);
	}
	/*}}}*/
	#else
	_error_("not suppoted");
	#endif

	/*Broadcast gradient to other ranks*/
	ISSM_MPI_Bcast(totalgradient,num_independents_old,ISSM_MPI_PDOUBLE,0,IssmComm::GetComm());
	/*Check size of Jlist to avoid crashes*/
	_assert_((*Jlisti)<JlistM);
	_assert_(JlistN==num_responses+1);

	/*Compute objective function and broadcast it to other cpus*/
	*pf = reCast<double>(J);
	ISSM_MPI_Bcast(pf,1,ISSM_MPI_PDOUBLE,0,IssmComm::GetComm());
	_printf0_("f(x) = "<<setw(12)<<setprecision(7)<<*pf<<"  |  ");

	/*Record cost function values and delete Jtemp*/
	for(int i=0;i<num_responses;i++) Jlist[(*Jlisti)*JlistN+i] = reCast<IssmPDouble>(dependents[i]);
	Jlist[(*Jlisti)*JlistN+num_responses] = reCast<IssmPDouble>(J);

	if(*indic==0){
		/*dry run, no gradient required*/

		/*Retrieve objective functions independently*/
		_printf0_("            N/A |\n");
		for(int i=0;i<num_responses;i++) _printf0_(" "<<setw(12)<<setprecision(7)<<Jlist[(*Jlisti)*JlistN+i]);
		_printf0_("\n");

		*Jlisti = (*Jlisti) +1;
		xDelete<double>(XU);
		xDelete<double>(XL);
		return;
	}

	/*Compute gradient*/
	for(long i=0;i<num_independents_old;i++) G[i] = totalgradient[i];

	/*Constrain Gradient*/
	IssmDouble  Gnorm = 0.;
	N_add = 0;
	for (int c=0;c<num_controls;c++){
		for(int i=0;i<numberofvertices*N[c];i++){
			int index = N_add*numberofvertices+i;
			if(X[index]>=XU[index]) G[index]=0.;
			if(X[index]<=XL[index]) G[index]=0.;
			G[index] = G[index]*reCast<double>(scaling_factors[c]);
			X[index] = X[index]/reCast<double>(scaling_factors[c]);
			Gnorm += G[index]*G[index];
		}
		N_add+=N[c];
	}
	Gnorm = sqrt(Gnorm);

	/*Print info*/
	_printf0_("       "<<setw(12)<<setprecision(7)<<Gnorm<<" |");
	for(int i=0;i<num_responses;i++) _printf0_(" "<<setw(12)<<setprecision(7)<<Jlist[(*Jlisti)*JlistN+i]);
	_printf0_("\n");

	/*Clean-up and return*/
	*Jlisti = (*Jlisti) +1;
	xDelete<double>(XU);
	xDelete<double>(XL);
	xDelete<IssmDouble>(scaling_factors);
	xDelete<IssmPDouble>(totalgradient);

}/*}}}*/
void controladm1qn3_core(FemModel* femmodel){/*{{{*/

	/*Intermediaries*/
	long         omode;
	double		 f;
	double		 dxmin,gttol; 
	int          maxsteps,maxiter;
	int          intn,numberofvertices,num_controls,num_cost_functions,solution_type;
	IssmDouble	*scaling_factors = NULL;
	double      *X  = NULL;
	double      *G  = NULL;
	int*			N	= NULL;
	int offset = 0;
	int N_add;
	int* control_enum;

	/*Recover some parameters*/
	femmodel->parameters->FindParam(&solution_type,SolutionTypeEnum);
	femmodel->parameters->FindParam(&num_controls,InversionNumControlParametersEnum);
	femmodel->parameters->FindParam(&num_cost_functions,InversionNumCostFunctionsEnum);
	femmodel->parameters->FindParam(&maxsteps,InversionMaxstepsEnum);
	femmodel->parameters->FindParam(&maxiter,InversionMaxiterEnum);
	femmodel->parameters->FindParamAndMakePassive(&dxmin,InversionDxminEnum);
	femmodel->parameters->FindParamAndMakePassive(&gttol,InversionGttolEnum);
	femmodel->parameters->FindParam(&scaling_factors,NULL,InversionControlScalingFactorsEnum);
	femmodel->parameters->FindParam(&control_enum,NULL,InversionControlParametersEnum);
	femmodel->parameters->SetParam(false,SaveResultsEnum);
	femmodel->parameters->FindParam(&N,NULL,ControlInputSizeNEnum);
	numberofvertices=femmodel->vertices->NumberOfVertices();

	/*Initialize M1QN3 parameters*/
	if(VerboseControl())_printf0_("   Initialize M1QN3 parameters\n");
	SimulFunc simul_ptr    = &simul_ad; /*Cost function address*/
	void**    prosca       = &euclid_;  /*Dot product function (euclid is the default)*/
	char      normtype[]   = "dfn";     /*Norm type: dfn = scalar product defined by prosca*/
	long      izs[5];                   /*Arrays used by m1qn3 subroutines*/
	long      iz[5];                    /*Integer m1qn3 working array of size 5*/
	float     rzs[1];                   /*Arrays used by m1qn3 subroutines*/
	long      impres       = 0;         /*verbosity level*/
	long      imode[3]     = {0};       /*scaling and starting mode, 0 by default*/
	long      indic        = 4;         /*compute f and g*/
	long      reverse      = 0;         /*reverse or direct mode*/
	long      io           = 6;         /*Channel number for the output*/

	/*Optimization criterions*/
	long niter = long(maxsteps); /*Maximum number of iterations*/
	long nsim  = long(maxiter);/*Maximum number of function calls*/

	/*Get initial guess*/
	Vector<double> *Xpetsc = NULL;

	GetPassiveVectorFromControlInputsx(&Xpetsc,femmodel->elements,femmodel->nodes,femmodel->vertices,femmodel->loads,femmodel->materials,femmodel->parameters,"value");
	X = Xpetsc->ToMPISerial();
	Xpetsc->GetSize(&intn);
	delete Xpetsc;
	//_assert_(intn==numberofvertices*num_controls);

	/*Get problem dimension and initialize gradient and initial guess*/
	long n = long(intn);
	G = xNew<double>(n);

	/*Scale control for M1QN3*/
	N_add = 0;
	for (int c=0;c<num_controls;c++){
		for(int i=0;i<numberofvertices*N[c];i++){
			int index = N_add*numberofvertices+i;
			X[index] = X[index]/reCast<IssmPDouble>(scaling_factors[c]);
		}
		N_add+=N[c];
	}

	/*Allocate m1qn3 working arrays (see documentation)*/
	long      m   = 100;
	long      ndz = 4*n+m*(2*n+1);
	double*   dz  = xNew<double>(ndz);
	if(VerboseControl())_printf0_("   Computing initial solution\n");
	_printf0_("\n");
	_printf0_("Cost function f(x)   | Gradient norm |g(x)| |  List of contributions\n");
	_printf0_("____________________________________________________________________\n");

	/*Prepare structure for m1qn3*/
	m1qn3_struct mystruct;
	mystruct.femmodel = femmodel;
	mystruct.M        = maxiter;
	mystruct.N        = num_cost_functions+1;
	mystruct.Jlist    = xNewZeroInit<IssmPDouble>(mystruct.M*mystruct.N);
	mystruct.i        = xNewZeroInit<int>(1);
	/*Initialize Gradient and cost function of M1QN3*/
	indic = 4; /*gradient required*/
	simul_ad(&indic,&n,X,&f,G,izs,rzs,(void*)&mystruct);
	/*Estimation of the expected decrease in f during the first iteration*/
	double df1=f;

	/*Call M1QN3 solver*/
	m1qn3_(simul_ptr,prosca,&ctonbe_,&ctcabe_,
				&n,X,&f,G,&dxmin,&df1,
				&gttol,normtype,&impres,&io,imode,&omode,&niter,&nsim,iz,dz,&ndz,
				&reverse,&indic,izs,rzs,(void*)&mystruct);
	switch(int(omode)){
		case 0:  _printf0_("   Stop requested (indic = 0)\n"); break;
		case 1:  _printf0_("   Convergence reached (gradient satisfies stopping criterion)\n"); break;
		case 2:  _printf0_("   Bad initialization\n"); break;
		case 3:  _printf0_("   Line search failure\n"); break;
		case 4:  _printf0_("   Maximum number of iterations exceeded\n");break;
		case 5:  _printf0_("   Maximum number of function calls exceeded\n"); break;
		case 6:  _printf0_("   stopped on dxmin during line search\n"); break;
		case 7:  _printf0_("   <g,d> > 0  or  <y,s> <0\n"); break;
		default: _printf0_("   Unknown end condition\n");
	}

	/*Constrain solution vector*/
	double  *XL = NULL;
	double  *XU = NULL;
	GetPassiveVectorFromControlInputsx(&XL,femmodel->elements,femmodel->nodes,femmodel->vertices,femmodel->loads,femmodel->materials,femmodel->parameters,"lowerbound");
	GetPassiveVectorFromControlInputsx(&XU,femmodel->elements,femmodel->nodes,femmodel->vertices,femmodel->loads,femmodel->materials,femmodel->parameters,"upperbound");

	N_add = 0;
	for (int c=0;c<num_controls;c++){
		for(int i=0;i<numberofvertices*N[c];i++){
			int index = N_add*numberofvertices+i;
			X[index] = X[index]*reCast<double>(scaling_factors[c]);
			if(X[index]>XU[index]) X[index]=XU[index];
			if(X[index]<XL[index]) X[index]=XL[index];
		}
		N_add +=N[c];
	}

	/*Set X as our new control*/
	IssmDouble* aX=xNew<IssmDouble>(intn);
	IssmDouble* aG=xNew<IssmDouble>(intn);

	for(int i=0;i<intn;i++) {
		aX[i] = reCast<IssmDouble>(X[i]); 
		aG[i] = reCast<IssmDouble>(G[i]);
		}

	ControlInputSetGradientx(femmodel->elements,femmodel->nodes,femmodel->vertices,femmodel->loads,femmodel->materials,femmodel->parameters,aG);
	SetControlInputsFromVectorx(femmodel,aX);

	xDelete(aX);

	if (solution_type == TransientSolutionEnum){
		int step = 1;
		femmodel->parameters->SetParam(step,StepEnum);
		femmodel->results->AddObject(new GenericExternalResult<IssmPDouble*>(femmodel->results->Size()+1,JEnum,mystruct.Jlist,(*mystruct.i),mystruct.N,1,0));

		int offset = 0;
		for(int i=0;i<num_controls;i++){

			/*Disect results*/
			GenericExternalResult<IssmPDouble*>* G_output = new GenericExternalResult<IssmPDouble*>(femmodel->results->Size()+1,Gradient1Enum+i,&G[offset],N[i],numberofvertices,1,0.);
			GenericExternalResult<IssmPDouble*>* X_output = new GenericExternalResult<IssmPDouble*>(femmodel->results->Size()+1,control_enum[i],&X[offset],N[i],numberofvertices,1,0.);

			/*transpose for consistency with MATLAB's formating*/
			G_output->Transpose();
			X_output->Transpose();

			/*Add to results*/
			femmodel->results->AddObject(G_output);
			femmodel->results->AddObject(X_output);

			offset += N[i]*numberofvertices;
		}
	}
	else{
		femmodel->results->AddObject(new GenericExternalResult<IssmPDouble*>(femmodel->results->Size()+1,JEnum,mystruct.Jlist,(*mystruct.i),mystruct.N,0,0));

	femmodel->OutputControlsx(&femmodel->results);
	}

	xDelete(aG);

	/*Add last cost function to results*/

	/*Finalize*/
	if(VerboseControl()) _printf0_("   preparing final solution\n");
	femmodel->parameters->SetParam(true,SaveResultsEnum);
	void (*solutioncore)(FemModel*)=NULL;
	CorePointerFromSolutionEnum(&solutioncore,femmodel->parameters,solution_type);
	solutioncore(femmodel);

	/*Clean-up and return*/
	xDelete<double>(G);
	xDelete<double>(X);
	xDelete<double>(dz);
	xDelete<double>(XU);
	xDelete<double>(XL);
	xDelete<IssmDouble>(scaling_factors);
	xDelete<IssmPDouble>(mystruct.Jlist);
	xDelete<int>(mystruct.i);
}/*}}}*/

#else
void controladm1qn3_core(FemModel* femmodel){_error_("M1QN3 or ADOLC/CoDiPack not installed");}
#endif //_HAVE_M1QN3_
