Changeset 19191


Ignore:
Timestamp:
03/09/15 12:45:28 (10 years ago)
Author:
Eric.Larour
Message:

CHG: introducing checkpointing capability. Right now, just empty shell, getting the matlab and python apis
to accept restart_frequency settings.

Location:
issm/trunk-jpl/src
Files:
2 added
12 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified issm/trunk-jpl/src/c/classes/FemModel.cpp

    r19138 r19191  
    5454        char *outbinfilename = NULL;
    5555        char *petscfilename  = NULL;
     56        char *restartfilename  = NULL;
    5657        char *rootpath       = NULL;
    5758
     
    7172
    7273        /*From command line arguments, retrieve different filenames needed to create the FemModel: */
    73         ProcessArguments(&solution_type,&binfilename,&outbinfilename,&petscfilename,&lockfilename,&rootpath,argc,argv);
     74        ProcessArguments(&solution_type,&binfilename,&outbinfilename,&petscfilename,&lockfilename,&restartfilename,&rootpath,argc,argv);
    7475
    7576        /*Create femmodel from input files: */
    7677        profiler->Tag(StartInit);
    77         this->InitFromFiles(rootpath,binfilename,outbinfilename,petscfilename,lockfilename,solution_type,trace,NULL);
     78        this->InitFromFiles(rootpath,binfilename,outbinfilename,petscfilename,lockfilename,restartfilename, solution_type,trace,NULL);
    7879        profiler->Tag(FinishInit);
    7980
     
    8788}
    8889/*}}}*/
    89 FemModel::FemModel(char* rootpath, char* inputfilename, char* outputfilename, char* toolkitsfilename, char* lockfilename, ISSM_MPI_Comm incomm, int solution_type,IssmPDouble* X){ /*{{{*/
     90FemModel::FemModel(char* rootpath, char* inputfilename, char* outputfilename, char* toolkitsfilename, char* lockfilename, char* restartfilename, ISSM_MPI_Comm incomm, int solution_type,IssmPDouble* X){ /*{{{*/
    9091
    9192        bool traceon=true;
     
    9899
    99100        /*Create femmodel from input files, with trace activated: */
    100         this->InitFromFiles(rootpath,inputfilename,outputfilename,toolkitsfilename,lockfilename,solution_type,traceon,X);
     101        this->InitFromFiles(rootpath,inputfilename,outputfilename,toolkitsfilename,lockfilename,restartfilename, solution_type,traceon,X);
    101102
    102103}
     
    142143}
    143144/*}}}*/
    144 void FemModel::InitFromFiles(char* rootpath, char* inputfilename, char* outputfilename, char* toolkitsfilename, char* lockfilename, const int in_solution_type,bool trace,IssmPDouble* X){/*{{{*/
     145void FemModel::InitFromFiles(char* rootpath, char* inputfilename, char* outputfilename, char* toolkitsfilename, char* lockfilename, char* restartfilename, const int in_solution_type,bool trace,IssmPDouble* X){/*{{{*/
    145146
    146147        /*intermediary*/
     
    208209        this->parameters->AddObject(new StringParam(OutputFileNameEnum,outputfilename));
    209210        this->parameters->AddObject(new StringParam(LockFileNameEnum,lockfilename));
     211        this->parameters->AddObject(new StringParam(RestartFileNameEnum,restartfilename));
    210212
    211213        /*Clean up*/
     
    603605        else              xDelete<int>(analyses);
    604606}/*}}}*/
     607void FemModel::CheckPoint(void){/*{{{*/
     608
     609        FILE* restartfid=NULL;
     610        char* restartfilename = NULL;
     611
     612        /*First, recover the name of the restart file: */
     613        parameters->FindParam(&restartfilename,RestartFileNameEnum);
     614       
     615        /*Open file for writing: */
     616        restartfid=pfopen(restartfilename,"wb");
     617
     618        /*Done, close file :*/
     619        pfclose(restartfid,restartfilename);
     620
     621        /*Free ressources: */
     622        xDelete<char>(restartfilename);
     623
     624}
     625/*}}}*/
     626void FemModel::Restart(void){/*{{{*/
     627}
     628/*}}}*/
    605629
    606630/*Modules:*/
  • TabularUnified issm/trunk-jpl/src/c/classes/FemModel.h

    r19062 r19191  
    4747                /*constructors, destructors: */
    4848                FemModel(int argc,char** argv,ISSM_MPI_Comm comm_init,bool trace=false);
    49                 FemModel(char* rootpath, char* inputfilename, char* outputfilename, char* toolkitsfilename, char* lockfilename, ISSM_MPI_Comm incomm, int solution_type,IssmPDouble* X);
     49                FemModel(char* rootpath, char* inputfilename, char* outputfilename, char* toolkitsfilename, char* lockfilename, char* restartfilename, ISSM_MPI_Comm incomm, int solution_type,IssmPDouble* X);
    5050                ~FemModel();
    5151
     
    5353                void Echo();
    5454                FemModel* copy();
    55                 void InitFromFiles(char* rootpath, char* inputfilename, char* outputfilename, char* petscfilename, char* lockfilename, const int solution_type,bool trace,IssmPDouble* X=NULL);
     55                void InitFromFiles(char* rootpath, char* inputfilename, char* outputfilename, char* petscfilename, char* lockfilename, char* restartfilename, const int solution_type,bool trace,IssmPDouble* X=NULL);
    5656                void SolutionAnalysesList(int** panalyses,int* pnumanalyses,IoModel* iomodel,int solutiontype);
    5757                void CleanUp(void);
     
    6060                void SetCurrentConfiguration(int configuration_type);
    6161                void SetCurrentConfiguration(int configuration_type,int analysis_type);
     62                void CheckPoint(void);
     63                void Restart(void);
    6264
    6365                /*Modules*/
  • TabularUnified issm/trunk-jpl/src/c/cores/ProcessArguments.cpp

    r14917 r19191  
    88#include "../shared/shared.h"
    99
    10 void ProcessArguments(int* solution_type,char** pbinfilename,char** poutbinfilename,char** ptoolkitsfilename,char** plockfilename,char** prootpath, int argc,char **argv){
     10void ProcessArguments(int* solution_type,char** pbinfilename,char** poutbinfilename,char** ptoolkitsfilename,char** plockfilename,char** prestartfilename, char** prootpath, int argc,char **argv){
    1111
    1212        char *modelname      = NULL;
     
    1515        char *toolkitsfilename  = NULL;
    1616        char *lockfilename   = NULL;
     17        char *restartfilename    = NULL;
    1718        char *rootpath       = NULL;
    18         char *rootpatharg    = NULL;
     19        char *rootpatharg    = NULL;
     20        int   my_rank, rank_length; 
    1921
    2022        /*Check input arguments*/
     
    2224        if(argc<3)_error_("Usage error: missing execution directory");
    2325        if(argc<4)_error_("Usage error: missing model name");
     26
     27        /*Recover myrank: */
     28        my_rank=IssmComm::GetRank();
     29        rank_length= (my_rank == 0 ? 1 : (int)(log10(my_rank)+1)); /*length of string "my_rank" */
    2430
    2531        /*Get requested solution*/
     
    4046                toolkitsfilename  = xNew<char>(strlen(rootpath)+strlen(modelname)+strlen(".toolkits") +1); sprintf(toolkitsfilename, "%s%s%s",rootpath,modelname,".toolkits");
    4147                lockfilename   = xNew<char>(strlen(rootpath)+strlen(modelname)+strlen(".lock")  +1); sprintf(lockfilename,  "%s%s%s",rootpath,modelname,".lock");
     48                restartfilename   = xNew<char>(strlen(rootpath)+strlen(modelname)+strlen(".rst.")  +rank_length +1); sprintf(restartfilename,  "%s%s%s%i",rootpath,modelname,".rst.",my_rank);
    4249        }
    4350        else{
     
    4653                toolkitsfilename  = xNew<char>(strlen(modelname)+strlen(".toolkits") +1); sprintf(toolkitsfilename, "%s%s",modelname,".toolkits");
    4754                lockfilename   = xNew<char>(strlen(modelname)+strlen(".lock")  +1); sprintf(lockfilename,  "%s%s",modelname,".lock");
     55                restartfilename   = xNew<char>(strlen(modelname)+strlen(".rst.")  +rank_length +1); sprintf(restartfilename,  "%s%s%i",modelname,".rst.",my_rank);
    4856        }
    4957
     
    5361        *ptoolkitsfilename=toolkitsfilename;
    5462        *plockfilename=lockfilename;
     63        *prestartfilename=restartfilename;
    5564        *prootpath=rootpath;
    5665
  • TabularUnified issm/trunk-jpl/src/c/cores/cores.h

    r19087 r19191  
    5353
    5454//diverse
    55 void ProcessArguments(int* solution,char** pbinname,char** poutbinname,char** ptoolkitsname,char** plockname,char** prootpath,int argc,char **argv);
     55void ProcessArguments(int* solution,char** pbinname,char** poutbinname,char** ptoolkitsname,char** plockname,char** prestartname, char** prootpath,int argc,char **argv);
    5656void WriteLockFile(char* filename);
    5757void ResetBoundaryConditions(FemModel* femmodel, int analysis_type);
  • TabularUnified issm/trunk-jpl/src/c/cores/transient_core.cpp

    r19138 r19191  
    2525        bool       time_adapt;
    2626        int        output_frequency;
     27        int        restart_frequency;
    2728        int        domaintype,groundingline_migration,smb_model;
    2829        int        numoutputs;
     
    4243        femmodel->parameters->FindParam(&dakota_analysis,QmuIsdakotaEnum);
    4344        femmodel->parameters->FindParam(&output_frequency,SettingsOutputFrequencyEnum);
     45        femmodel->parameters->FindParam(&restart_frequency,SettingsRestartFrequencyEnum);
    4446        femmodel->parameters->FindParam(&time_adapt,TimesteppingTimeAdaptEnum);
    4547        femmodel->parameters->FindParam(&isstressbalance,TransientIsstressbalanceEnum);
     
    179181                        OutputResultsx(femmodel);
    180182                }
     183                if(step%restart_frequency==0 )femmodel->CheckPoint();
    181184        }
    182185
  • TabularUnified issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateParameters.cpp

    r19172 r19191  
    4848        parameters->AddObject(iomodel->CopyConstantObject(DomainDimensionEnum));
    4949        parameters->AddObject(iomodel->CopyConstantObject(SettingsOutputFrequencyEnum));
     50        parameters->AddObject(iomodel->CopyConstantObject(SettingsRestartFrequencyEnum));
    5051        parameters->AddObject(iomodel->CopyConstantObject(ConstantsYtsEnum));
    5152        parameters->AddObject(iomodel->CopyConstantObject(TimesteppingStartTimeEnum));
  • TabularUnified issm/trunk-jpl/src/c/shared/Enum/EnumDefinitions.h

    r19172 r19191  
    290290        SettingsLowmemEnum,
    291291        SettingsOutputFrequencyEnum,
     292        SettingsRestartFrequencyEnum,
    292293        SettingsWaitonlockEnum,
    293294        DebugProfilingEnum,
     
    869870        InputFileNameEnum,
    870871        LockFileNameEnum,
     872        RestartFileNameEnum,
    871873        ToolkitsOptionsAnalysesEnum,
    872874        ToolkitsOptionsStringsEnum,
  • TabularUnified issm/trunk-jpl/src/c/shared/Enum/EnumToStringx.cpp

    r19172 r19191  
    298298                case SettingsLowmemEnum : return "SettingsLowmem";
    299299                case SettingsOutputFrequencyEnum : return "SettingsOutputFrequency";
     300                case SettingsRestartFrequencyEnum : return "SettingsRestartFrequency";
    300301                case SettingsWaitonlockEnum : return "SettingsWaitonlock";
    301302                case DebugProfilingEnum : return "DebugProfiling";
     
    842843                case InputFileNameEnum : return "InputFileName";
    843844                case LockFileNameEnum : return "LockFileName";
     845                case RestartFileNameEnum : return "RestartFileName";
    844846                case ToolkitsOptionsAnalysesEnum : return "ToolkitsOptionsAnalyses";
    845847                case ToolkitsOptionsStringsEnum : return "ToolkitsOptionsStrings";
  • TabularUnified issm/trunk-jpl/src/c/shared/Enum/StringToEnumx.cpp

    r19172 r19191  
    304304              else if (strcmp(name,"SettingsLowmem")==0) return SettingsLowmemEnum;
    305305              else if (strcmp(name,"SettingsOutputFrequency")==0) return SettingsOutputFrequencyEnum;
     306              else if (strcmp(name,"SettingsRestartFrequency")==0) return SettingsRestartFrequencyEnum;
    306307              else if (strcmp(name,"SettingsWaitonlock")==0) return SettingsWaitonlockEnum;
    307308              else if (strcmp(name,"DebugProfiling")==0) return DebugProfilingEnum;
     
    382383              else if (strcmp(name,"SurfaceforcingsBNeg")==0) return SurfaceforcingsBNegEnum;
    383384              else if (strcmp(name,"SMBhenning")==0) return SMBhenningEnum;
    384               else if (strcmp(name,"SMBcomponents")==0) return SMBcomponentsEnum;
    385385         else stage=4;
    386386   }
    387387   if(stage==4){
    388               if (strcmp(name,"SurfaceforcingsAccumulation")==0) return SurfaceforcingsAccumulationEnum;
     388              if (strcmp(name,"SMBcomponents")==0) return SMBcomponentsEnum;
     389              else if (strcmp(name,"SurfaceforcingsAccumulation")==0) return SurfaceforcingsAccumulationEnum;
    389390              else if (strcmp(name,"SurfaceforcingsEvaporation")==0) return SurfaceforcingsEvaporationEnum;
    390391              else if (strcmp(name,"SurfaceforcingsRunoff")==0) return SurfaceforcingsRunoffEnum;
     
    505506              else if (strcmp(name,"Pengrid")==0) return PengridEnum;
    506507              else if (strcmp(name,"Penpair")==0) return PenpairEnum;
    507               else if (strcmp(name,"Profiler")==0) return ProfilerEnum;
    508508         else stage=5;
    509509   }
    510510   if(stage==5){
    511               if (strcmp(name,"MatrixParam")==0) return MatrixParamEnum;
     511              if (strcmp(name,"Profiler")==0) return ProfilerEnum;
     512              else if (strcmp(name,"MatrixParam")==0) return MatrixParamEnum;
    512513              else if (strcmp(name,"Masscon")==0) return MassconEnum;
    513514              else if (strcmp(name,"MassconName")==0) return MassconNameEnum;
     
    628629              else if (strcmp(name,"DeviatoricStress")==0) return DeviatoricStressEnum;
    629630              else if (strcmp(name,"DeviatoricStressxx")==0) return DeviatoricStressxxEnum;
    630               else if (strcmp(name,"DeviatoricStressxy")==0) return DeviatoricStressxyEnum;
    631631         else stage=6;
    632632   }
    633633   if(stage==6){
    634               if (strcmp(name,"DeviatoricStressxz")==0) return DeviatoricStressxzEnum;
     634              if (strcmp(name,"DeviatoricStressxy")==0) return DeviatoricStressxyEnum;
     635              else if (strcmp(name,"DeviatoricStressxz")==0) return DeviatoricStressxzEnum;
    635636              else if (strcmp(name,"DeviatoricStressyy")==0) return DeviatoricStressyyEnum;
    636637              else if (strcmp(name,"DeviatoricStressyz")==0) return DeviatoricStressyzEnum;
     
    751752              else if (strcmp(name,"Outputdefinition69")==0) return Outputdefinition69Enum;
    752753              else if (strcmp(name,"Outputdefinition70")==0) return Outputdefinition70Enum;
    753               else if (strcmp(name,"Outputdefinition71")==0) return Outputdefinition71Enum;
    754754         else stage=7;
    755755   }
    756756   if(stage==7){
    757               if (strcmp(name,"Outputdefinition72")==0) return Outputdefinition72Enum;
     757              if (strcmp(name,"Outputdefinition71")==0) return Outputdefinition71Enum;
     758              else if (strcmp(name,"Outputdefinition72")==0) return Outputdefinition72Enum;
    758759              else if (strcmp(name,"Outputdefinition73")==0) return Outputdefinition73Enum;
    759760              else if (strcmp(name,"Outputdefinition74")==0) return Outputdefinition74Enum;
     
    860861              else if (strcmp(name,"InputFileName")==0) return InputFileNameEnum;
    861862              else if (strcmp(name,"LockFileName")==0) return LockFileNameEnum;
     863              else if (strcmp(name,"RestartFileName")==0) return RestartFileNameEnum;
    862864              else if (strcmp(name,"ToolkitsOptionsAnalyses")==0) return ToolkitsOptionsAnalysesEnum;
    863865              else if (strcmp(name,"ToolkitsOptionsStrings")==0) return ToolkitsOptionsStringsEnum;
     
    873875              else if (strcmp(name,"BilinearInterp")==0) return BilinearInterpEnum;
    874876              else if (strcmp(name,"NearestInterp")==0) return NearestInterpEnum;
    875               else if (strcmp(name,"XY")==0) return XYEnum;
    876               else if (strcmp(name,"XYZ")==0) return XYZEnum;
    877877         else stage=8;
    878878   }
    879879   if(stage==8){
    880               if (strcmp(name,"Dense")==0) return DenseEnum;
     880              if (strcmp(name,"XY")==0) return XYEnum;
     881              else if (strcmp(name,"XYZ")==0) return XYZEnum;
     882              else if (strcmp(name,"Dense")==0) return DenseEnum;
    881883              else if (strcmp(name,"MpiDense")==0) return MpiDenseEnum;
    882884              else if (strcmp(name,"MpiSparse")==0) return MpiSparseEnum;
  • TabularUnified issm/trunk-jpl/src/m/classes/settings.m

    r19040 r19191  
    1010                lowmem              = 0;
    1111                output_frequency    = 0;
     12                restart_frequency   = 0;
    1213                waitonlock          = 0;
    1314                upload_server       = '';
     
    5859                        self.output_frequency=1;
    5960
     61                        %checkpoints frequency, by default never:
     62                        self.restart_frequency=0;
     63
    6064                        %this option can be activated to load automatically the results
    6165                        %onto the model after a parallel run by waiting for the lock file
     
    7478                        md = checkfield(md,'fieldname','settings.lowmem','numel',[1],'values',[0 1]);
    7579                        md = checkfield(md,'fieldname','settings.output_frequency','numel',[1],'>=',1);
     80                        md = checkfield(md,'fieldname','settings.restart_frequency','numel',[1],'>=',0);
    7681                        md = checkfield(md,'fieldname','settings.waitonlock','numel',[1]);
    7782
     
    8489                        fielddisplay(self,'lowmem','is the memory limited ? (0 or 1)');
    8590                        fielddisplay(self,'output_frequency','frequency at which results are saved in all solutions with multiple time_steps');
     91                        fielddisplay(self,'restart_frequency','frequency at which the runs are being checkpointed, allowing for a restart');
    8692                        fielddisplay(self,'waitonlock','maximum number of minutes to wait for batch results (NaN to deactivate)');
    8793                        fielddisplay(self,'upload_server','server hostname where model should be uploaded');
     
    97103                        WriteData(fid,'object',self,'fieldname','lowmem','format','Boolean');
    98104                        WriteData(fid,'object',self,'fieldname','output_frequency','format','Integer');
     105                        WriteData(fid,'object',self,'fieldname','restart_frequency','format','Integer');
    99106                        if self.waitonlock>0,
    100107                                WriteData(fid,'enum',SettingsWaitonlockEnum(),'data',true,'format','Boolean');
  • TabularUnified issm/trunk-jpl/src/m/classes/settings.py

    r17497 r19191  
    1717                self.lowmem              = 0
    1818                self.output_frequency    = 0
     19                self.restart_frequency    = 0
    1920                self.waitonlock          = 0
    2021
     
    3031                string="%s\n%s"%(string,fielddisplay(self,"lowmem","is the memory limited ? (0 or 1)"))
    3132                string="%s\n%s"%(string,fielddisplay(self,"output_frequency","frequency at which results are saved in all solutions with multiple time_steps"))
     33                string="%s\n%s"%(string,fielddisplay(self,"restart_frequency","frequency at which the runs are being checkpointed, allowing for a restart"))
    3234                string="%s\n%s"%(string,fielddisplay(self,"waitonlock","maximum number of minutes to wait for batch results, or return 0"))
    3335                return string
     
    4446                self.output_frequency=1
    4547
     48                #checkpoints frequency, by default never:
     49                self.restart_frequency=0
     50
     51
    4652                #this option can be activated to load automatically the results
    4753                #onto the model after a parallel run by waiting for the lock file
     
    5763                md = checkfield(md,'fieldname','settings.lowmem','numel',[1],'values',[0,1])
    5864                md = checkfield(md,'fieldname','settings.output_frequency','numel',[1],'>=',1)
     65                md = checkfield(md,'fieldname','settings.restart_frequency','numel',[1],'>=',0)
    5966                md = checkfield(md,'fieldname','settings.waitonlock','numel',[1])
    6067
     
    6673                WriteData(fid,'object',self,'fieldname','lowmem','format','Boolean')
    6774                WriteData(fid,'object',self,'fieldname','output_frequency','format','Integer')
     75                WriteData(fid,'object',self,'fieldname','restart_frequency','format','Integer')
    6876                if self.waitonlock>0:
    6977                        WriteData(fid,'enum',SettingsWaitonlockEnum(),'data',True,'format','Boolean');
  • TabularUnified issm/trunk-jpl/src/m/enum/EnumDefinitions.py

    r19172 r19191  
    290290def SettingsLowmemEnum(): return StringToEnum("SettingsLowmem")[0]
    291291def SettingsOutputFrequencyEnum(): return StringToEnum("SettingsOutputFrequency")[0]
     292def SettingsRestartFrequencyEnum(): return StringToEnum("SettingsRestartFrequency")[0]
    292293def SettingsWaitonlockEnum(): return StringToEnum("SettingsWaitonlock")[0]
    293294def DebugProfilingEnum(): return StringToEnum("DebugProfiling")[0]
     
    834835def InputFileNameEnum(): return StringToEnum("InputFileName")[0]
    835836def LockFileNameEnum(): return StringToEnum("LockFileName")[0]
     837def RestartFileNameEnum(): return StringToEnum("RestartFileName")[0]
    836838def ToolkitsOptionsAnalysesEnum(): return StringToEnum("ToolkitsOptionsAnalyses")[0]
    837839def ToolkitsOptionsStringsEnum(): return StringToEnum("ToolkitsOptionsStrings")[0]
Note: See TracChangeset for help on using the changeset viewer.