%GENERIC cluster class definition
%
%   Describes a generic (local or ssh-reachable) machine on which the ISSM
%   executables are launched, and knows how to write, upload, launch and
%   download the files of a run.
%
%   Usage:
%      cluster=generic('name','astrid','np',3);
%      cluster=generic('name',oshostname(),'np',3,'login','username');
%
%   Any public property can be given as a 'property',value pair. If a script
%   called <name>_settings exists, it is evaluated in the constructor before
%   the explicit options are applied.

classdef generic
    properties (SetAccess=public)
        % {{{
        name='';         % host name, handed to issmssh/issmscpin/issmscpout
        login='';        % user name, handed to issmssh/issmscpin/issmscpout
        np=1;            % number of processes given to mpiexec
        port=0;          % port, handed to issmssh/issmscpin/issmscpout
        interactive=1;   % if true, output is not redirected and empty log files are created
        codepath=[issmdir() '/bin'];             % directory holding the executables
        etcpath=[issmdir() '/etc'];              % directory holding environment.sh / environment.csh
        executionpath=[issmdir() '/execution'];  % directory under which run directories are created
        valgrind=[issmdir() '/externalpackages/valgrind/install/bin/valgrind'];             % valgrind executable
        valgrindlib=[issmdir() '/externalpackages/valgrind/install/lib/libmpidebug.so'];    % library put in LD_PRELOAD for valgrind runs
        valgrindsup=[issmdir() '/externalpackages/valgrind/issm.supp'];                     % valgrind suppressions file
        verbose=1;       % if true, progress messages are displayed
        shell='/bin/sh'; % interpreter written in the script shebang; a name containing 'csh' selects environment.csh
        %}}}
    end
    methods
        function cluster=generic(varargin) % {{{
            %GENERIC - build a cluster object from 'property',value pairs
            %
            %   Precedence, lowest to highest: property defaults, Windows
            %   path defaults, <name>_settings script, explicit options.

            %Change the defaults if ispc
            if ispc,
                cluster.codepath      = [issmdir() '\bin'];
                cluster.etcpath       = [issmdir() '\etc'];
                cluster.executionpath = [issmdir() '\execution'];
            end

            %use provided options to change fields
            options=pairoptions(varargin{:});

            %get name
            cluster.name=getfieldvalue(options,'name',oshostname());

            %initialize cluster using user settings if provided
            %('file' restricts the lookup to files, so a workspace variable
            % of the same name can never shadow the settings script)
            if (exist([cluster.name '_settings'],'file')==2), eval([cluster.name '_settings']); end

            %OK get other fields
            %(options are parsed again on purpose: the settings script above
            % runs in this workspace and may have touched local variables)
            cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
        end
        %}}}
        function disp(cluster) % {{{
            %DISP - display the object
            disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
            disp(sprintf(' name: %s',cluster.name));
            disp(sprintf(' login: %s',cluster.login));
            disp(sprintf(' np: %i',cluster.np));
            disp(sprintf(' port: %i',cluster.port));
            disp(sprintf(' codepath: %s',cluster.codepath));
            disp(sprintf(' executionpath: %s',cluster.executionpath));
            disp(sprintf(' etcpath: %s',cluster.etcpath));
            disp(sprintf(' valgrind: %s',cluster.valgrind));
            disp(sprintf(' valgrindlib: %s',cluster.valgrindlib));
            disp(sprintf(' valgrindsup: %s',cluster.valgrindsup));
            %verbose is numeric: %s would print it as a control character
            disp(sprintf(' verbose: %i',cluster.verbose));
            disp(sprintf(' shell: %s',cluster.shell));
        end
        %}}}
        function md = checkconsistency(cluster,md,solution,analyses) % {{{
            %CHECKCONSISTENCY - report an invalid number of processors on md
            %
            %   solution and analyses are not used by this cluster class.
            if cluster.np<1
                md = checkmessage(md,'number of processors should be at least 1');
            end
            if isnan(cluster.np),
                md = checkmessage(md,'number of processors should not be NaN!');
            end
        end
        %}}}
        function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota) % {{{
            %BUILDQUEUESCRIPT - write the launch script of a single model run
            %
            %   Writes <modelname>.queue (or <modelname>.bat on Windows) in
            %   the current directory. In interactive mode, empty
            %   <modelname>.errlog and <modelname>.outlog files are created
            %   as well.

            %what is the executable being called?
            executable='issm.exe';
            if isdakota,
                dakotaversion=IssmConfig('_DAKOTA_VERSION_');
                dakotaversion=str2double(dakotaversion(1:3));
                if (dakotaversion>=6),
                    executable='issm_dakota.exe';
                end
            end

            %write queuing script
            if ~ispc(),

                rundir=[cluster.executionpath '/' dirname];

                [fid,msg]=fopen([modelname '.queue'],'w');
                if fid==-1,
                    error('BuildQueueScript error message: could not open %s.queue for writing (%s)',modelname,msg);
                end
                fprintf(fid,'#!%s\n',cluster.shell);
                if ~isvalgrind,
                    if cluster.interactive
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s/%s %s %s %s \n',cluster.np,cluster.codepath,executable,EnumToString(solution),rundir,modelname);
                        else
                            fprintf(fid,'%s/%s %s %s %s ',cluster.codepath,executable,EnumToString(solution),rundir,modelname);
                        end
                    else
                        %not interactive: run in the background, logs go to files
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s/%s %s %s %s 2> %s.errlog >%s.outlog &',cluster.np,cluster.codepath,executable,EnumToString(solution),rundir,modelname,modelname,modelname);
                        else
                            fprintf(fid,'%s/%s %s %s %s 2> %s.errlog >%s.outlog &',cluster.codepath,executable,EnumToString(solution),rundir,modelname,modelname,modelname);
                        end
                    end
                elseif isgprof,
                    %NOTE(review): as an elseif of ~isvalgrind, this branch is
                    %only reached when isvalgrind is true - confirm intent
                    fprintf(fid,'\n gprof %s/%s gmon.out > %s.performance',cluster.codepath,executable,modelname);
                else
                    %Add --gen-suppressions=all to get suppression lines
                    fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib);
                    if ismac,
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                                cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,EnumToString(solution),rundir,modelname,modelname,modelname);
                        else
                            fprintf(fid,'%s --leak-check=full --dsymutil=yes --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                                cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,EnumToString(solution),rundir,modelname,modelname,modelname);
                        end
                    else
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                                cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,EnumToString(solution),rundir,modelname,modelname,modelname);
                        else
                            fprintf(fid,'%s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                                cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,EnumToString(solution),rundir,modelname,modelname,modelname);
                        end
                    end
                end
                if ~io_gather, %concatenate the output files:
                    fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
                end
                fclose(fid);

            else % Windows

                [fid,msg]=fopen([modelname '.bat'],'w');
                if fid==-1,
                    error('BuildQueueScript error message: could not open %s.bat for writing (%s)',modelname,msg);
                end
                fprintf(fid,'@echo off\n');

                if IssmConfig('_HAVE_PETSC_MPI_'),
                    warning('parallel runs not allowed yet in Windows. Defaulting to 1 cpus');
                    %cluster is a value object: this only affects the local copy
                    cluster.np=1;
                end

                %NOTE(review): the Windows script always launches issm.exe,
                %whatever executable was selected above - confirm intent
                if cluster.np>1,
                    fprintf(fid,'"C:\\Program Files\\MPICH2\\bin\\mpiexec.exe" -n %i "%s/issm.exe" %s ./ %s ',cluster.np,cluster.codepath,EnumToString(solution),modelname);
                else
                    fprintf(fid,'"%s/issm.exe" %s ./ %s ',cluster.codepath,EnumToString(solution),modelname);
                end
                fclose(fid);
            end

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function BuildQueueScriptMultipleModels(cluster,dirname,modelname,solution,dirnames,modelnames,nps) % {{{
            %BUILDQUEUESCRIPTMULTIPLEMODELS - write the launch script of a coupled run
            %
            %   Writes <modelname>.queue with a single mpiexec command line
            %   for issm_slr.exe, followed by one (directory, model name,
            %   number of processors) triplet per entry of dirnames,
            %   modelnames and nps (cell arrays indexed with {i}).
            %   Not supported on Windows.

            %some checks:
            if isempty(modelname), error('BuildQueueScriptMultipleModels error message: need a non empty model name!');end

            %what is the executable being called?
            executable='issm_slr.exe';

            if ispc(), error('BuildQueueScriptMultipleModels not support yet on windows machines');end;

            %write queuing script
            [fid,msg]=fopen([modelname '.queue'],'w');
            if fid==-1,
                error('BuildQueueScriptMultipleModels error message: could not open %s.queue for writing (%s)',modelname,msg);
            end

            fprintf(fid,'#!%s\n',cluster.shell);

            %number of cpus:
            mpistring=sprintf('mpiexec -np %i ',cluster.np);

            %executable:
            mpistring=[mpistring sprintf('%s/%s ',cluster.codepath,executable)];

            %solution name:
            mpistring=[mpistring sprintf('%s ',EnumToString(solution))];

            %execution directory and model name:
            mpistring=[mpistring sprintf('%s/%s %s',cluster.executionpath,dirname,modelname)];

            %inform main executable of how many icecaps, glaciers and earth models are being run:
            mpistring=[mpistring sprintf(' %i ',length(dirnames))];

            %icecaps, glaciers and earth location, names and number of processors associated:
            for i=1:length(dirnames),
                mpistring=[mpistring sprintf(' %s/%s %s %i ',cluster.executionpath,dirnames{i},modelnames{i},nps{i})];
            end

            %log files:
            if ~cluster.interactive,
                mpistring=[mpistring sprintf('2> %s.errlog> %s.outlog',modelname,modelname)];
            end

            %write this long string to disk:
            %(written through '%s' so that a '%' or '\' in a path is not
            % interpreted as a format specifier or escape sequence)
            fprintf(fid,'%s',mpistring);
            fclose(fid);

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{
            %BUILDKRIGINGQUEUESCRIPT - write the launch script of a kriging run
            %
            %   Writes <modelname>.queue (or <modelname>.bat on Windows) in
            %   the current directory. In interactive mode, empty
            %   <modelname>.errlog and <modelname>.outlog files are created
            %   as well.

            %write queuing script
            if ~ispc(),

                rundir=[cluster.executionpath '/' modelname];

                [fid,msg]=fopen([modelname '.queue'],'w');
                if fid==-1,
                    error('BuildKrigingQueueScript error message: could not open %s.queue for writing (%s)',modelname,msg);
                end
                %same shebang as the other queue scripts (default: /bin/sh)
                fprintf(fid,'#!%s\n',cluster.shell);
                if ~isvalgrind,
                    if cluster.interactive
                        fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s ',cluster.np,cluster.codepath,rundir,modelname);
                    else
                        fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',cluster.np,cluster.codepath,rundir,modelname,modelname,modelname);
                    end
                elseif isgprof,
                    %NOTE(review): as an elseif of ~isvalgrind, this branch is
                    %only reached when isvalgrind is true - confirm intent
                    fprintf(fid,'\n gprof %s/kriging.exe gmon.out > %s.performance',cluster.codepath,modelname);
                else
                    %Add --gen-suppressions=all to get suppression lines
                    fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib);
                    fprintf(fid,'mpiexec -np %i %s --leak-check=full --suppressions=%s %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',...
                        cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,rundir,modelname,modelname,modelname);
                end
                if ~io_gather, %concatenate the output files:
                    fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
                end
                fclose(fid);

            else % Windows

                [fid,msg]=fopen([modelname '.bat'],'w');
                if fid==-1,
                    error('BuildKrigingQueueScript error message: could not open %s.bat for writing (%s)',modelname,msg);
                end
                fprintf(fid,'@echo off\n');
                %NOTE(review): the Windows script launches issm.exe with a
                %solution name, not kriging.exe - confirm intent
                if cluster.interactive
                    fprintf(fid,'"%s/issm.exe" %s "%s" %s ',cluster.codepath,EnumToString(solution),[cluster.executionpath '/' modelname],modelname);
                else
                    fprintf(fid,'"%s/issm.exe" %s "%s" %s 2> %s.errlog >%s.outlog',...
                        cluster.codepath,EnumToString(solution),[cluster.executionpath '/' modelname],modelname,modelname,modelname);
                end
                fclose(fid);
            end

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function UploadQueueJob(cluster,modelname,dirname,filelist)% {{{
            %UPLOADQUEUEJOB - pack the run files in <dirname>.tar.gz and copy it to the cluster
            %
            %   filelist is a cell array of file names. In interactive mode
            %   the errlog and outlog files are added to the archive.
            %   Nothing is done on Windows.

            if ~ispc,

                %compress the files into one zip.
                compressstring=['tar -zcf ' dirname '.tar.gz '];
                for i=1:numel(filelist),
                    compressstring = [compressstring ' ' filelist{i}];
                end
                if cluster.interactive,
                    compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
                end
                status=system(compressstring);
                if status~=0,
                    %do not abort: the upload below is still attempted, as before
                    warning('UploadQueueJob: tar exited with status %i while building %s.tar.gz',status,dirname);
                end

                if cluster.verbose, disp('uploading input file and queueing script'); end
                issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});
            end
        end %}}}
        function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch)% {{{
            %LAUNCHQUEUEJOB - start the run on the cluster
            %
            %   Unix: builds one shell command line and hands it to issmssh.
            %      restart not empty: enter the existing run directory and
            %                         source <modelname>.queue
            %      otherwise        : recreate the run directory, unpack
            %                         <dirname>.tar.gz in it and, unless
            %                         batch is true, source <modelname>.queue
            %   Windows: runs <modelname>.bat with system().
            %   filelist is not used.

            if ~ispc,

                %figure out what shell extension we will use:
                if isempty(strfind(cluster.shell,'csh')),
                    shellext='sh';
                else
                    shellext='csh';
                end

                if cluster.verbose, disp('launching solution sequence on remote cluster'); end

                if ~isempty(restart)
                    launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
                else
                    %an empty dirname would turn the command below into
                    %'rm -rf ./' inside the execution path
                    if isempty(dirname),
                        error('LaunchQueueJob error message: need a non empty directory name!');
                    end
                    if ~batch,
                        launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                            ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
                    else
                        launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                            ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
                    end
                end
                issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
            else
                system([modelname '.bat']);
            end

        end %}}}
        function Download(cluster,dirname,filelist)% {{{
            %DOWNLOAD - copy the files of filelist from the run directory to the current directory
            %
            %   Nothing is done on Windows.

            if ispc(),
                %do nothing
                return;
            end

            %copy files from cluster to current directory
            directory=[cluster.executionpath '/' dirname '/'];
            issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);
        end %}}}
    end
end