%GENERIC cluster class definition
%
% Usage:
%    cluster=generic('name','astrid','np',3);
%    cluster=generic('name',oshostname(),'np',3,'login','username');
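%    cluster=generic('name','remote.example.org','np',8,'login','username','port',1070);
%
% The last example is illustrative only: the hostname, np, and port values
% are placeholders for a remote configuration, not defaults.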
%
% TODO:
% - Add support for restart on Windows (under MSYS2), then activate tests 125
%   and 126 in the test suite
%

classdef generic
	properties (SetAccess=public)
		% {{{
		name = '';
		login = '';
		np = 1;
		npocean = 1;
		port = 0;
		interactive = 1;
		codepath = [IssmConfig('ISSM_PREFIX') '/bin'];
		etcpath = [issmdir() '/etc'];
		executionpath = [issmdir() '/execution'];
		valgrind = [issmdir() '/externalpackages/valgrind/install/bin/valgrind'];
		valgrindlib = [issmdir() '/externalpackages/valgrind/install/lib/libmpidebug.so'];
		valgrindsup = [issmdir() '/externalpackages/valgrind/issm.supp'];
		verbose = 1;
		shell = '/bin/sh';
		%}}}
	end
	methods
		function cluster=generic(varargin) % {{{

			%Change the defaults if ispc
			if ispc && ~ismingw,
				cluster.codepath = [issmdir() '\bin'];
				cluster.etcpath = [issmdir() '\etc'];
				cluster.executionpath = [issmdir() '\execution'];
			end

			%use provided options to change fields
			options=pairoptions(varargin{:});

			%get name
			cluster.name=getfieldvalue(options,'name',oshostname());

			%initialize cluster using user settings if provided
			if (exist([cluster.name '_settings'])==2), eval([cluster.name '_settings']); end

			%OK get other fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
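		% A minimal sketch of an optional per-host settings file
		% ('<name>_settings.m', sourced by the constructor above when found on
		% the path); field names match the properties of this class, and the
		% values below are placeholders:
		%    cluster.login='username';
		%    cluster.port=1070;
		%    cluster.codepath='/home/username/issm/bin';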
		function disp(cluster) % {{{
			% display the object
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf(' name: %s',cluster.name));
			disp(sprintf(' login: %s',cluster.login));
			disp(sprintf(' np: %i',cluster.np));
			disp(sprintf(' npocean: %i',cluster.npocean));
			disp(sprintf(' port: %i',cluster.port));
			disp(sprintf(' codepath: %s',cluster.codepath));
			disp(sprintf(' executionpath: %s',cluster.executionpath));
			disp(sprintf(' etcpath: %s',cluster.etcpath));
			disp(sprintf(' valgrind: %s',cluster.valgrind));
			disp(sprintf(' valgrindlib: %s',cluster.valgrindlib));
			disp(sprintf(' valgrindsup: %s',cluster.valgrindsup));
			disp(sprintf(' verbose: %i',cluster.verbose));
			disp(sprintf(' shell: %s',cluster.shell));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			numprocs=cluster.np;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			if cluster.np<1,
				md = checkmessage(md,'number of processors should be at least 1');
			end
			if isnan(cluster.np),
				md = checkmessage(md,'number of processors should not be NaN!');
			end
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
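			% Writes <modelname>.queue (Unix) or <modelname>.bat (Windows).
			% For an interactive MPI run, the generated Unix script has the
			% form below (all placeholders illustrative):
			%    #!/bin/sh
			%    mpiexec -np <np> <codepath>/issm.exe <solution> <executionpath>/<dirname> <modelname>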
			% Which executable are we calling?
			executable='issm.exe'; % default

			if isdakota,
				version=IssmConfig('_DAKOTA_VERSION_');
				version=str2num(version(1:3));
				if (version>=6),
					executable='issm_dakota.exe';
				end
			end
			if isoceancoupling,
				executable='issm_ocean.exe';
			end

			if ~ispc(),
				fid=fopen([modelname '.queue'],'w');
				fprintf(fid,'#!%s\n',cluster.shell);
				if ~isvalgrind,
					if cluster.interactive
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s/%s %s %s %s\n',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
						else
							fprintf(fid,'%s/%s %s %s %s',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
						end
					else
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s/%s %s %s %s 2> %s.errlog > %s.outlog &',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s/%s %s %s %s 2> %s.errlog > %s.outlog &',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					end
				elseif isgprof,
					fprintf(fid,'\n gprof %s/issm.exe gmon.out > %s.performance',cluster.codepath,modelname);
				else
					%Add --gen-suppressions=all to get suppression lines
					%fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib); %kept for reference; could be deleted
					if ismac,
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog ',...
								cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s --leak-check=full --dsymutil=yes --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
								cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					else
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
								cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
								cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					end
				end
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

			else % Windows

				fid=fopen([modelname '.bat'],'w');
				fprintf(fid,'@echo off\n');

				% if IssmConfig('_HAVE_PETSC_MPI_'),
				%    warning('parallel runs not allowed yet in Windows. Defaulting to 1 cpus');
				%    cluster.np=1;
				% end

				if cluster.np>1,
					% fprintf(fid,'"C:\\Program Files\\MPICH2\\bin\\mpiexec.exe" -n %i "%s/%s" %s ./ %s',cluster.np,cluster.codepath,executable,solution,modelname);
					fprintf(fid,'"C:\\Program Files\\Microsoft MPI\\Bin\\mpiexec.exe" -n %i "%s/%s" %s ./ %s',cluster.np,cluster.codepath,executable,solution,modelname);
				else
					fprintf(fid,'"%s/%s" %s ./ %s',cluster.codepath,executable,solution,modelname);
				end
				fclose(fid);
			end

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function BuildQueueScriptMultipleModels(cluster,dirname,modelname,solution,dirnames,modelnames,nps) % {{{
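			% Assembles a single mpiexec line covering all icecaps, glaciers
			% and earth models; the composed string has the form below
			% (placeholders illustrative):
			%    mpiexec -np <np> <codepath>/issm_slc.exe <solution> <executionpath>/<dirname> <modelname> <nmodels> \
			%       <executionpath>/<dirnames{i}> <modelnames{i}> <nps{i}> ...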

			%some checks:
			if isempty(modelname), error('BuildQueueScriptMultipleModels error message: need a non-empty model name!'); end

			%what is the executable being called?
			executable='issm_slc.exe';

			if ispc(), error('BuildQueueScriptMultipleModels not supported yet on Windows machines'); end

			%write queuing script
			fid=fopen([modelname '.queue'],'w');

			fprintf(fid,'#!%s\n',cluster.shell);

			%number of cpus:
			mpistring=sprintf('mpiexec -np %i ',cluster.np);

			%executable:
			mpistring=[mpistring sprintf('%s/%s ',cluster.codepath,executable)];

			%solution name:
			mpistring=[mpistring sprintf('%s ',solution)];

			%execution directory and model name:
			mpistring=[mpistring sprintf('%s/%s %s',cluster.executionpath,dirname,modelname)];

			%inform main executable of how many icecaps, glaciers and earth models are being run:
			mpistring=[mpistring sprintf(' %i ',length(dirnames))];

			%icecaps, glaciers and earth location, names and number of processors associated:
			for i=1:length(dirnames),
				mpistring=[mpistring sprintf(' %s/%s %s %i ',cluster.executionpath,dirnames{i},modelnames{i},nps{i})];
			end

			%log files:
			if ~cluster.interactive,
				mpistring=[mpistring sprintf('2> %s.errlog > %s.outlog',modelname,modelname)];
			end

			%write this long string to disk:
			fprintf(fid,mpistring);
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function BuildQueueScriptIceOcean(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota) % {{{
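			% Generates an MPMD launch coupling ISSM with the MITgcm ocean
			% executable (./mitgcmuv); the resulting line has the form below
			% (placeholders illustrative):
			%    mpiexec -np <np> <codepath>/issm_ocean.exe <solution> <executionpath> <modelname> : -np <npocean> ./mitgcmuv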

			%write queuing script
			%what is the executable being called?
			executable='issm_ocean.exe';

			fid=fopen([modelname '.queue'],'w');
			fprintf(fid,'#!%s\n',cluster.shell);
			if ~isvalgrind,
				fprintf(fid,'mpiexec -np %i %s/%s %s %s %s : -np %i ./mitgcmuv\n',cluster.np,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
			else
				fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s : -np %i ./mitgcmuv\n',...
					cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{
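			% Same pattern as BuildQueueScript, but for kriging.exe; the
			% interactive Unix script has the form below (placeholders
			% illustrative):
			%    #!/bin/sh
			%    mpiexec -np <np> <codepath>/kriging.exe <executionpath>/<modelname> <modelname>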

			%write queuing script
			if ~ispc(),

				fid=fopen([modelname '.queue'],'w');
				fprintf(fid,'#!/bin/sh\n');
				if ~isvalgrind,
					if cluster.interactive
						fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname);
					else
						fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s 2> %s.errlog > %s.outlog ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
					end
				elseif isgprof,
					fprintf(fid,'\n gprof %s/kriging.exe gmon.out > %s.performance',cluster.codepath,modelname);
				else
					%Add --gen-suppressions=all to get suppression lines
					%fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib); %kept for reference; could be deleted
					fprintf(fid,'mpiexec -np %i %s --leak-check=full --suppressions=%s %s/kriging.exe %s %s 2> %s.errlog > %s.outlog ',...
						cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
				end
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

			else % Windows

				fid=fopen([modelname '.bat'],'w');
				fprintf(fid,'@echo off\n');
				if cluster.interactive
					fprintf(fid,'"%s/issm.exe" %s "%s" %s ',cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname);
				else
					fprintf(fid,'"%s/issm.exe" %s "%s" %s 2> %s.errlog > %s.outlog',...
						cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
				end
				fclose(fid);
			end

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
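			% Bundles the input files into one tarball and copies it to the
			% cluster's execution path; the composed command has the form below
			% (file names illustrative):
			%    tar -zcf <dirname>.tar.gz <modelname>.bin <modelname>.queue ...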

			if ~ispc,

				%compress the files into one tarball.
				compressstring=['tar -zcf ' dirname '.tar.gz '];
				for i=1:numel(filelist),
					if ~exist(filelist{i},'file')
						error(['File ' filelist{i} ' not found']);
					end
					compressstring = [compressstring ' ' filelist{i}];
				end
				if cluster.interactive,
					compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
				end
				system(compressstring);

				if cluster.verbose, disp('uploading input file and queueing script'); end
				issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});
			end
		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
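			% Runs the uploaded job over ssh; for a fresh (non-restart,
			% non-batch) run under an sh-type shell, the remote command has the
			% form below (placeholders illustrative):
			%    source <etcpath>/environment.sh && cd <executionpath> && rm -rf ./<dirname> && mkdir <dirname> \
			%       && cd <dirname> && mv ../<dirname>.tar.gz ./ && tar -zxf <dirname>.tar.gz && source <modelname>.queue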

			if ~ispc,
				%figure out what shell extension we will use:
				if isempty(strfind(cluster.shell,'csh')),
					shellext='sh';
				else
					shellext='csh';
				end

				if cluster.verbose, disp('launching solution sequence on remote cluster'); end

				if ~isempty(restart)
					launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
				else
					if ~batch,
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
					else
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
					end
				end
				issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
			else
				system([modelname '.bat']);
			end
		end %}}}
		function LaunchQueueJobIceOcean(cluster,modelname,dirname,filelist,restart,batch) % {{{

			if ~ispc,

				%figure out what shell extension we will use:
				if isempty(strfind(cluster.shell,'csh')),
					shellext='sh';
				else
					shellext='csh';
				end

				if cluster.verbose, disp('launching solution sequence on remote cluster'); end

				if ~isempty(restart)
					launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
				else
					if ~batch,
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
					else
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
					end
				end
				issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
			else
				system([modelname '.bat']);
			end

		end %}}}
		function Download(cluster,dirname,filelist) % {{{

			if ispc(),
				%do nothing
				return;
			end

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);
		end %}}}
	end
end