source: issm/trunk/src/m/classes/clusters/generic.m@25836

%GENERIC cluster class definition
%
%   Usage:
%      cluster=generic('name','astrid','np',3);
%      cluster=generic('name',oshostname(),'np',3,'login','username');
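%
%   Any public property (np, login, port, codepath, executionpath, ...) can be
%   overridden at construction with name/value pairs, e.g. (illustrative):
%      cluster=generic('name',oshostname(),'np',8,'interactive',0);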

classdef generic
    properties (SetAccess=public)
        % {{{
        name          = '';
        login         = '';
        np            = 1;
        npocean       = 1;
        port          = 0;
        interactive   = 1;
        codepath      = [IssmConfig('ISSM_PREFIX') '/bin'];
        etcpath       = [issmdir() '/etc'];
        executionpath = [issmdir() '/execution'];
        valgrind      = [issmdir() '/externalpackages/valgrind/install/bin/valgrind'];
        valgrindlib   = [issmdir() '/externalpackages/valgrind/install/lib/libmpidebug.so'];
        valgrindsup   = [issmdir() '/externalpackages/valgrind/issm.supp'];
        verbose       = 1;
        shell         = '/bin/sh';
        %}}}
    end
    methods
        function cluster=generic(varargin) % {{{

            %Change the defaults if ispc
            if ispc,
                cluster.codepath      = [issmdir() '\bin'];
                cluster.etcpath       = [issmdir() '\etc'];
                cluster.executionpath = [issmdir() '\execution'];
            end

            %use provided options to change fields
            options=pairoptions(varargin{:});

            %get name
            cluster.name=getfieldvalue(options,'name',oshostname());

            %initialize cluster using user settings if provided
            if (exist([cluster.name '_settings'])==2), eval([cluster.name '_settings']); end
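
            % The settings file is any script named <clustername>_settings.m on the
            % MATLAB path; it is eval'ed in this workspace and may set fields
            % directly, e.g. (illustrative):
            %    cluster.login='username';
            %    cluster.port=1025;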

            %OK get other fields
            cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
        end
        %}}}
        function disp(cluster) % {{{
            % display the object
            disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
            disp(sprintf('    name: %s',cluster.name));
            disp(sprintf('    login: %s',cluster.login));
            disp(sprintf('    np: %i',cluster.np));
            disp(sprintf('    npocean: %i',cluster.npocean));
            disp(sprintf('    port: %i',cluster.port));
            disp(sprintf('    codepath: %s',cluster.codepath));
            disp(sprintf('    executionpath: %s',cluster.executionpath));
            disp(sprintf('    etcpath: %s',cluster.etcpath));
            disp(sprintf('    valgrind: %s',cluster.valgrind));
            disp(sprintf('    valgrindlib: %s',cluster.valgrindlib));
            disp(sprintf('    valgrindsup: %s',cluster.valgrindsup));
            disp(sprintf('    verbose: %i',cluster.verbose));
            disp(sprintf('    shell: %s',cluster.shell));
        end
        %}}}
        function md = checkconsistency(cluster,md,solution,analyses) % {{{
            if cluster.np<1
                md = checkmessage(md,'number of processors should be at least 1');
            end
            if isnan(cluster.np),
                md = checkmessage(md,'number of processors should not be NaN!');
            end
        end
        %}}}
        function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
            % Which executable are we calling?
            executable='issm.exe'; % default

            if isdakota,
                version=IssmConfig('_DAKOTA_VERSION_');
                version=str2num(version(1:3));
                if (version>=6),
                    executable='issm_dakota.exe';
                end
            end
            if isoceancoupling,
                executable='issm_ocean.exe';
            end

            if ~ispc(),
                fid=fopen([modelname '.queue'],'w');
                fprintf(fid,'#!%s\n',cluster.shell);
                if ~isvalgrind,
                    if cluster.interactive
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s/%s %s %s %s\n',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
                        else
                            fprintf(fid,'%s/%s %s %s %s',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
                        end
                    else
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s/%s %s %s %s 2> %s.errlog > %s.outlog &',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                        else
                            fprintf(fid,'%s/%s %s %s %s 2> %s.errlog > %s.outlog &',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                        end
                    end
                elseif isgprof,
                    fprintf(fid,'\n gprof %s/issm.exe gmon.out > %s.performance',cluster.codepath,modelname);
                else
                    %Add --gen-suppressions=all to get suppression lines
                    %fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib); % could be deleted
                    if ismac,
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog ',...
                                cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                        else
                            fprintf(fid,'%s --leak-check=full --dsymutil=yes --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
                                cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                        end
                    else
                        if IssmConfig('_HAVE_MPI_'),
                            fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
                                cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                        else
                            fprintf(fid,'%s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
                                cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                        end
                    end
                end
                if ~io_gather, %concatenate the output files:
                    fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
                end
                fclose(fid);

            else % Windows

                fid=fopen([modelname '.bat'],'w');
                fprintf(fid,'@echo off\n');

                if IssmConfig('_HAVE_PETSC_MPI_'),
                    warning('parallel runs not allowed yet in Windows. Defaulting to 1 CPU');
                    cluster.np=1;
                end

                if cluster.np>1,
                    fprintf(fid,'"C:\\Program Files\\MPICH2\\bin\\mpiexec.exe" -n %i "%s/%s" %s ./ %s ',cluster.np,cluster.codepath,executable,solution,modelname);
                else
                    fprintf(fid,'"%s/%s" %s ./ %s ',cluster.codepath,executable,solution,modelname);
                end
                fclose(fid);
            end

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function BuildQueueScriptMultipleModels(cluster,dirname,modelname,solution,dirnames,modelnames,nps) % {{{

            %some checks:
            if isempty(modelname), error('BuildQueueScriptMultipleModels error message: need a non-empty model name!'); end

            %what is the executable being called?
            executable='issm_slr.exe';

            if ispc(), error('BuildQueueScriptMultipleModels not supported yet on Windows machines'); end

            %write queuing script
            fid=fopen([modelname '.queue'],'w');

            fprintf(fid,'#!%s\n',cluster.shell);

            %number of cpus:
            mpistring=sprintf('mpiexec -np %i ',cluster.np);

            %executable:
            mpistring=[mpistring sprintf('%s/%s ',cluster.codepath,executable)];

            %solution name:
            mpistring=[mpistring sprintf('%s ',solution)];

            %execution directory and model name:
            mpistring=[mpistring sprintf('%s/%s %s',cluster.executionpath,dirname,modelname)];

            %inform main executable of how many icecaps, glaciers and earth models are being run:
            mpistring=[mpistring sprintf(' %i ',length(dirnames))];

            %icecaps, glaciers and earth location, names and number of processors associated:
            for i=1:length(dirnames),
                mpistring=[mpistring sprintf(' %s/%s %s %i ',cluster.executionpath,dirnames{i},modelnames{i},nps{i})];
            end
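
            % At this point mpistring reads, e.g. (illustrative values):
            %    mpiexec -np 24 <codepath>/issm_slr.exe <solution> <executionpath>/<dirname> <modelname> 2 <executionpath>/<dir1> <md1> 8 <executionpath>/<dir2> <md2> 16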

            %log files:
            if ~cluster.interactive,
                mpistring=[mpistring sprintf('2> %s.errlog > %s.outlog',modelname,modelname)];
            end

            %write this long string to disk:
            fprintf(fid,mpistring);
            fclose(fid);

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function BuildQueueScriptIceOcean(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota) % {{{

            %write queuing script
            %what is the executable being called?
            executable='issm_ocean.exe';

            fid=fopen([modelname '.queue'],'w');
            fprintf(fid,'#!%s\n',cluster.shell);
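
            % mpiexec MPMD syntax: the ':' separates two application contexts, so the
            % ice executable (cluster.np ranks) and MITgcm's mitgcmuv (cluster.npocean
            % ranks) are launched in one MPI job and share MPI_COMM_WORLD.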
            if ~isvalgrind,
                fprintf(fid,'mpiexec -np %i %s/%s %s %s %s : -np %i ./mitgcmuv\n',cluster.np,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
            else
                fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s : -np %i ./mitgcmuv\n',...
                    cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
            end
            fclose(fid);

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{

            %write queuing script
            if ~ispc(),

                fid=fopen([modelname '.queue'],'w');
                fprintf(fid,'#!/bin/sh\n');
                if ~isvalgrind,
                    if cluster.interactive
                        fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname);
                    else
                        fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
                    end
                elseif isgprof,
                    fprintf(fid,'\n gprof %s/kriging.exe gmon.out > %s.performance',cluster.codepath,modelname);
                else
                    %Add --gen-suppressions=all to get suppression lines
                    %fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib); % could be deleted
                    fprintf(fid,'mpiexec -np %i %s --leak-check=full --suppressions=%s %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',...
                        cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
                end
                if ~io_gather, %concatenate the output files:
                    fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
                end
                fclose(fid);

            else % Windows

                fid=fopen([modelname '.bat'],'w');
                fprintf(fid,'@echo off\n');
                if cluster.interactive
                    fprintf(fid,'"%s/issm.exe" %s "%s" %s ',cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname);
                else
                    fprintf(fid,'"%s/issm.exe" %s "%s" %s 2> %s.errlog >%s.outlog',...
                        cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
                end
                fclose(fid);
            end

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=fopen([modelname '.errlog'],'w'); fclose(fid);
                fid=fopen([modelname '.outlog'],'w'); fclose(fid);
            end
        end
        %}}}
        function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{

            if ~ispc,

                %compress the files into one tarball.
                compressstring=['tar -zcf ' dirname '.tar.gz '];
                for i=1:numel(filelist),
                    if ~exist(filelist{i},'file')
                        error(['File ' filelist{i} ' not found']);
                    end
                    compressstring = [compressstring ' ' filelist{i}];
                end
                if cluster.interactive,
                    compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
                end
                system(compressstring);

                if cluster.verbose, disp('uploading input file and queueing script'); end
                issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});
            end
        end %}}}
        function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{

            if ~ispc,

                %figure out what shell extension we will use:
                if isempty(strfind(cluster.shell,'csh')),
                    shellext='sh';
                else
                    shellext='csh';
                end

                if cluster.verbose, disp('launching solution sequence on remote cluster'); end

                if ~isempty(restart)
                    launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
                else
                    if ~batch,
                        launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                            ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
                    else
                        launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                            ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
                    end
                end
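
                % For a fresh (non-restart) run, launchcommand sources the ISSM
                % environment, recreates the run directory, unpacks the uploaded
                % tarball, and (except in batch mode) sources the queue script.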
                issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
            else
                system([modelname '.bat']);
            end

        end %}}}
        function LaunchQueueJobIceOcean(cluster,modelname,dirname,filelist,restart,batch) % {{{

            if ~ispc,

                %figure out what shell extension we will use:
                if isempty(strfind(cluster.shell,'csh')),
                    shellext='sh';
                else
                    shellext='csh';
                end

                if cluster.verbose, disp('launching solution sequence on remote cluster'); end

                if ~isempty(restart)
                    launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
                else
                    if ~batch,
                        launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
                    else
                        launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                            ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
                    end
                end
                issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
            else
                system([modelname '.bat']);
            end

        end %}}}
        function Download(cluster,dirname,filelist) % {{{

            if ispc(),
                %do nothing
                return;
            end

            %copy files from cluster to current directory
            directory=[cluster.executionpath '/' dirname '/'];
            issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);
        end %}}}
    end
end