source: issm/trunk/src/m/classes/clusters/generic.m

Last change on this file was 28013, checked in by Mathieu Morlighem, 16 months ago

merged trunk-jpl and trunk for revision 28011

File size: 14.5 KB
Line 
1%GENERIC cluster class definition
2%
3% Usage:
4% cluster=generic('name','astrid','np',3);
5% cluster=generic('name',oshostname(),'np',3,'login','username');
6%
7% TODO:
8% - Add support for restart to Windows (under MSYS2), then activate tests 125
9% and 126 in test suite
10%
11
classdef generic
	properties (SetAccess=public)
		% {{{
		name          = '';                  % hostname of the machine to run on
		login         = '';                  % remote login name (empty for local runs)
		np            = 1;                   % number of MPI processes for the ice model
		npocean       = 1;                   % number of MPI processes for the ocean model (mitgcm)
		port          = 0;                   % ssh port (0 = default)
		interactive   = 1;                   % 1: run in foreground, 0: background with redirected logs
		codepath      = [issmdir() '/bin'];  % path to ISSM executables
		etcpath       = [issmdir() '/etc'];  % path to environment scripts
		executionpath = [issmdir() '/execution']; % where run directories are created
		valgrind      = [issmdir() '/externalpackages/valgrind/install/bin/valgrind'];
		valgrindlib   = [issmdir() '/externalpackages/valgrind/install/lib/libmpidebug.so'];
		valgrindsup   = [issmdir() '/externalpackages/valgrind/issm.supp'];
		verbose       = 1;                   % 1: print progress messages
		shell         = '/bin/sh';           % shell used in the generated queue script shebang
		%}}}
	end
	methods
		function cluster=generic(varargin) % {{{
		%GENERIC constructor: accepts 'property',value pairs, e.g.
		%   cluster=generic('name',oshostname(),'np',3,'login','username');

			%Change the path defaults to backslash-style if running on Windows
			if ispc,
				cluster.codepath      = [issmdir() '\bin'];
				cluster.etcpath       = [issmdir() '\etc'];
				cluster.executionpath = [issmdir() '\execution'];
			end

			%use provided options to change fields
			options=pairoptions(varargin{:});

			%get name first: the per-user settings file is keyed on it
			cluster.name=getfieldvalue(options,'name',oshostname());

			%initialize cluster using user settings file (<name>_settings.m) if provided
			%('file' qualifier avoids matching variables or directories of the same name)
			if (exist([cluster.name '_settings'],'file')==2), eval([cluster.name '_settings']); end

			%OK get other fields (explicit options override the settings file)
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
		function disp(cluster) % {{{
		%DISP display the object fields

			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf('    name: %s',cluster.name));
			disp(sprintf('    login: %s',cluster.login));
			disp(sprintf('    np: %i',cluster.np));
			disp(sprintf('    npocean: %i',cluster.npocean));
			disp(sprintf('    port: %i',cluster.port));
			disp(sprintf('    interactive: %i',cluster.interactive));
			disp(sprintf('    codepath: %s',cluster.codepath));
			disp(sprintf('    executionpath: %s',cluster.executionpath));
			disp(sprintf('    etcpath: %s',cluster.etcpath));
			disp(sprintf('    valgrind: %s',cluster.valgrind));
			disp(sprintf('    valgrindlib: %s',cluster.valgrindlib));
			disp(sprintf('    valgrindsup: %s',cluster.valgrindsup));
			%verbose is numeric: %i, not %s (the latter would print char(verbose))
			disp(sprintf('    verbose: %i',cluster.verbose));
			disp(sprintf('    shell: %s',cluster.shell));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
		%NPROCS return the number of MPI processes used by the ice model
			numprocs=cluster.np;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
		%CHECKCONSISTENCY sanity-check cluster settings, accumulating messages in md
			if cluster.np<1
				md = checkmessage(md,['number of processors should be at least 1']);
			end
			if isnan(cluster.np),
				md = checkmessage(md,'number of processors should not be NaN!');
			end
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
		%BUILDQUEUESCRIPT write <modelname>.queue (Unix) or <modelname>.bat (Windows)
		%that launches the appropriate ISSM executable, optionally under valgrind/gprof.

			% Which executable are we calling?
			executable='issm.exe'; % default

			if isdakota,
				version=IssmConfig('_DAKOTA_VERSION_');
				version=str2num(version(1:3));
				if (version>=6),
					executable='issm_dakota.exe';
				end
			end
			if isoceancoupling,
				executable='issm_ocean.exe';
			end

			if ~ispc(),
				% Check that executable exists at the right path
				if ~exist([cluster.codepath '/' executable],'file'),
					error(['File ' cluster.codepath '/' executable ' does not exist']);
				end

				% Process codepath and prepend empty spaces with \ to avoid errors in queuing script
				codepath=strrep(cluster.codepath,' ','\ ');

				% Write queuing script
				fid=fopen([modelname '.queue'],'w');
				fprintf(fid,'#!%s\n',cluster.shell);
				if isvalgrind,
					%Add --gen-suppressions=all to get suppression lines
					%fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib); it could be deleted
					if ismac,
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s --leak-check=full --show-leak-kinds=all --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog ',...
								cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname], modelname,modelname,modelname);
						else
							fprintf(fid,'%s --leak-check=full --show-leak-kinds=all --dsymutil=yes --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
								cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname], modelname,modelname,modelname);
						end
					else
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
								cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog > %s.outlog',...
								cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					end
				elseif isgprof,
					fprintf(fid,'\n gprof %s/issm.exe gmon.out > %s.performance',cluster.codepath,modelname);
				else
					if cluster.interactive
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s/%s %s %s %s\n',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
						else
							fprintf(fid,'%s/%s %s %s %s',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
						end
					else
						%non-interactive: launch in background with logs redirected
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s/%s %s %s %s 2> %s.errlog > %s.outlog &',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s/%s %s %s %s 2> %s.errlog > %s.outlog &',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					end
				end
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

			else % Windows
				fid=fopen([modelname '.bat'],'w');
				fprintf(fid,'@echo off\n');

				if cluster.np>1,
					%use Windows-style separator consistently with the serial branch below
					fprintf(fid,'"C:\\Program Files\\Microsoft MPI\\Bin\\mpiexec.exe" -n %i "%s\\%s" %s ./ %s',cluster.np,cluster.codepath,executable,solution,modelname);
				else
					fprintf(fid,'"%s\\%s" %s ./ %s',cluster.codepath,executable,solution,modelname);
				end
				fclose(fid);
			end

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function BuildQueueScriptMultipleModels(cluster,dirname,modelname,solution,dirnames,modelnames,nps) % {{{
		%BUILDQUEUESCRIPTMULTIPLEMODELS write a queue script launching issm_slc.exe
		%for a coupled run over several icecap/glacier/earth models.

			%some checks:
			if isempty(modelname), error('BuildQueueScriptMultipleModels error message: need a non empty model name!');end

			%what is the executable being called?
			executable='issm_slc.exe';

			if ispc(), error('BuildQueueScriptMultipleModels not supported yet on Windows machines');end;

			%write queuing script
			fid=fopen([modelname '.queue'],'w');

			fprintf(fid,'#!%s\n',cluster.shell);

			%number of cpus:
			mpistring=sprintf('mpiexec -np %i ',cluster.np);

			%executable:
			mpistring=[mpistring sprintf('%s/%s ',cluster.codepath,executable)];

			%solution name:
			mpistring=[mpistring sprintf('%s ',solution)];

			%execution directory and model name:
			mpistring=[mpistring sprintf('%s/%s %s',cluster.executionpath,dirname,modelname)];

			%inform main executable of how many icecaps, glaciers and earth models are being run:
			mpistring=[mpistring sprintf(' %i ',length(dirnames))];

			%icecaps, glaciers and earth location, names and number of processors associated:
			for i=1:length(dirnames),
				mpistring=[mpistring sprintf(' %s/%s %s %i ',cluster.executionpath,dirnames{i},modelnames{i},nps{i})];
			end

			%log files:
			if ~cluster.interactive,
				mpistring=[mpistring sprintf('2> %s.errlog > %s.outlog',modelname,modelname)];
			end

			%write this long string to disk ('%s' so that %/\ in paths are not
			%misinterpreted as format specifiers):
			fprintf(fid,'%s',mpistring);
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function BuildQueueScriptIceOcean(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota) % {{{
		%BUILDQUEUESCRIPTICEOCEAN write a queue script launching issm_ocean.exe
		%coupled (via MPMD colon syntax) with np ocean mitgcmuv processes.

			%write queuing script
			%what is the executable being called?
			executable='issm_ocean.exe';

			fid=fopen([modelname '.queue'],'w');
			fprintf(fid,'#!%s\n',cluster.shell);
			if ~isvalgrind,
				fprintf(fid,'mpiexec -np %i %s/%s %s %s %s : -np %i ./mitgcmuv\n',cluster.np,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);

			else
				fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s : -np %i ./mitgcmuv\n',...
					cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{
		%BUILDKRIGINGQUEUESCRIPT write a queue script launching kriging.exe,
		%optionally under valgrind or gprof.

			%write queuing script
			if ~ispc(),

				fid=fopen([modelname '.queue'],'w');
				fprintf(fid,'#!/bin/sh\n');
				if ~isvalgrind,
					if cluster.interactive
						fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname);
					else
						fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
					end
				elseif isgprof,
					fprintf(fid,'\n gprof %s/kriging.exe gmon.out > %s.performance',cluster.codepath,modelname);
				else
					%Add --gen-suppressions=all to get suppression lines
					%fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib); it could be deleted
					fprintf(fid,'mpiexec -np %i %s --leak-check=full --suppressions=%s %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',...
						cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
				end
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

			else % Windows

				fid=fopen([modelname '.bat'],'w');
				fprintf(fid,'@echo off\n');
				if cluster.interactive
					fprintf(fid,'"%s/issm.exe" %s "%s" %s ',cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname);
				else
					fprintf(fid,'"%s/issm.exe" %s "%s" %s 2> %s.errlog >%s.outlog',...
						cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
				end
				fclose(fid);
			end

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
		%UPLOADQUEUEJOB tar up the input files and scp them to the cluster
		%execution path (no-op on Windows, where runs are local).

			if ~ispc,

				%compress the files into one zip.
				compressstring=['tar -zcf ' dirname '.tar.gz '];
				for i=1:numel(filelist),
					if ~exist(filelist{i},'file')
						error(['File ' filelist{i} ' not found']);
					end
					compressstring = [compressstring ' ' filelist{i}];
				end
				if cluster.interactive,
					compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
				end
				system(compressstring);

				if cluster.verbose, disp('uploading input file and queuing script'); end
				issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});
			end
		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
		%LAUNCHQUEUEJOB ssh to the cluster, unpack the tarball in a fresh run
		%directory and source the queue script (or run the .bat file on Windows).

			if ~ispc,
				%figure out what shell extension we will use:
				if isempty(strfind(cluster.shell,'csh')),
					shellext='sh';
				else
					shellext='csh';
				end

				if cluster.verbose, disp('launching solution sequence on remote cluster'); end

				if ~isempty(restart)
					%restart: reuse the existing run directory
					launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
				else
					if ~batch,
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
					else
						%batch: set up the run directory but do not launch
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
					end
				end
				issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
			else
				system([modelname '.bat']);
			end

		end %}}}
		function LaunchQueueJobIceOcean(cluster,modelname,dirname,filelist,restart,batch) % {{{
		%LAUNCHQUEUEJOBICEOCEAN like LaunchQueueJob, but for coupled ice/ocean runs:
		%the non-batch case unpacks directly in executionpath (shared with mitgcm).

			if ~ispc,

				%figure out what shell extension we will use:
				if isempty(strfind(cluster.shell,'csh')),
					shellext='sh';
				else
					shellext='csh';
				end

				if cluster.verbose, disp('launching solution sequence on remote cluster'); end

				if ~isempty(restart)
					launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
				else
					if ~batch,
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
					else
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
					end
				end
				issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
			else
				system([modelname '.bat']);
			end

		end %}}}
		function Download(cluster,dirname,filelist) % {{{
		%DOWNLOAD scp result files back from the cluster run directory
		%(no-op on Windows, where runs are local).

			if ispc(),
				%do nothing
				return;
			end

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);
		end %}}}
	end
end
Note: See TracBrowser for help on using the repository browser.