source: issm/trunk/src/m/classes/clusters/generic.m@22822

Last change on this file since 22822 was 22822, checked in by Mathieu Morlighem, 7 years ago

merged trunk-jpl and trunk for revision 22820

%GENERIC cluster class definition
%
%   Usage:
%      cluster=generic('name','astrid','np',3);
%      cluster=generic('name',oshostname(),'np',3,'login','username');

classdef generic
   properties (SetAccess=public)
      % {{{
      name          = '';
      login         = '';
      np            = 1;
      npocean       = 1;
      port          = 0;
      interactive   = 1;
      codepath      = [IssmConfig('ISSM_PREFIX') '/bin'];
      etcpath       = [issmdir() '/etc'];
      executionpath = [issmdir() '/execution'];
      valgrind      = [issmdir() '/externalpackages/valgrind/install/bin/valgrind'];
      valgrindlib   = [issmdir() '/externalpackages/valgrind/install/lib/libmpidebug.so'];
      valgrindsup   = [issmdir() '/externalpackages/valgrind/issm.supp'];
      verbose       = 1;
      shell         = '/bin/sh';
      %}}}
   end
   methods
      function cluster=generic(varargin) % {{{

         %Change the defaults if ispc
         if ispc,
            cluster.codepath      = [issmdir() '\bin'];
            cluster.etcpath       = [issmdir() '\etc'];
            cluster.executionpath = [issmdir() '\execution'];
         end

         %use provided options to change fields
         options=pairoptions(varargin{:});

         %get name
         cluster.name=getfieldvalue(options,'name',oshostname());

         %initialize cluster using user settings if provided
         if (exist([cluster.name '_settings'])==2), eval([cluster.name '_settings']); end

         %OK get other fields
         cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
      end
      %}}}
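      % If a script named '<cluster.name>_settings.m' is on the MATLAB path, the
      % constructor above eval()s it so that site defaults can be overridden. A
      % minimal sketch of such a file (all values below are hypothetical, not
      % shipped defaults):
      %
      %    %astrid_settings.m
      %    cluster.login='username';
      %    cluster.port=1024;
      %    cluster.codepath='/home/username/issm/bin';
      %    cluster.executionpath='/home/username/issm/execution';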
      function disp(cluster) % {{{
         % display the object
         disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
         disp(sprintf('   name: %s',cluster.name));
         disp(sprintf('   login: %s',cluster.login));
         disp(sprintf('   np: %i',cluster.np));
         disp(sprintf('   npocean: %i',cluster.npocean));
         disp(sprintf('   port: %i',cluster.port));
         disp(sprintf('   codepath: %s',cluster.codepath));
         disp(sprintf('   executionpath: %s',cluster.executionpath));
         disp(sprintf('   etcpath: %s',cluster.etcpath));
         disp(sprintf('   valgrind: %s',cluster.valgrind));
         disp(sprintf('   valgrindlib: %s',cluster.valgrindlib));
         disp(sprintf('   valgrindsup: %s',cluster.valgrindsup));
         disp(sprintf('   verbose: %i',cluster.verbose)); %verbose is numeric, so %i (was %s)
         disp(sprintf('   shell: %s',cluster.shell));
      end
      %}}}
      function md = checkconsistency(cluster,md,solution,analyses) % {{{
         if cluster.np<1
            md = checkmessage(md,'number of processors should be at least 1');
         end
         if isnan(cluster.np),
            md = checkmessage(md,'number of processors should not be NaN!');
         end
      end
      %}}}
      function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{

         %write queuing script
         %what is the executable being called?
         executable='issm.exe';
         if isdakota,
            version=IssmConfig('_DAKOTA_VERSION_'); version=str2num(version(1:3));
            if (version>=6),
               executable='issm_dakota.exe';
            end
         end
         if isoceancoupling,
            executable='issm_ocean.exe';
         end

         if ~ispc(),

            fid=fopen([modelname '.queue'],'w');
            fprintf(fid,'#!%s\n',cluster.shell);
            if ~isvalgrind,
               if cluster.interactive
                  if IssmConfig('_HAVE_MPI_'),
                     fprintf(fid,'mpiexec -np %i %s/%s %s %s %s \n',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
                  else
                     fprintf(fid,'%s/%s %s %s %s ',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
                  end
               else
                  if IssmConfig('_HAVE_MPI_'),
                     fprintf(fid,'mpiexec -np %i %s/%s %s %s %s 2> %s.errlog >%s.outlog &',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                  else
                     fprintf(fid,'%s/%s %s %s %s 2> %s.errlog >%s.outlog &',cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                  end
               end
            elseif isgprof,
               fprintf(fid,'\n gprof %s/issm.exe gmon.out > %s.performance',cluster.codepath,modelname);
            else
               %Add --gen-suppressions=all to get suppression lines
               fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib);
               if ismac,
                  if IssmConfig('_HAVE_MPI_'),
                     fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                        cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                  else
                     fprintf(fid,'%s --leak-check=full --dsymutil=yes --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                        cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                  end
               else
                  if IssmConfig('_HAVE_MPI_'),
                     fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                        cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                  else
                     fprintf(fid,'%s --leak-check=full --error-limit=no --suppressions=%s %s/%s %s %s %s 2> %s.errlog >%s.outlog ',...
                        cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname,modelname,modelname);
                  end
               end
            end
            if ~io_gather, %concatenate the output files:
               fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
            end
            fclose(fid);

         else % Windows

            fid=fopen([modelname '.bat'],'w');
            fprintf(fid,'@echo off\n');

            if IssmConfig('_HAVE_PETSC_MPI_'),
               warning('parallel runs not yet supported on Windows, defaulting to 1 cpu');
               cluster.np=1;
            end

            if cluster.np>1,
               fprintf(fid,'"C:\\Program Files\\MPICH2\\bin\\mpiexec.exe" -n %i "%s/%s" %s ./ %s ',cluster.np,cluster.codepath,executable,solution,modelname);
            else
               fprintf(fid,'"%s/%s" %s ./ %s ',cluster.codepath,executable,solution,modelname);
            end
            fclose(fid);
         end

         %in interactive mode, create a run file, and errlog and outlog file
         if cluster.interactive,
            fid=fopen([modelname '.errlog'],'w'); fclose(fid);
            fid=fopen([modelname '.outlog'],'w'); fclose(fid);
         end
      end
      %}}}
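      % For reference, a hedged sketch of the one-line script that the MPI,
      % non-interactive, non-valgrind branch above writes to <modelname>.queue
      % (paths, solution name and processor count below are hypothetical):
      %
      %    #!/bin/sh
      %    mpiexec -np 4 /home/username/issm/bin/issm.exe StressbalanceSolution /home/username/issm/execution/run model 2> model.errlog >model.outlog &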
      function BuildQueueScriptMultipleModels(cluster,dirname,modelname,solution,dirnames,modelnames,nps) % {{{

         %some checks:
         if isempty(modelname), error('BuildQueueScriptMultipleModels error message: need a non-empty model name!'); end

         %what is the executable being called?
         executable='issm_slr.exe';

         if ispc(), error('BuildQueueScriptMultipleModels not supported yet on Windows machines'); end

         %write queuing script
         fid=fopen([modelname '.queue'],'w');

         fprintf(fid,'#!%s\n',cluster.shell);

         %number of cpus:
         mpistring=sprintf('mpiexec -np %i ',cluster.np);

         %executable:
         mpistring=[mpistring sprintf('%s/%s ',cluster.codepath,executable)];

         %solution name:
         mpistring=[mpistring sprintf('%s ',solution)];

         %execution directory and model name:
         mpistring=[mpistring sprintf('%s/%s %s',cluster.executionpath,dirname,modelname)];

         %inform main executable of how many icecaps, glaciers and earth models are being run:
         mpistring=[mpistring sprintf(' %i ',length(dirnames))];

         %icecaps, glaciers and earth location, names and number of processors associated:
         for i=1:length(dirnames),
            mpistring=[mpistring sprintf(' %s/%s %s %i ',cluster.executionpath,dirnames{i},modelnames{i},nps{i})];
         end

         %log files:
         if ~cluster.interactive,
            mpistring=[mpistring sprintf('2> %s.errlog > %s.outlog',modelname,modelname)];
         end

         %write this long string to disk:
         fprintf(fid,mpistring);
         fclose(fid);

         %in interactive mode, create a run file, and errlog and outlog file
         if cluster.interactive,
            fid=fopen([modelname '.errlog'],'w'); fclose(fid);
            fid=fopen([modelname '.outlog'],'w'); fclose(fid);
         end
      end
      %}}}
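      % A hedged sketch of the command line assembled above for two icecaps plus
      % an earth model (directory, model and processor-count values are
      % hypothetical):
      %
      %    mpiexec -np 4 <codepath>/issm_slr.exe <solution> <executionpath>/<dirname> <modelname> 3  <executionpath>/icecap1 md1 1  <executionpath>/icecap2 md2 1  <executionpath>/earth earth 2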
      function BuildQueueScriptIceOcean(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota) % {{{

         %write queuing script
         %what is the executable being called?
         executable='issm_ocean.exe';

         fid=fopen([modelname '.queue'],'w');
         fprintf(fid,'#!%s\n',cluster.shell);
         if ~isvalgrind,
            fprintf(fid,'mpiexec -np %i %s/%s %s %s %s : -np %i ./mitgcmuv\n',cluster.np,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
         else
            fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/%s %s %s %s : -np %i ./mitgcmuv\n',...
               cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,executable,solution,cluster.executionpath,modelname,cluster.npocean);
         end
         fclose(fid);

         %in interactive mode, create a run file, and errlog and outlog file
         if cluster.interactive,
            fid=fopen([modelname '.errlog'],'w'); fclose(fid);
            fid=fopen([modelname '.outlog'],'w'); fclose(fid);
         end
      end
      %}}}
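      % The script above starts ice and ocean as a single MPMD job: ISSM ranks
      % before the colon, MITgcm ranks after it. A hedged sketch of the generated
      % line (processor counts and paths hypothetical):
      %
      %    mpiexec -np 4 <codepath>/issm_ocean.exe <solution> <executionpath> <modelname> : -np 2 ./mitgcmuv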
      function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{

         %write queuing script
         if ~ispc(),

            fid=fopen([modelname '.queue'],'w');
            fprintf(fid,'#!/bin/sh\n');
            if ~isvalgrind,
               if cluster.interactive
                  fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname);
               else
                  fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
               end
            elseif isgprof,
               fprintf(fid,'\n gprof %s/kriging.exe gmon.out > %s.performance',cluster.codepath,modelname);
            else
               %Add --gen-suppressions=all to get suppression lines
               fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib);
               fprintf(fid,'mpiexec -np %i %s --leak-check=full --suppressions=%s %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',...
                  cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
            end
            if ~io_gather, %concatenate the output files:
               fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
            end
            fclose(fid);

         else % Windows

            fid=fopen([modelname '.bat'],'w');
            fprintf(fid,'@echo off\n');
            if cluster.interactive
               fprintf(fid,'"%s/issm.exe" %s "%s" %s ',cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname);
            else
               fprintf(fid,'"%s/issm.exe" %s "%s" %s 2> %s.errlog >%s.outlog',...
                  cluster.codepath,solution,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
            end
            fclose(fid);
         end

         %in interactive mode, create a run file, and errlog and outlog file
         if cluster.interactive,
            fid=fopen([modelname '.errlog'],'w'); fclose(fid);
            fid=fopen([modelname '.outlog'],'w'); fclose(fid);
         end
      end
      %}}}
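      % Note that this method hardcodes '#!/bin/sh' rather than using
      % cluster.shell. A hedged sketch of the non-interactive kriging line it
      % writes (values hypothetical):
      %
      %    mpiexec -np 4 <codepath>/kriging.exe <executionpath>/model model 2> model.errlog >model.outlog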
      function UploadQueueJob(cluster,modelname,dirname,filelist)% {{{

         if ~ispc,

            %compress the input files into one tar.gz archive.
            compressstring=['tar -zcf ' dirname '.tar.gz '];
            for i=1:numel(filelist),
               if ~exist(filelist{i},'file')
                  error(['File ' filelist{i} ' not found']);
               end
               compressstring = [compressstring ' ' filelist{i}];
            end
            if cluster.interactive,
               compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
            end
            system(compressstring);

            if cluster.verbose, disp('uploading input file and queueing script'); end
            issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});
         end
      end %}}}
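      % A hedged sketch of the compress command assembled above for an
      % interactive run (file names hypothetical):
      %
      %    tar -zcf run.tar.gz  model.bin model.queue model.errlog model.outlog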
      function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch)% {{{

         if ~ispc,

            %figure out what shell extension we will use:
            if isempty(strfind(cluster.shell,'csh')),
               shellext='sh';
            else
               shellext='csh';
            end

            if cluster.verbose, disp('launching solution sequence on remote cluster'); end

            if ~isempty(restart)
               launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
            else
               if ~batch,
                  launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                     ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
               else
                  launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                     ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
               end
            end
            issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
         else
            system([modelname '.bat']);
         end

      end %}}}
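      % A hedged sketch of the remote command assembled above for a fresh
      % (non-restart, non-batch) run, with hypothetical names:
      %
      %    source <etcpath>/environment.sh && cd <executionpath> && rm -rf ./run && mkdir run && cd run && mv ../run.tar.gz ./ && tar -zxf run.tar.gz && source model.queue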
      function LaunchQueueJobIceOcean(cluster,modelname,dirname,filelist,restart,batch)% {{{

         if ~ispc,

            %figure out what shell extension we will use:
            if isempty(strfind(cluster.shell,'csh')),
               shellext='sh';
            else
               shellext='csh';
            end

            if cluster.verbose, disp('launching solution sequence on remote cluster'); end

            if ~isempty(restart)
               launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
            else
               if ~batch,
                  launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
               else
                  launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                     ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
               end
            end
            issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
         else
            system([modelname '.bat']);
         end

      end %}}}
      function Download(cluster,dirname,filelist)% {{{

         if ispc(),
            %do nothing
            return;
         end

         %copy files from cluster to current directory
         directory=[cluster.executionpath '/' dirname '/'];
         issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);
      end %}}}
   end
end