source: issm/trunk/src/m/classes/clusters/generic.m@ 20500

Last change on this file since 20500 was 20500, checked in by Mathieu Morlighem, 9 years ago

merged trunk-jpl and trunk for revision 20497

File size: 11.8 KB
%GENERIC cluster class definition
%
%   Usage:
%      cluster=generic('name','astrid','np',3);
%      cluster=generic('name',oshostname(),'np',3,'login','username');

classdef generic
	properties (SetAccess=public)
		% {{{
		name='';
		login='';
		np=1;
		port=0;
		interactive=1;
		codepath=[issmdir() '/bin'];
		etcpath=[issmdir() '/etc'];
		executionpath=[issmdir() '/execution'];
		valgrind=[issmdir() '/externalpackages/valgrind/install/bin/valgrind'];
		valgrindlib=[issmdir() '/externalpackages/valgrind/install/lib/libmpidebug.so'];
		valgrindsup=[issmdir() '/externalpackages/valgrind/issm.supp'];
		verbose=1;
		shell='/bin/sh';
		%}}}
	end
	methods
		function cluster=generic(varargin) % {{{

			%Change the defaults if ispc
			if ispc,
				cluster.codepath = [issmdir() '\bin'];
				cluster.etcpath = [issmdir() '\etc'];
				cluster.executionpath = [issmdir() '\execution'];
			end

			%use provided options to change fields
			options=pairoptions(varargin{:});

			%get name
			cluster.name=getfieldvalue(options,'name',oshostname());

			%initialize cluster using user settings if provided
			if (exist([cluster.name '_settings'])==2), eval([cluster.name '_settings']); end

			%OK get other fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
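		% Sketch of the settings hook above (hypothetical file name): if a
		% script called <name>_settings.m, e.g. astrid_settings.m, is on the
		% MATLAB path, it is eval'ed by the constructor and may preset any
		% field before the explicit name/value options are applied, e.g.:
		%
		%    cluster.login='username';
		%    cluster.np=8;
		%    cluster.executionpath='/scratch/username/execution';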
		function disp(cluster) % {{{
			% display the object
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf('    name: %s',cluster.name));
			disp(sprintf('    login: %s',cluster.login));
			disp(sprintf('    np: %i',cluster.np));
			disp(sprintf('    port: %i',cluster.port));
			disp(sprintf('    codepath: %s',cluster.codepath));
			disp(sprintf('    executionpath: %s',cluster.executionpath));
			disp(sprintf('    etcpath: %s',cluster.etcpath));
			disp(sprintf('    valgrind: %s',cluster.valgrind));
			disp(sprintf('    valgrindlib: %s',cluster.valgrindlib));
			disp(sprintf('    valgrindsup: %s',cluster.valgrindsup));
			disp(sprintf('    verbose: %i',cluster.verbose));
			disp(sprintf('    shell: %s',cluster.shell));
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			if cluster.np<1
				md = checkmessage(md,'number of processors should be at least 1');
			end
			if isnan(cluster.np),
				md = checkmessage(md,'number of processors should not be NaN!');
			end
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota) % {{{

			%write queuing script
			%what is the executable being called?
			executable='issm.exe';
			if isdakota,
				version=IssmConfig('_DAKOTA_VERSION_'); version=str2num(version(1:3));
				if (version>=6),
					executable='issm_dakota.exe';
				end
			end

			if ~ispc(),

				fid=fopen([modelname '.queue'],'w');
				fprintf(fid,'#!%s\n',cluster.shell);
				if ~isvalgrind,
					if cluster.interactive
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s/%s %s %s %s \n',cluster.np,cluster.codepath,executable,EnumToString(solution),[cluster.executionpath '/' dirname],modelname);
						else
							fprintf(fid,'%s/%s %s %s %s ',cluster.codepath,executable,EnumToString(solution),[cluster.executionpath '/' dirname],modelname);
						end
					else
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s/%s %s %s %s 2> %s.errlog >%s.outlog &',cluster.np,cluster.codepath,executable,EnumToString(solution),[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s/%s %s %s %s 2> %s.errlog >%s.outlog &',cluster.codepath,executable,EnumToString(solution),[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					end
				elseif isgprof,
					fprintf(fid,'\n gprof %s/issm.exe gmon.out > %s.performance',cluster.codepath,modelname);
				else
					%Add --gen-suppressions=all to get suppression lines
					fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib);
					if ismac,
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --dsymutil=yes --suppressions=%s %s/issm.exe %s %s %s 2> %s.errlog >%s.outlog ',...
								cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,EnumToString(solution),[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s --leak-check=full --dsymutil=yes --error-limit=no --suppressions=%s %s/issm.exe %s %s %s 2> %s.errlog >%s.outlog ',...
								cluster.valgrind,cluster.valgrindsup,cluster.codepath,EnumToString(solution),[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					else
						if IssmConfig('_HAVE_MPI_'),
							fprintf(fid,'mpiexec -np %i %s --leak-check=full --error-limit=no --suppressions=%s %s/issm.exe %s %s %s 2> %s.errlog >%s.outlog ',...
								cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,EnumToString(solution),[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						else
							fprintf(fid,'%s --leak-check=full --error-limit=no --suppressions=%s %s/issm.exe %s %s %s 2> %s.errlog >%s.outlog ',...
								cluster.valgrind,cluster.valgrindsup,cluster.codepath,EnumToString(solution),[cluster.executionpath '/' dirname],modelname,modelname,modelname);
						end
					end
				end
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

			else % Windows

				fid=fopen([modelname '.bat'],'w');
				fprintf(fid,'@echo off\n');

				if IssmConfig('_HAVE_PETSC_MPI_'),
					warning('parallel runs not allowed yet on Windows. Defaulting to 1 cpu');
					cluster.np=1;
				end

				if cluster.np>1,
					fprintf(fid,'"C:\\Program Files\\MPICH2\\bin\\mpiexec.exe" -n %i "%s/issm.exe" %s ./ %s ',cluster.np,cluster.codepath,EnumToString(solution),modelname);
				else
					fprintf(fid,'"%s/issm.exe" %s ./ %s ',cluster.codepath,EnumToString(solution),modelname);
				end
				fclose(fid);
			end

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
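		% For reference, a minimal sketch of the <modelname>.queue file written
		% above, for an interactive MPI run (placeholder values, not literal):
		%
		%    #!/bin/sh
		%    mpiexec -np 2 <codepath>/issm.exe <solution> <executionpath>/<dirname> <modelname>
		%
		% Non-interactive runs instead append '2> <modelname>.errlog ><modelname>.outlog &'
		% so the job detaches and logs to files rather than the terminal.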
		function BuildQueueScriptMultipleModels(cluster,dirname,modelname,solution,dirnames,modelnames,nps) % {{{

			%some checks:
			if isempty(modelname), error('BuildQueueScriptMultipleModels error message: need a non-empty model name!');end

			%what is the executable being called?
			executable='issm_slr.exe';

			if ispc(), error('BuildQueueScriptMultipleModels not supported yet on Windows machines');end;

			%write queuing script
			fid=fopen([modelname '.queue'],'w');

			fprintf(fid,'#!%s\n',cluster.shell);

			%number of cpus:
			mpistring=sprintf('mpiexec -np %i ',cluster.np);

			%executable:
			mpistring=[mpistring sprintf('%s/%s ',cluster.codepath,executable)];

			%solution name:
			mpistring=[mpistring sprintf('%s ',EnumToString(solution))];

			%execution directory and model name:
			mpistring=[mpistring sprintf('%s/%s %s',cluster.executionpath,dirname,modelname)];

			%inform main executable of how many icecaps, glaciers and earth models are being run:
			mpistring=[mpistring sprintf(' %i ',length(dirnames))];

			%icecaps, glaciers and earth location, names and number of processors associated:
			for i=1:length(dirnames),
				mpistring=[mpistring sprintf(' %s/%s %s %i ',cluster.executionpath,dirnames{i},modelnames{i},nps{i})];
			end

			%log files:
			if ~cluster.interactive,
				mpistring=[mpistring sprintf('2> %s.errlog > %s.outlog',modelname,modelname)];
			end

			%write this long string to disk, as literal text so that any stray
			%'%' characters are not treated as format specifiers:
			fprintf(fid,'%s',mpistring);
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
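		% The string assembled above boils down to a single mpiexec call of the
		% following shape (placeholders, with the last group repeated once per
		% icecap/glacier/earth model):
		%
		%    mpiexec -np <np> <codepath>/issm_slr.exe <solution> <executionpath>/<dirname> <modelname> <nmodels> \
		%       <executionpath>/<dirname_i> <modelname_i> <np_i> ...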
		function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{

			%write queuing script
			if ~ispc(),

				fid=fopen([modelname '.queue'],'w');
				fprintf(fid,'#!/bin/sh\n');
				if ~isvalgrind,
					if cluster.interactive
						fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname);
					else
						fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
					end
				elseif isgprof,
					fprintf(fid,'\n gprof %s/kriging.exe gmon.out > %s.performance',cluster.codepath,modelname);
				else
					%Add --gen-suppressions=all to get suppression lines
					fprintf(fid,'LD_PRELOAD=%s \\\n',cluster.valgrindlib);
					fprintf(fid,'mpiexec -np %i %s --leak-check=full --suppressions=%s %s/kriging.exe %s %s 2> %s.errlog >%s.outlog ',...
						cluster.np,cluster.valgrind,cluster.valgrindsup,cluster.codepath,[cluster.executionpath '/' modelname],modelname,modelname,modelname);
				end
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'\ncat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

			else % Windows

				fid=fopen([modelname '.bat'],'w');
				fprintf(fid,'@echo off\n');
				if cluster.interactive
					fprintf(fid,'"%s/issm.exe" %s "%s" %s ',cluster.codepath,EnumToString(solution),[cluster.executionpath '/' modelname],modelname);
				else
					fprintf(fid,'"%s/issm.exe" %s "%s" %s 2> %s.errlog >%s.outlog',...
						cluster.codepath,EnumToString(solution),[cluster.executionpath '/' modelname],modelname,modelname,modelname);
				end
				fclose(fid);
			end

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.errlog'],'w'); fclose(fid);
				fid=fopen([modelname '.outlog'],'w'); fclose(fid);
			end
		end
		%}}}
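		% The kriging queue file mirrors the generic one; for a non-interactive
		% run it reads roughly (placeholder values):
		%
		%    #!/bin/sh
		%    mpiexec -np 2 <codepath>/kriging.exe <executionpath>/<modelname> <modelname> 2> <modelname>.errlog ><modelname>.outlog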
		function UploadQueueJob(cluster,modelname,dirname,filelist)% {{{

			if ~ispc,

				%compress the files into one archive.
				compressstring=['tar -zcf ' dirname '.tar.gz '];
				for i=1:numel(filelist),
					compressstring = [compressstring ' ' filelist{i}];
				end
				if cluster.interactive,
					compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
				end
				system(compressstring);

				if cluster.verbose, disp('uploading input file and queueing script'); end
				issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});
			end
		end %}}}
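		% The upload amounts to the following shell command (hypothetical file
		% names), followed by an scp of the archive to cluster.executionpath:
		%
		%    tar -zcf <dirname>.tar.gz <modelname>.bin <modelname>.queue ...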
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch)% {{{

			if ~ispc,

				%figure out what shell extension we will use:
				if isempty(strfind(cluster.shell,'csh')),
					shellext='sh';
				else
					shellext='csh';
				end

				if cluster.verbose, disp('launching solution sequence on remote cluster'); end

				if ~isempty(restart)
					launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && cd ' dirname ' && source ' modelname '.queue '];
				else
					if ~batch,
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && source ' modelname '.queue '];
					else
						launchcommand=['source ' cluster.etcpath '/environment.' shellext ' && cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
							' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz '];
					end
				end
				issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
			else
				system([modelname '.bat']);
			end

		end %}}}
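		% For a fresh (non-restart, non-batch) run, the remote command chain
		% assembled above reads, with placeholders:
		%
		%    source <etcpath>/environment.sh && cd <executionpath> && rm -rf ./<dirname> && mkdir <dirname> \
		%       && cd <dirname> && mv ../<dirname>.tar.gz ./ && tar -zxf <dirname>.tar.gz && source <modelname>.queue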
		function Download(cluster,dirname,filelist)% {{{

			if ispc(),
				%do nothing
				return;
			end

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);
		end %}}}
	end
end