source: issm/trunk/src/m/classes/clusters/discovery.m

Last change on this file was 28013, checked in by Mathieu Morlighem, 16 months ago

merged trunk-jpl and trunk for revision 28011

File size: 7.0 KB
RevLine 
%DISCOVERY(Dartmouth) cluster class definition
%
%   Describes a Slurm-managed cluster named 'discovery' and knows how to
%   write the batch scripts, upload the inputs, launch the job and download
%   the results for it.
%
%   Usage:
%      cluster=discovery();
%      cluster=discovery('numnodes',2);
%      cluster=discovery('numnodes',2,'login','username');

classdef discovery
    properties (SetAccess=public) % {{{
        name           = 'discovery'; % host name handed to issmssh/issmscpin/issmscpout
        login          = '';          % user name on the cluster (must not be empty)
        numnodes       = 1;           % written to '#SBATCH --nodes'
        cpuspernode    = 16;          % written to '#SBATCH --ntasks-per-node'
        codepath       = '';          % directory holding issm.exe / kriging.exe (must not be empty)
        executionpath  = '';          % remote directory under which run directories are created (must not be empty)
        interactive    = 0;           % if nonzero, BuildQueueScript also writes a .run script and empty log files
        time           = 10;          % in hours
        memory         = 2;           % in Gb
        email          = 'END,FAIL';  % written to '#SBATCH --mail-type'; leave empty to omit the directive
        deleteckptdata = 0;           % if nonzero, the queue script removes *.rst and *.ckpt after the run
    end % }}}
    methods
        function cluster=discovery(varargin) % {{{
            %DISCOVERY - build a cluster object from defaults, then from user options
            %
            %   varargin holds 'property',value pairs that are forwarded to
            %   pairoptions and applied with AssignObjectFields.

            %If a discovery_settings file is found (exist(...)==2), run it
            %first. Presumably it is a script that assigns fields of
            %'cluster' -- its content is not visible from this file.
            if (exist('discovery_settings')==2), discovery_settings; end

            %Options given by the caller are applied last
            cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
        end % }}}
        function disp(cluster) % {{{
            %DISP - print every field of the cluster object
            disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
            disp(sprintf(' name: %s',cluster.name));
            disp(sprintf(' login: %s',cluster.login));
            disp(sprintf(' numnodes: %i',cluster.numnodes));
            disp(sprintf(' cpuspernode: %i',cluster.cpuspernode));
            disp(sprintf(' time: %i hours',cluster.time));
            disp(sprintf(' memory: %i Gb',cluster.memory));
            disp(sprintf(' email: %s (notifications: BEGIN,END,FAIL)',cluster.email));
            disp(sprintf(' deleteckptdata: %i',cluster.deleteckptdata));
            disp(sprintf(' codepath: %s',cluster.codepath));
            disp(sprintf(' executionpath: %s',cluster.executionpath));
            disp(sprintf(' interactive: %i',cluster.interactive));
        end % }}}
        function numprocs=nprocs(cluster) % {{{
            %NPROCS - total number of processes: numnodes*cpuspernode
            numprocs=cluster.numnodes*cluster.cpuspernode;
        end % }}}
        function md = checkconsistency(cluster,md,solution,analyses) % {{{
            %CHECKCONSISTENCY - report empty mandatory fields through checkmessage
            %
            %   solution and analyses are accepted but not used here.
            if isempty(cluster.login), md = checkmessage(md,'login empty'); end
            if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
            if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
        end % }}}
        function BuildKrigingQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
            %BUILDKRIGINGQUEUESCRIPT - write <modelname>.queue running kriging.exe through srun
            %
            %   The file is created in the current directory. solution,
            %   isdakota and isoceancoupling are accepted but not used.

            if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
            if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

            %write queuing script
            fid=discovery.OpenForWriting([modelname '.queue']);
            discovery.WriteSbatchHeader(fid,cluster,modelname);

            fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
            fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
            fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
            %NOTE(review): the script changes into executionpath/dirname but
            %hands executionpath/modelname to kriging.exe, whereas
            %BuildQueueScript passes executionpath/dirname. Kept as is --
            %confirm which one kriging.exe expects.
            fprintf(fid,'srun %s/kriging.exe %s %s\n', cluster.codepath,[cluster.executionpath '/' modelname],modelname);
            if ~io_gather, %concatenate the output files:
                fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
            end
            fclose(fid);
        end % }}}
        function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
            %BUILDQUEUESCRIPT - write <modelname>.queue running issm.exe through mpirun
            %
            %   The file is created in the current directory. When
            %   cluster.interactive is set, <modelname>.run and empty
            %   <modelname>.errlog / <modelname>.outlog files are created
            %   too. isdakota and isoceancoupling are accepted but not used.

            if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
            if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

            %write queuing script
            fid=discovery.OpenForWriting([modelname '.queue']);
            discovery.WriteSbatchHeader(fid,cluster,modelname);

            fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
            fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
            fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
            fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
            if ~io_gather, %concatenate the output files:
                %The trailing newline matters: without it the 'rm' command
                %below would be glued to the end of this line.
                fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
            end

            if (cluster.deleteckptdata)
                fprintf(fid,'rm -rf *.rst *.ckpt\n');
            end
            fclose(fid);

            %in interactive mode, create a run file, and errlog and outlog file
            if cluster.interactive,
                fid=discovery.OpenForWriting([modelname '.run']);
                fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
                if ~io_gather, %concatenate the output files:
                    fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
                end
                fclose(fid);

                %create (or truncate) the two log files
                fclose(discovery.OpenForWriting([modelname '.errlog']));
                fclose(discovery.OpenForWriting([modelname '.outlog']));
            end
        end % }}}
        function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
            %UPLOADQUEUEJOB - pack the input files into <dirname>.tar.gz and copy it to the cluster
            %
            %   filelist is a cell array of file names. In interactive mode
            %   the .errlog and .outlog files are added to the archive.

            %compress the files into one tar.gz archive
            compressstring=['tar -zcf ' dirname '.tar.gz '];
            for i=1:numel(filelist),
                compressstring = [compressstring ' ' filelist{i}];
            end
            if cluster.interactive,
                compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
            end
            system(compressstring);

            disp('uploading input file and queuing script');
            issmscpout(cluster.name,cluster.executionpath,cluster.login,0,{[dirname '.tar.gz']});

        end % }}}
        function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
            %LAUNCHQUEUEJOB - submit <modelname>.queue with sbatch over ssh
            %
            %   restart non-empty: reuse the existing remote run directory.
            %   restart empty    : wipe and recreate the remote run
            %                      directory, move the uploaded archive
            %                      into it and unpack it before submitting.
            %   filelist and batch are accepted but not used.

            disp('launching solution sequence on remote cluster');
            if ~isempty(restart)
                launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
            else
                launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
                    ' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && hostname && sbatch ' modelname '.queue '];
            end
            issmssh(cluster.name,cluster.login,0,launchcommand);
        end % }}}
        function Download(cluster,dirname,filelist) % {{{
            %DOWNLOAD - copy filelist from the remote run directory to the current directory

            directory=[cluster.executionpath '/' dirname '/'];
            issmscpin(cluster.name,cluster.login,0,directory,filelist);

        end % }}}
    end
    methods (Static, Access=private)
        function fid=OpenForWriting(filename) % {{{
            %OPENFORWRITING - fopen(filename,'w') that raises an error naming the file on failure
            [fid,message]=fopen(filename,'w');
            if fid<0,
                error('discovery:fopen','could not open ''%s'' for writing: %s',filename,message);
            end
        end % }}}
        function WriteSbatchHeader(fid,cluster,modelname) % {{{
            %WRITESBATCHHEADER - shebang and #SBATCH directives shared by both queue scripts
            fprintf(fid,'#!/bin/bash\n');
            fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
            fprintf(fid,'#SBATCH --account=ice\n'); %Make sure we use the ICE account for this run
            fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
            fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
            fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
            fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
            fprintf(fid,'#SBATCH --time=%s\n',discovery.FormatWalltime(cluster.time)); %walltime is in D-HH:MM:SS format. cluster.time is in hours
            fprintf(fid,'#SBATCH --mem=%iG\n',cluster.memory);
            if ~isempty(cluster.email)
                fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
            end
            fprintf(fid,'\n');
        end % }}}
        function walltime=FormatWalltime(hours) % {{{
            %FORMATWALLTIME - convert a duration in hours to Slurm's D-HH:MM:SS
            %
            %   Computed arithmetically rather than with datestr: datestr
            %   formats a calendar date, so its hour field wraps at 24 and
            %   its day field is a day-of-month, not a number of days.
            totalseconds = round(hours*3600);
            numdays      = floor(totalseconds/86400);
            leftover     = totalseconds-numdays*86400;
            numhours     = floor(leftover/3600);
            leftover     = leftover-numhours*3600;
            numminutes   = floor(leftover/60);
            numseconds   = leftover-numminutes*60;
            walltime=sprintf('%d-%02d:%02d:%02d',numdays,numhours,numminutes,numseconds);
        end % }}}
    end
end
Note: See TracBrowser for help on using the repository browser.