%DISCOVERY(Dartmouth) cluster class definition
%
%   Usage:
%      cluster=discovery();
%      cluster=discovery('numnodes',3);
%      cluster=discovery('numnodes',3,'login','username');
%
%   Options are name/value pairs handed to AssignObjectFields; presumably the
%   names must match the properties listed below (TODO confirm against
%   AssignObjectFields).

classdef discovery
	properties (SetAccess=public)
		% {{{
		name           = 'discovery'  %host name passed to issmssh/issmscpin/issmscpout
		login          = '';          %user name on the cluster
		numnodes       = 1;           %written to #SBATCH --nodes
		cpuspernode    = 16;          %written to #SBATCH --ntasks-per-node
		codepath       = '';          %directory holding issm.exe and kriging.exe
		executionpath  = '';          %directory on the cluster where run directories are created
		interactive    = 0;           %if set, also create <modelname>.run and empty log files
		time           = 10;          %in hours
		memory         = 2;           %in Gb
		email          = 'END,FAIL';  %written to #SBATCH --mail-type (skipped when empty)
		deleteckptdata = 0;           %if set, append 'rm -rf *.rst *.ckpt' to the queue script
	end
	%}}}
	methods
		function cluster=discovery(varargin) % {{{
			%DISCOVERY - build a cluster object from defaults, settings file and options

			%initialize cluster using default settings if provided
			if (exist('discovery_settings')==2), discovery_settings; end

			%use provided options to change fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
		function disp(cluster) % {{{
			% display the object
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf(' name: %s',cluster.name));
			disp(sprintf(' login: %s',cluster.login));
			disp(sprintf(' numnodes: %i',cluster.numnodes));
			disp(sprintf(' cpuspernode: %i',cluster.cpuspernode));
			disp(sprintf(' time: %i hours',cluster.time));
			disp(sprintf(' memory: %i Gb',cluster.memory));
			disp(sprintf(' email: %s (notifications: BEGIN,END,FAIL)',cluster.email));
			disp(sprintf(' deleteckptdata: %i',cluster.deleteckptdata));
			disp(sprintf(' codepath: %s',cluster.codepath));
			disp(sprintf(' executionpath: %s',cluster.executionpath));
			disp(sprintf(' interactive: %i',cluster.interactive));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			%compute number of processors
			numprocs=cluster.numnodes*cluster.cpuspernode;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			%CHECKCONSISTENCY - report empty mandatory fields through checkmessage
			%   solution and analyses are not used by this cluster.

			%Miscellaneous
			if isempty(cluster.login), md = checkmessage(md,'login empty'); end
			if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
			if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDKRIGINGQUEUESCRIPT - write <modelname>.queue launching kriging.exe with srun
			%   solution, isdakota and isoceancoupling are not used here.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=discovery.openforwrite([modelname '.queue']);
			cluster.writesbatchheader(fid,dirname,modelname);
			fprintf(fid,'srun %s/kriging.exe %s %s\n', cluster.codepath,[cluster.executionpath '/' modelname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end
			fclose(fid);
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDQUEUESCRIPT - write <modelname>.queue launching issm.exe with mpirun
			%   In interactive mode, <modelname>.run and empty <modelname>.errlog and
			%   <modelname>.outlog files are created as well.
			%   isdakota and isoceancoupling are not used here.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%the same launch and concatenation lines go in the queue script and in the run file
			launchline=sprintf('mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
			%newline-terminated so that a following command starts on its own line
			catline=sprintf('cat %s.outbin.* > %s.outbin\n',modelname,modelname);

			%write queuing script
			fid=discovery.openforwrite([modelname '.queue']);
			cluster.writesbatchheader(fid,dirname,modelname);
			fprintf(fid,'%s',launchline);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'%s',catline);
			end
			if (cluster.deleteckptdata)
				fprintf(fid,'rm -rf *.rst *.ckpt\n');
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=discovery.openforwrite([modelname '.run']);
				fprintf(fid,'%s',launchline);
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'%s',catline);
				end
				fclose(fid);
				fclose(discovery.openforwrite([modelname '.errlog']));
				fclose(discovery.openforwrite([modelname '.outlog']));
			end
		end %}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
			%UPLOADQUEUEJOB - pack filelist into <dirname>.tar.gz and copy it to executionpath

			%compress the files into one zip.
			compressstring=['tar -zcf ' dirname '.tar.gz '];
			for i=1:numel(filelist),
				compressstring = [compressstring ' ' filelist{i}];
			end
			if cluster.interactive,
				compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
			end
			%a failing tar would otherwise go unnoticed until the job fails on the cluster
			status=system(compressstring);
			if status~=0,
				warning('discovery:UploadQueueJob','command ''%s'' returned status %i',compressstring,status);
			end

			disp('uploading input file and queuing script');
			issmscpout(cluster.name,cluster.executionpath,cluster.login,0,{[dirname '.tar.gz']});

		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
			%LAUNCHQUEUEJOB - submit <modelname>.queue with sbatch over ssh
			%   When restart is empty, the run directory is wiped, recreated and the
			%   uploaded archive is unpacked in it; otherwise the existing directory
			%   is reused as is. filelist and batch are not used here.

			disp('launching solution sequence on remote cluster');
			if ~isempty(restart)
				launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
			else
				launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
					' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && hostname && sbatch ' modelname '.queue '];
			end
			issmssh(cluster.name,cluster.login,0,launchcommand);
		end %}}}
		function Download(cluster,dirname,filelist) % {{{
			%DOWNLOAD - fetch filelist from <executionpath>/<dirname>/ through issmscpin

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,0,directory,filelist);

		end %}}}
	end
	methods (Access=private)
		function str=slurmwalltime(cluster) % {{{
			%SLURMWALLTIME - convert cluster.time (hours) to a D-HH:MM:SS string
			%   Plain arithmetic is used instead of datestr so that the result does
			%   not wrap around at 24 hours or depend on calendar days.
			totalseconds=round(cluster.time*3600);
			numdays=floor(totalseconds/86400);
			leftover=totalseconds-numdays*86400;
			numhours=floor(leftover/3600);
			leftover=leftover-numhours*3600;
			numminutes=floor(leftover/60);
			numseconds=leftover-numminutes*60;
			str=sprintf('%d-%02d:%02d:%02d',numdays,numhours,numminutes,numseconds);
		end %}}}
		function writesbatchheader(cluster,fid,dirname,modelname) % {{{
			%WRITESBATCHHEADER - write the part shared by both queue scripts:
			%   #SBATCH directives, ISSM environment setup and cd to the run directory
			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
			fprintf(fid,'#SBATCH --account=ice\n'); %Make sure we use the ICE account for this run
			fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
			fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
			fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
			fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
			fprintf(fid,'#SBATCH --time=%s\n',cluster.slurmwalltime()); %walltime is in d-HH:MM:SS format. cluster.time is in hour
			fprintf(fid,'#SBATCH --mem=%iG\n',cluster.memory);
			if ~isempty(cluster.email)
				fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
			end
			fprintf(fid,'\n');
			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
		end %}}}
	end
	methods (Static, Access=private)
		function fid=openforwrite(filename) % {{{
			%OPENFORWRITE - fopen(filename,'w') that errors out when the file cannot be opened
			[fid,message]=fopen(filename,'w');
			if fid==-1,
				error('discovery:openforwrite','could not open ''%s'' for writing: %s',filename,message);
			end
		end %}}}
	end
end