source: issm/trunk/src/m/classes/clusters/discovery.m@ 27035

Last change on this file since 27035 was 27035, checked in by Mathieu Morlighem, 3 years ago

merged trunk-jpl and trunk for revision 27033

File size: 6.8 KB
RevLine 
%DISCOVERY (Dartmouth) cluster class definition
%
%   Usage:
%      cluster=discovery();
%      cluster=discovery('numnodes',2);
%      cluster=discovery('numnodes',2,'cpuspernode',16,'login','username');

classdef discovery
	% Settings and job-handling methods for running ISSM on the Discovery
	% cluster through the Slurm scheduler (scripts use #SBATCH / sbatch).
	properties (SetAccess=public)
		% {{{
		name          = 'discovery' % host name handed to issmssh/issmscpin/issmscpout
		login         = '';         % user name on the cluster
		numnodes      = 1;          % written to '#SBATCH --nodes'
		cpuspernode   = 16;         % written to '#SBATCH --ntasks-per-node'
		codepath      = '';         % remote directory holding issm.exe / kriging.exe
		executionpath = '';         % remote directory under which run directories are created
		interactive   = 0;          % when nonzero, a <modelname>.run file and empty logs are also created
		time          = 10; %in hours
		memory        = 2; %in Gb
		email         = 'END,FAIL'; % written to '#SBATCH --mail-type' (skipped when empty)

	end
	%}}}
	methods
		function cluster=discovery(varargin) % {{{
			%DISCOVERY - build a cluster object from defaults plus name/value options

			%initialize cluster using default settings if provided
			%(a discovery_settings.m script on the path may overwrite fields of 'cluster')
			if (exist('discovery_settings','file')==2), discovery_settings; end

			%use provided options to change fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
		function disp(cluster) % {{{
			% display the object
			% %g is used for time and memory so that fractional values print
			% as plain decimals (integer values print exactly as before)
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf('    name: %s',cluster.name));
			disp(sprintf('    login: %s',cluster.login));
			disp(sprintf('    numnodes: %i',cluster.numnodes));
			disp(sprintf('    cpuspernode: %i',cluster.cpuspernode));
			disp(sprintf('    time: %g hours',cluster.time));
			disp(sprintf('    memory: %g Gb',cluster.memory));
			disp(sprintf('    email: %s (notifications: BEGIN,END,FAIL)',cluster.email));
			disp(sprintf('    codepath: %s',cluster.codepath));
			disp(sprintf('    executionpath: %s',cluster.executionpath));
			disp(sprintf('    interactive: %i',cluster.interactive));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			%compute number of processors
			numprocs=cluster.numnodes*cluster.cpuspernode;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			%Report (through checkmessage) the fields that must be set before a run.
			%solution and analyses are accepted but not used here.

			%Miscellaneous
			if isempty(cluster.login), md = checkmessage(md,'login empty'); end
			if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
			if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%Write <modelname>.queue (in the current directory), a Slurm batch
			%script that runs kriging.exe.
			%solution, isdakota and isoceancoupling are accepted but not used here.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof),    disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=cluster.OpenForWriting([modelname '.queue']);
			cluster.WriteSlurmPreamble(fid,dirname,modelname);
			fprintf(fid,'srun %s/kriging.exe %s %s\n', cluster.codepath,[cluster.executionpath '/' modelname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end
			fclose(fid);
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%Write <modelname>.queue (in the current directory), a Slurm batch
			%script that runs issm.exe; in interactive mode also write
			%<modelname>.run and empty <modelname>.errlog/.outlog files.
			%isdakota and isoceancoupling are accepted but not used here.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof),    disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=cluster.OpenForWriting([modelname '.queue']);
			cluster.WriteSlurmPreamble(fid,dirname,modelname);
			fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=cluster.OpenForWriting([modelname '.run']);
				fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
				end
				fclose(fid);

				%empty log files, so that they exist when the archive is built
				fclose(cluster.OpenForWriting([modelname '.errlog']));
				fclose(cluster.OpenForWriting([modelname '.outlog']));
			end
		end %}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
			%Pack the files of filelist into <dirname>.tar.gz and hand the
			%archive to issmscpout.

			%compress the files into one zip.
			compressstring=['tar -zcf ' dirname '.tar.gz '];
			for i=1:numel(filelist),
				compressstring = [compressstring ' ' filelist{i}];
			end
			if cluster.interactive,
				compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
			end
			%only warn on failure: the upload below is still attempted, as before
			status=system(compressstring);
			if status~=0,
				warning('discovery:UploadQueueJob','tar exited with status %i while creating %s.tar.gz',status,dirname);
			end

			disp('uploading input file and queueing script');
			issmscpout(cluster.name,cluster.executionpath,cluster.login,0,{[dirname '.tar.gz']});

		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
			%Submit <modelname>.queue with sbatch on the cluster through issmssh.
			%filelist and batch are accepted but not used here.

			disp('launching solution sequence on remote cluster');
			if ~isempty(restart)
				%restart: reuse the existing remote run directory as is
				launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
			else
				%fresh run: recreate the run directory and unpack the uploaded archive in it
				launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
					' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz  && hostname && sbatch ' modelname '.queue '];
			end
			issmssh(cluster.name,cluster.login,0,launchcommand);
		end %}}}
		function Download(cluster,dirname,filelist) % {{{
			%Fetch the files of filelist from the remote run directory through issmscpin.

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,0,directory,filelist);

		end %}}}
	end
	methods (Access=private)
		function fid=OpenForWriting(cluster,filename) % {{{
			%Open filename for writing; raise an error naming the file when fopen fails.
			[fid,message]=fopen(filename,'w');
			if fid==-1,
				error('discovery:fopen','could not open ''%s'' for writing: %s',filename,message);
			end
		end %}}}
		function WriteSlurmPreamble(cluster,fid,dirname,modelname) % {{{
			%Write the part shared by all queue scripts: shebang, #SBATCH
			%directives, ISSM environment setup and cd into the run directory.
			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
			fprintf(fid,'#SBATCH --account=ice\n'); %Make sure we use the ICE account for this run
			fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
			fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
			fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
			fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
			fprintf(fid,'#SBATCH --time=%s\n',cluster.WalltimeString()); %walltime is in HH:MM:SS format. cluster.time is in hour
			fprintf(fid,'#SBATCH --mem=%s\n',cluster.MemoryString());
			if ~isempty(cluster.email)
				fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
			end
			fprintf(fid,'\n');

			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
		end %}}}
		function walltime=WalltimeString(cluster) % {{{
			%Convert cluster.time (hours) to an hours:minutes:seconds string.
			%Done by hand rather than with datestr(time/24,'HH:MM:SS'), which
			%only prints the time-of-day part and therefore wraps at 24 hours.
			totalseconds=round(cluster.time*3600);
			nhours=floor(totalseconds/3600);
			nminutes=floor(mod(totalseconds,3600)/60);
			nseconds=mod(totalseconds,60);
			walltime=sprintf('%02d:%02d:%02d',nhours,nminutes,nseconds);
		end %}}}
		function memstring=MemoryString(cluster) % {{{
			%Convert cluster.memory (Gb) to a value for '#SBATCH --mem'.
			%Whole numbers keep the 'G' unit; fractional values are rounded up
			%to whole megabytes, because sprintf('%i',x) switches to exponent
			%notation when x is not an integer.
			if cluster.memory==round(cluster.memory),
				memstring=sprintf('%iG',cluster.memory);
			else
				memstring=sprintf('%iM',ceil(cluster.memory*1024));
			end
		end %}}}
	end
end
Note: See TracBrowser for help on using the repository browser.