%DISCOVERY cluster class definition (Dartmouth Discovery cluster)
%
%   Describes a Slurm-driven cluster: holds the job resources and the remote
%   paths, writes the sbatch queue scripts, and uploads/launches/downloads
%   runs through issmscpout, issmssh and issmscpin.
%
%   Usage:
%      cluster=discovery();
%      cluster=discovery('numnodes',2);
%      cluster=discovery('numnodes',2,'login','username');

classdef discovery
	properties (SetAccess=public)
		% {{{
		name           = 'discovery'; % first argument passed to issmssh/issmscpout/issmscpin (presumably the host name)
		login          = '';          % passed to issmssh/issmscpout/issmscpin; must not be empty (see checkconsistency)
		numnodes       = 1;           % written to '#SBATCH --nodes'
		cpuspernode    = 16;          % written to '#SBATCH --ntasks-per-node'
		codepath       = '';          % remote directory holding issm.exe and kriging.exe (ISSM_DIR is set to its parent)
		executionpath  = '';          % remote directory under which the run directory is created
		interactive    = 0;           % when nonzero, BuildQueueScript also writes <modelname>.run and empty log files
		time           = 10;          % walltime, in hours
		memory         = 2;           % memory in Gb, written to '#SBATCH --mem'
		email          = 'END,FAIL';  % written to '#SBATCH --mail-type'; no line is written when empty
		deleteckptdata = 0;           % when nonzero, the queue script ends with 'rm -rf *.rst *.ckpt'
	end
	%}}}
	methods
		function cluster=discovery(varargin) % {{{
			%DISCOVERY - build a cluster object from defaults, optional settings script and options
			%
			%   varargin is a list of 'field',value pairs forwarded to pairoptions
			%   and applied to the object by AssignObjectFields.

			%initialize cluster using default settings if provided
			if (exist('discovery_settings')==2), discovery_settings; end

			%use provided options to change fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
		function disp(cluster) % {{{
			%DISP - display the object, one property per line
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf(' name: %s',cluster.name));
			disp(sprintf(' login: %s',cluster.login));
			disp(sprintf(' numnodes: %i',cluster.numnodes));
			disp(sprintf(' cpuspernode: %i',cluster.cpuspernode));
			disp(sprintf(' time: %i hours',cluster.time));
			disp(sprintf(' memory: %i Gb',cluster.memory));
			disp(sprintf(' email: %s (notifications: BEGIN,END,FAIL)',cluster.email));
			disp(sprintf(' deleteckptdata: %i',cluster.deleteckptdata));
			disp(sprintf(' codepath: %s',cluster.codepath));
			disp(sprintf(' executionpath: %s',cluster.executionpath));
			disp(sprintf(' interactive: %i',cluster.interactive));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			%NPROCS - total number of processors (numnodes x cpuspernode)
			numprocs=cluster.numnodes*cluster.cpuspernode;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			%CHECKCONSISTENCY - register a checkmessage on md for every mandatory field left empty
			%
			%   solution and analyses are accepted but not used here.

			%Miscellaneous
			if isempty(cluster.login), md = checkmessage(md,'login empty'); end
			if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
			if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDKRIGINGQUEUESCRIPT - write <modelname>.queue, an sbatch script running kriging.exe
			%
			%   isvalgrind and isgprof only trigger a notice; solution, isdakota and
			%   isoceancoupling are accepted but not used here.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=discovery.openforwriting([modelname '.queue']);
			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
			fprintf(fid,'#SBATCH --account=ice\n'); %Make sure we use the ICE account for this run
			fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
			fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
			fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
			fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
			%walltime is in D-HH:MM:SS format so that requests of 24 hours or more do not wrap around. cluster.time is in hours
			fprintf(fid,'#SBATCH --time=%s\n',discovery.walltimestring(cluster.time));
			fprintf(fid,'#SBATCH --mem=%iG\n',cluster.memory);
			if ~isempty(cluster.email)
				fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
			end
			fprintf(fid,'\n');

			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
			fprintf(fid,'srun %s/kriging.exe %s %s\n', cluster.codepath,[cluster.executionpath '/' modelname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end
			fclose(fid);
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDQUEUESCRIPT - write <modelname>.queue, an sbatch script running issm.exe through mpirun
			%
			%   In interactive mode, <modelname>.run and empty <modelname>.errlog and
			%   <modelname>.outlog files are written as well. isvalgrind and isgprof
			%   only trigger a notice; isdakota and isoceancoupling are not used here.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=discovery.openforwriting([modelname '.queue']);
			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
			fprintf(fid,'#SBATCH --account=ice\n'); %Make sure we use the ICE account for this run
			fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
			fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
			fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
			fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
			%walltime is in D-HH:MM:SS format. cluster.time is in hours
			fprintf(fid,'#SBATCH --time=%s\n',discovery.walltimestring(cluster.time));
			fprintf(fid,'#SBATCH --mem=%iG\n',cluster.memory);
			if ~isempty(cluster.email)
				fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
			end
			fprintf(fid,'\n');
			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
			fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
			if ~io_gather, %concatenate the output files:
				%the trailing newline keeps the optional rm command below on its own line
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end

			if (cluster.deleteckptdata)
				fprintf(fid,'rm -rf *.rst *.ckpt\n');
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=discovery.openforwriting([modelname '.run']);
				fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
				end
				fclose(fid);
				fid=discovery.openforwriting([modelname '.errlog']);
				fclose(fid);
				fid=discovery.openforwriting([modelname '.outlog']);
				fclose(fid);
			end
		end %}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
			%UPLOADQUEUEJOB - tar the files of filelist into <dirname>.tar.gz and copy it to executionpath

			%compress the files into one zip.
			compressstring=['tar -zcf ' dirname '.tar.gz '];
			for i=1:numel(filelist),
				compressstring = [compressstring ' ' filelist{i}];
			end
			if cluster.interactive,
				%the empty log files written by BuildQueueScript are shipped too
				compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
			end
			system(compressstring);

			disp('uploading input file and queuing script');
			issmscpout(cluster.name,cluster.executionpath,cluster.login,0,{[dirname '.tar.gz']});

		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
			%LAUNCHQUEUEJOB - submit <modelname>.queue with sbatch on the cluster, through issmssh
			%
			%   When restart is not empty, the existing run directory is reused;
			%   otherwise it is wiped, recreated and filled from <dirname>.tar.gz.
			%   filelist and batch are accepted but not used here.

			disp('launching solution sequence on remote cluster');
			if ~isempty(restart)
				launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
			else
				launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
					' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && hostname && sbatch ' modelname '.queue '];
			end
			issmssh(cluster.name,cluster.login,0,launchcommand);
		end %}}}
		function Download(cluster,dirname,filelist) % {{{
			%DOWNLOAD - fetch the files of filelist from executionpath/dirname through issmscpin

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,0,directory,filelist);

		end %}}}
	end
	methods (Static)
		function str=walltimestring(numhours) % {{{
			%WALLTIMESTRING - convert a duration in hours into the D-HH:MM:SS string used for '#SBATCH --time'
			%
			%   Plain arithmetic is used instead of datestr: a duration is not a
			%   calendar date, so it must neither wrap at 24 hours nor be limited
			%   to a two digit day-of-month field.
			%
			%   Example: discovery.walltimestring(250.5) returns '10-10:30:00'

			totalseconds=round(numhours*3600);
			numdays=floor(totalseconds/86400);
			leftover=totalseconds-numdays*86400;
			hh=floor(leftover/3600);
			leftover=leftover-hh*3600;
			mm=floor(leftover/60);
			ss=leftover-mm*60;
			str=sprintf('%d-%02d:%02d:%02d',numdays,hh,mm,ss);
		end %}}}
	end
	methods (Static, Access=private)
		function fid=openforwriting(filename) % {{{
			%OPENFORWRITING - open filename in 'w' mode, erroring out if it cannot be opened
			[fid,message]=fopen(filename,'w');
			if fid==-1,
				error('discovery:openforwriting','could not open file ''%s'' for writing: %s',filename,message);
			end
		end %}}}
	end
end