%DISCOVERY(Dartmouth) cluster class definition
%
%   Usage:
%      cluster=discovery();
%      cluster=discovery('numnodes',3);
%      cluster=discovery('numnodes',3,'login','username');

classdef discovery
	properties (SetAccess=public)
		% {{{
		name          = 'discovery'; %first argument handed to issmssh/issmscpin/issmscpout
		login         = '';          %user name used for the ssh/scp calls
		numnodes      = 1;           %written to #SBATCH --nodes
		cpuspernode   = 16;          %written to #SBATCH --ntasks-per-node
		codepath      = '';          %remote directory holding issm.exe and kriging.exe
		executionpath = '';          %remote directory in which the run directory is created
		interactive   = 0;           %if nonzero, BuildQueueScript also writes a .run file and empty log files
		time          = 10;          %in hours
		memory        = 2;           %in Gb
		email         = 'END,FAIL';  %written to #SBATCH --mail-type (skipped when empty)

	end
	%}}}
	methods
		function cluster=discovery(varargin) % {{{
			%DISCOVERY - build a cluster object from the defaults above,
			%   an optional discovery_settings file and name/value pairs

			%initialize cluster using default settings if provided
			%(presumably a user script assigning fields of 'cluster'; only
			%files are of interest, hence the explicit 'file' search type)
			if (exist('discovery_settings','file')==2), discovery_settings; end

			%use provided options to change fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
		function disp(cluster) % {{{
			%DISP - display the object
			%   time and memory are printed with %g so that fractional
			%   values are not rendered in exponential notation
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf(' name: %s',cluster.name));
			disp(sprintf(' login: %s',cluster.login));
			disp(sprintf(' numnodes: %i',cluster.numnodes));
			disp(sprintf(' cpuspernode: %i',cluster.cpuspernode));
			disp(sprintf(' time: %g hours',cluster.time));
			disp(sprintf(' memory: %g Gb',cluster.memory));
			disp(sprintf(' email: %s (notifications: BEGIN,END,FAIL)',cluster.email));
			disp(sprintf(' codepath: %s',cluster.codepath));
			disp(sprintf(' executionpath: %s',cluster.executionpath));
			disp(sprintf(' interactive: %i',cluster.interactive));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			%NPROCS - compute number of processors (numnodes x cpuspernode)
			numprocs=cluster.numnodes*cluster.cpuspernode;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			%CHECKCONSISTENCY - report empty mandatory fields through checkmessage
			%   solution and analyses are accepted but not used here

			%Miscellaneous
			if isempty(cluster.login), md = checkmessage(md,'login empty'); end
			if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
			if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDKRIGINGQUEUESCRIPT - write <modelname>.queue, a Slurm batch
			%   script running kriging.exe through srun
			%   solution, isdakota and isoceancoupling are accepted but not used here

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=openforwriting([modelname '.queue']);
			writejobheader(cluster,fid,modelname,dirname);
			fprintf(fid,'srun %s/kriging.exe %s %s\n', cluster.codepath,[cluster.executionpath '/' modelname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin',modelname,modelname);
			end
			fclose(fid);
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDQUEUESCRIPT - write <modelname>.queue, a Slurm batch script
			%   running issm.exe through mpirun; in interactive mode also write
			%   <modelname>.run and empty <modelname>.errlog/.outlog files
			%   isdakota and isoceancoupling are accepted but not used here

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=openforwriting([modelname '.queue']);
			writejobheader(cluster,fid,modelname,dirname);
			fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin',modelname,modelname);
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=openforwriting([modelname '.run']);
				fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'cat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

				%empty log files, so that they exist when UploadQueueJob archives them
				fclose(openforwriting([modelname '.errlog']));
				fclose(openforwriting([modelname '.outlog']));
			end
		end %}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
			%UPLOADQUEUEJOB - archive the files of filelist into <dirname>.tar.gz
			%   and copy the archive to cluster.executionpath with issmscpout

			%compress the files into one zip.
			compressstring=['tar -zcf ' dirname '.tar.gz '];
			for i=1:numel(filelist),
				compressstring = [compressstring ' ' filelist{i}];
			end
			if cluster.interactive,
				compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
			end

			%keep going on failure (as before) but do not hide it from the user
			status=system(compressstring);
			if status~=0,
				warning('discovery:UploadQueueJob:tar','tar exited with status %i while creating ''%s.tar.gz''',status,dirname);
			end

			disp('uploading input file and queueing script');
			issmscpout(cluster.name,cluster.executionpath,cluster.login,0,{[dirname '.tar.gz']});

		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
			%LAUNCHQUEUEJOB - submit <modelname>.queue with sbatch over issmssh
			%   restart non-empty: reuse the existing remote run directory
			%   restart empty:     recreate the run directory and unpack the uploaded archive
			%   filelist and batch are accepted but not used here

			disp('launching solution sequence on remote cluster');
			if ~isempty(restart)
				launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
			else
				launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
					' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && hostname && sbatch ' modelname '.queue '];
			end
			issmssh(cluster.name,cluster.login,0,launchcommand);
		end %}}}
		function Download(cluster,dirname,filelist) % {{{
			%DOWNLOAD - fetch filelist from <executionpath>/<dirname>/ with issmscpin

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,0,directory,filelist);

		end %}}}
	end
	methods (Access=private)
		function writejobheader(cluster,fid,modelname,dirname) % {{{
			%WRITEJOBHEADER - write the part shared by all queue scripts:
			%   shebang, #SBATCH directives, ISSM environment setup and the
			%   cd into the remote run directory

			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
			fprintf(fid,'#SBATCH --account=ice\n'); %Make sure we use the ICE account for this run
			fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
			fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
			fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
			fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
			fprintf(fid,'#SBATCH --time=%s\n',discovery.slurmwalltime(cluster.time)); %cluster.time is in hours
			fprintf(fid,'#SBATCH --mem=%s\n',discovery.slurmmemory(cluster.memory)); %cluster.memory is in Gb
			if ~isempty(cluster.email)
				fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
			end
			fprintf(fid,'\n');

			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
		end %}}}
	end
	methods (Static, Hidden)
		function str=slurmwalltime(hours) % {{{
			%SLURMWALLTIME - format a duration given in hours for #SBATCH --time
			%   below one day:  'HH:MM:SS'
			%   one day and up: 'D-HH:MM:SS'
			%   (datestr only shows the time of day, so it would wrap around
			%   every 24 hours)

			total  = round(hours*3600);        %whole seconds
			ndays  = floor(total/86400);
			left   = total-ndays*86400;        %seconds within the last day
			hh     = floor(left/3600);
			mm     = floor((left-hh*3600)/60);
			ss     = left-hh*3600-mm*60;

			if ndays>0,
				str=sprintf('%d-%02d:%02d:%02d',ndays,hh,mm,ss);
			else
				str=sprintf('%02d:%02d:%02d',hh,mm,ss);
			end
		end %}}}
		function str=slurmmemory(gb) % {{{
			%SLURMMEMORY - format a memory size given in Gb for #SBATCH --mem
			%   whole numbers keep the 'G' suffix; fractional sizes are rounded
			%   up to whole megabytes ('M' suffix) because sprintf('%i',...)
			%   switches to exponential notation for non-integer values

			if gb==round(gb),
				str=sprintf('%dG',gb);
			else
				str=sprintf('%dM',ceil(gb*1024));
			end
		end %}}}
	end
end

function fid=openforwriting(filename) % {{{
	%OPENFORWRITING - fopen(filename,'w') that errors out instead of returning -1

	[fid,message]=fopen(filename,'w');
	if fid==-1,
		error('discovery:fopen','could not open ''%s'' for writing: %s',filename,message);
	end
end %}}}