Changeset 26330 for issm/trunk-jpl/src/m/classes/clusters/lonestar.m
- Timestamp:
- 06/23/21 13:51:11 (4 years ago)
- File:
-
- 1 edited
-
issm/trunk-jpl/src/m/classes/clusters/lonestar.m (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
issm/trunk-jpl/src/m/classes/clusters/lonestar.m
%LONESTAR cluster class definition
%
%   Usage:
%      cluster=lonestar();
%      cluster=lonestar('numnodes',2,'login','username');
%
%   Any public property below can be overridden with a name/value pair
%   passed to the constructor.

classdef lonestar
	properties (SetAccess=public)
		% {{{
		name          = 'ls5.tacc.utexas.edu' %host handed to issmssh/issmscpout/issmscpin
		login         = '';                   %remote user name (must be non-empty, see checkconsistency)
		modules       = {'intel/18.0.2' 'gsl'}; %one 'module load' line is emitted per entry
		numnodes      = 1;                    %written as '#SBATCH -N'
		cpuspernode   = 24;                   %nprocs = numnodes*cpuspernode
		port          = 1099;                 %port handed to issmssh/issmscpout/issmscpin
		queue         = 'normal';             %written as '#SBATCH -p'
		codepath      = '';                   %remote directory holding the executables
		executionpath = '';                   %remote directory under which the run directory is created
		interactive   = 0;                    %if nonzero, also write .run/.errlog/.outlog files
		time          = 48*60*60;             %wall time in seconds (converted to HH:MM:00 for '#SBATCH -t')
		email         = '';                   %mail directives are only written if this contains '@'
	end
	%}}}
	methods
		function cluster=lonestar(varargin) % {{{
			%LONESTAR - constructor; varargin is a list of property name/value pairs

			%initialize cluster using default settings if provided
			%(lonestar_settings is an optional script run in this workspace)
			if (exist('lonestar_settings')==2), lonestar_settings; end

			%use provided options to change fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);

		end
		%}}}
		function disp(cluster) % {{{
			%DISP - display the object, one field per line
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf('    name: %s',cluster.name));
			disp(sprintf('    login: %s',cluster.login));
			disp(sprintf('    modules: %s',strjoin(cluster.modules,', ')));
			disp(sprintf('    port: %i',cluster.port));
			disp(sprintf('    numnodes: %i',cluster.numnodes));
			disp(sprintf('    cpuspernode: %i',cluster.cpuspernode));
			disp(sprintf('    np: %i',cluster.nprocs()));
			disp(sprintf('    queue: %s',cluster.queue));
			disp(sprintf('    codepath: %s',cluster.codepath));
			disp(sprintf('    executionpath: %s',cluster.executionpath));
			disp(sprintf('    interactive: %i',cluster.interactive));
			disp(sprintf('    time: %i',cluster.time));
			disp(sprintf('    email: %s',cluster.email));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			%NPROCS - total number of processors: numnodes*cpuspernode
			numprocs=cluster.numnodes*cluster.cpuspernode;
		end
		%}}}
		function numprocs=np(cluster) % {{{
			%NP - backward-compatible alias of nprocs (the method was named
			%     np before r26330), so that 'cluster.np' keeps working
			numprocs=cluster.nprocs();
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			%CHECKCONSISTENCY - validate queue/time/np against the queue limits
			%   and record a check message in md for every required field left empty

			%per-queue limits: maximum wall time (seconds) and maximum number of processors
			available_queues={'normal','development'};
			queue_requirements_time=[48*60*60 2*60*60];
			queue_requirements_np=[4104 264];

			QueueRequirements(available_queues,queue_requirements_time,queue_requirements_np,cluster.queue,cluster.nprocs(),cluster.time);

			%Miscellaneous
			if isempty(cluster.login), md = checkmessage(md,'login empty'); end
			if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
			if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,modelname,solution,io_gather,isvalgrind,isgprof) % {{{
			%BUILDKRIGINGQUEUESCRIPT - write <modelname>.queue launching kriging.exe
			%
			%   NOTE(review): the directives written here use the '#$' prefix
			%   whereas BuildQueueScript writes '#SBATCH' ones; confirm that
			%   this script is still meant to be usable on this cluster.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof),    disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=fopen([modelname '.queue'],'w');
			if fid==-1,
				error('lonestar:fopen','could not open %s for writing',[modelname '.queue']);
			end
			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#$ -N %s\n',modelname);
			fprintf(fid,'#$ -q %s \n',cluster.queue);
			fprintf(fid,'#$ -pe one-node-mpi 2-64\n');
			fprintf(fid,'#$ -R y\n');
			fprintf(fid,'#$ -m beas\n');
			fprintf(fid,'#$ -o %s.outlog \n',modelname);
			fprintf(fid,'#$ -e %s.errlog \n\n',modelname);
			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath); %FIXME
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');       %FIXME
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,modelname);
			fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s\n',cluster.nprocs(),cluster.codepath,[cluster.executionpath '/' modelname],modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin',modelname,modelname);
			end
			fclose(fid);
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%BUILDQUEUESCRIPT - write the SLURM batch script <modelname>.queue
			%
			%   dirname:         run directory, relative to cluster.executionpath
			%   modelname:       base name of the .queue/.outlog/.errlog/.outbin files
			%   solution:        passed as first argument to the executable
			%   io_gather:       if false, a 'cat' of the per-process outbin files is appended
			%   isvalgrind/isgprof: not supported here, only trigger a message
			%   isdakota:        selects issm_dakota.exe when the Dakota version is >= 6
			%   isoceancoupling: selects issm_ocean.exe (takes precedence over Dakota)
			%
			%   In interactive mode, <modelname>.run and empty <modelname>.errlog
			%   and <modelname>.outlog files are written as well.

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof),    disp('gprof not supported by cluster, ignoring...'); end

			%pick the executable
			executable='issm.exe';
			if isdakota,
				dakotaversion=IssmConfig('_DAKOTA_VERSION_');
				dakotaversion=str2double(dakotaversion(1:3));
				if (dakotaversion>=6),
					executable='issm_dakota.exe';
				end
			end
			if isoceancoupling,
				executable='issm_ocean.exe';
			end

			numprocs=cluster.nprocs();
			rundir=[cluster.executionpath '/' dirname];

			%write queuing script
			fid=fopen([modelname '.queue'],'w');
			if fid==-1,
				error('lonestar:fopen','could not open %s for writing',[modelname '.queue']);
			end

			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH -J %s \n',modelname);
			fprintf(fid,'#SBATCH -p %s \n',cluster.queue);
			fprintf(fid,'#SBATCH -o %s.outlog \n',modelname);
			fprintf(fid,'#SBATCH -e %s.errlog \n',modelname);
			%request at least 24 tasks per node
			fprintf(fid,'#SBATCH -n %i \n',cluster.numnodes*max(numprocs/cluster.numnodes,24));
			fprintf(fid,'#SBATCH -N %i \n',cluster.numnodes);
			fprintf(fid,'#SBATCH -t %02i:%02i:00 \n\n',floor(cluster.time/3600),floor(mod(cluster.time,3600)/60));

			%mail directives must come before the first executable command
			%(sbatch stops reading #SBATCH lines at that point), hence they
			%are written ahead of the 'module load' lines
			if any(cluster.email=='@')
				fprintf(fid,'#SBATCH --mail-user=%s \n',cluster.email);
				fprintf(fid,'#SBATCH --mail-type=end \n\n');

				%fprintf(fid,'ssh login1 "mail -s ''SLURM Jobid=${SLURM_JOBID} Name=${SLURM_JOB_NAME} Began on Lonestar 5.'' %s <<< ''Job Started'' " \n\n',cluster.email);
			end

			%module names are data, never a format string
			for i=1:numel(cluster.modules),
				fprintf(fid,'module load %s\n',cluster.modules{i});
			end

			if isdakota,
				fprintf(fid,'export KMP_AFFINITY="granularity=fine,compact,verbose" \n\n');
			end

			fprintf(fid,'export PATH="$PATH:."\n\n');
			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath); %FIXME
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');       %FIXME
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
			fprintf(fid,'ibrun -np %i %s/%s %s %s %s\n',numprocs,cluster.codepath,executable,solution,rundir,modelname);
			if ~io_gather, %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin',modelname,modelname);
			end

			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive,
				fid=fopen([modelname '.run'],'w');
				if fid==-1,
					error('lonestar:fopen','could not open %s for writing',[modelname '.run']);
				end
				%same command line as in the queue script: <codepath>/<executable>
				fprintf(fid,'ibrun -np %i %s/%s %s %s %s\n',numprocs,cluster.codepath,executable,solution,rundir,modelname);
				if ~io_gather, %concatenate the output files:
					fprintf(fid,'cat %s.outbin.* > %s.outbin',modelname,modelname);
				end
				fclose(fid);

				%create empty log files
				logfiles={[modelname '.errlog'],[modelname '.outlog']};
				for i=1:numel(logfiles),
					fid=fopen(logfiles{i},'w');
					if fid==-1,
						error('lonestar:fopen','could not open %s for writing',logfiles{i});
					end
					fclose(fid);
				end
			end
		end %}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
			%UPLOADQUEUEJOB - pack filelist into <dirname>.tar.gz and copy it
			%   to cluster.executionpath with issmscpout

			%compress the files into one zip.
			compressstring=['tar -zcf ' dirname '.tar.gz '];
			for i=1:numel(filelist),
				compressstring = [compressstring ' ' filelist{i}];
			end
			if cluster.interactive,
				compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
			end
			system(compressstring);

			disp('uploading input file and queueing script');
			issmscpout(cluster.name,cluster.executionpath,cluster.login,cluster.port,{[dirname '.tar.gz']});

		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
			%LAUNCHQUEUEJOB - submit <modelname>.queue with sbatch through issmssh
			%
			%   If restart is non-empty the existing remote run directory is
			%   reused; otherwise it is deleted, recreated and the uploaded
			%   archive is unpacked in it before submission.

			%an empty dirname would turn the command below into 'rm -rf ./'
			if isempty(dirname),
				error('lonestar:LaunchQueueJob','dirname is empty');
			end

			disp('launching solution sequence on remote cluster');
			if ~isempty(restart)
				launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
			else
				launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
					' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz  && hostname && sbatch ' modelname '.queue '];
			end
			issmssh(cluster.name,cluster.login,cluster.port,launchcommand);
		end %}}}
		function Download(cluster,dirname,filelist) % {{{
			%DOWNLOAD - fetch filelist from the remote run directory with issmscpin

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,cluster.port,directory,filelist);

		end %}}}
	end
end
Note:
See TracChangeset
for help on using the changeset viewer.
![(please configure the [header_logo] section in trac.ini)](/trac/issm/chrome/common/trac_banner.png)