Changeset 26744 for issm/trunk/src/m/classes/clusters/lonestar.m
- Timestamp: 12/22/21 10:39:44 (3 years ago)
- Location: issm/trunk
- Files: 3 edited
issm/trunk
- Property svn:mergeinfo changed: /issm/trunk-jpl merged: 25837-25866,25868-25993,25995-26330,26332-26733,26736-26739,26741

issm/trunk/src
- Property svn:mergeinfo changed
issm/trunk/src/m/classes/clusters/lonestar.m
r24313 → r26744

@@ -1 +1 @@
-%LONESTAR class definition
+%LONESTAR cluster class definition
@@ -13 +13 @@
-		modules = {'intel/16.0.1'};
+		modules = {'intel/18.0.2' 'gsl'};
@@ -45 +45,2 @@
-			disp(sprintf(' np: %i',cluster.np));
+			disp(sprintf(' np: %i',cluster.nprocs()));
+			disp(sprintf(' port: %i',cluster.port));
@@ -54 +55 @@
-		function numprocs=np(cluster) % {{{
+		function numprocs=nprocs(cluster) % {{{
@@ -65 +66 @@
-			QueueRequirements(available_queues,queue_requirements_time,queue_requirements_np,cluster.queue,cluster.np,cluster.time)
+			QueueRequirements(available_queues,queue_requirements_time,queue_requirements_np,cluster.queue,cluster.nprocs(),cluster.time)
@@ -67 +68 @@
-			%Miscelaneous
+			%Miscellaneous
@@ -91 +92 @@
-			fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s\n',cluster.np,cluster.codepath,[cluster.executionpath '/' modelname],modelname);
+			fprintf(fid,'mpiexec -np %i %s/kriging.exe %s %s\n',cluster.nprocs(),cluster.codepath,[cluster.executionpath '/' modelname],modelname);
@@ -122 +123 @@
-			fprintf(fid,'#SBATCH -n %i \n',cluster.numnodes*max(cluster.numnodes,24));
+			fprintf(fid,'#SBATCH -n %i \n',cluster.numnodes*max(cluster.nprocs()/cluster.numnodes,24));
@@ -144 +145 @@
-			fprintf(fid,'ibrun -np %i %s/%s %s %s %s\n',cluster.np,cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
+			fprintf(fid,'ibrun -np %i %s/%s %s %s %s\n',cluster.nprocs(),cluster.codepath,executable,solution,[cluster.executionpath '/' dirname],modelname);
@@ -154 +155 @@
-				fprintf(fid,'ibrun -np %i %s/%s %s %s %s\n',cluster.np,executable,cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
+				fprintf(fid,'ibrun -np %i %s/%s %s %s %s\n',cluster.nprocs(),executable,cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
@@ -165 +166 @@
-		function UploadQueueJob(cluster,modelname,dirname,filelist)% {{{
+		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
@@ -181 +182 @@
-		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch)% {{{
+		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
@@ -192 +193 @@
-		function Download(cluster,dirname,filelist)% {{{
+		function Download(cluster,dirname,filelist) % {{{
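In substance, this changeset renames the np() method to nprocs() (updating all call sites to cluster.nprocs()), swaps the default modules from 'intel/16.0.1' to 'intel/18.0.2' plus 'gsl', and corrects the SLURM task count written into the queue script: the old header used numnodes*max(numnodes,24), where the second factor was the node count rather than the per-node task count. Below is a minimal standalone sketch of the corrected arithmetic, plain MATLAB with no ISSM dependencies; the sample values (numnodes=2, cpuspernode=24) are illustrative, not taken from the changeset.

% Sketch only: reproduces the task-count arithmetic that BuildQueueScript
% writes into the SLURM header after this changeset.
numnodes    = 2;                        % cluster.numnodes (illustrative value)
cpuspernode = 24;                       % cluster.cpuspernode (illustrative value)
nprocs      = numnodes*cpuspernode;     % what cluster.nprocs() returns

% r24313: -n = numnodes*max(numnodes,24)        -> 2*24 = 48, but drifts from the
%         real core count as soon as numnodes exceeds 24 or cpuspernode changes.
% r26744: -n = numnodes*max(nprocs/numnodes,24) -> 2*24 = 48, and tracks cpuspernode.
ntasks = numnodes*max(nprocs/numnodes,24);
fprintf('#SBATCH -n %i \n#SBATCH -N %i \n',ntasks,numnodes);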