import subprocess
import datetime
import numpy as np
from fielddisplay import fielddisplay
from pairoptions import pairoptions
from issmssh import issmssh
from issmscpin import issmscpin
from issmscpout import issmscpout
from QueueRequirements import QueueRequirements
from IssmConfig import IssmConfig
try:
    from fram_settings import fram_settings
except ImportError:
    print 'You need fram_settings.py to proceed, check presence and sys.path'


class fram(object):
    """
    Fram cluster class definition

    This is a SLURM queue.
    Priorities are based on a point system: a job is scheduled once it reaches 20000 points, earning 1 point per minute.
        -devel queue starts at 19990
        -normal starts at 19940
        -normal unprioritized starts at 19400

    Jobs can be:
        -normal (4 to 30 nodes, more on request, 48 h max walltime, 60 GB per node)
        -bigmem for big memory nodes (8 x 512 GB nodes and 2 x 6 TB nodes, shared nodes, 14 days max walltime)

    Usage:
        cluster = fram()
    """

    def __init__(self, *args):  # {{{
        self.name = 'fram'
        self.login = ''
        self.numnodes = 2
        self.cpuspernode = 20
        self.mem = 1.6
        self.queue = 'normal'
        self.time = 2 * 60
        self.codepath = ''
        self.executionpath = ''
        self.interactive = 0
        self.port = []
        self.accountname = ''
        self.profiling = 0

        #use provided options to change fields
        options = pairoptions(*args)

        #initialize cluster using user settings if provided
        self = fram_settings(self)

        #OK get other fields
        self = options.AssignObjectFields(self)
        self.np = self.numnodes * self.cpuspernode
    # }}}

    def __repr__(self):  # {{{
        #display the object
        s = "class fram object:"
        s = "%s\n%s" % (s, fielddisplay(self, 'name', 'name of the cluster'))
        s = "%s\n%s" % (s, fielddisplay(self, 'login', 'login'))
        s = "%s\n%s" % (s, fielddisplay(self, 'numnodes', 'number of nodes'))
        s = "%s\n%s" % (s, fielddisplay(self, 'cpuspernode', 'number of CPUs per node'))
        s = "%s\n%s" % (s, fielddisplay(self, 'mem', 'memory per CPU'))
        s = "%s\n%s" % (s, fielddisplay(self, 'queue', 'name of the queue (normal (D), short, singlenode, multinode, devel)'))
        s = "%s\n%s" % (s, fielddisplay(self, 'time', 'walltime requested in minutes'))
        s = "%s\n%s" % (s, fielddisplay(self, 'codepath', 'code path on the cluster'))
        s = "%s\n%s" % (s, fielddisplay(self, 'executionpath', 'execution path on the cluster'))
        s = "%s\n%s" % (s, fielddisplay(self, 'interactive', ''))
        s = "%s\n%s" % (s, fielddisplay(self, 'accountname', 'your cluster account'))
        s = "%s\n%s" % (s, fielddisplay(self, 'profiling', 'enable profiling if 1, default is 0'))
        return s
    # }}}

    def checkconsistency(self, md, solution, analyses):  # {{{
        #Queue dictionary gives queue name as key and [max walltime, max cpus] as value
        queuedict = {'normal': [2 * 24 * 60, 2048],
                     'devel': [4 * 60, 2048]}
        QueueRequirements(queuedict, self.queue, self.np, self.time)

        #Miscellaneous
        if not self.login:
            md = md.checkmessage('login empty')
        if not self.codepath:
            md = md.checkmessage('codepath empty')
        if not self.executionpath:
            md = md.checkmessage('executionpath empty')
        if self.interactive == 1:
            md = md.checkmessage('interactive mode not implemented')
        return self
    # }}}

    def BuildQueueScript(self, dirname, modelname, solution, io_gather, isvalgrind, isgprof, isdakota, isoceancoupling):  # {{{
        executable = 'issm.exe'
        if isdakota:
            version = IssmConfig('_DAKOTA_VERSION_')[0:2]
            version = float(version)
            if version >= 6:
                executable = 'issm_dakota.exe'
        if isoceancoupling:
            executable = 'issm_ocean.exe'

        #write queuing script
        shortname = modelname[0:min(12, len(modelname))]
        fid = open(modelname + '.queue', 'w')
        fid.write('#!/bin/bash -l\n')
        fid.write('#SBATCH --job-name=%s \n' % shortname)
        fid.write('#SBATCH --partition %s \n' % self.queue)
        fid.write('#SBATCH --nodes=%i\n' % self.numnodes)
        fid.write('#SBATCH --ntasks-per-node=%i\n' % self.cpuspernode)
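        # Note: self.np (= numnodes * cpuspernode, set in __init__) is what mpirun is
        # launched with below, so it should stay consistent with the --nodes and
        # --ntasks-per-node directives written above.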
        fid.write('#SBATCH --time=%s\n' % self.time)  #walltime is in minutes
        fid.write('#SBATCH --mem-per-cpu=%iGB\n' % self.mem)  #mem is in GB
        if (np.mod(self.np, 16) + np.mod(self.np, 20)) == 0:
            fid.write('#SBATCH --ntasks=%i\n' % self.np)
        fid.write('#SBATCH --account=%s\n' % self.accountname)
        fid.write('#SBATCH --output %s/%s/%s.outlog \n' % (self.executionpath, dirname, modelname))
        fid.write('#SBATCH --error %s/%s/%s.errlog \n\n' % (self.executionpath, dirname, modelname))
        fid.write('export ISSM_DIR="%s/../"\n' % self.codepath)
        fid.write('module restore system\n')
        fid.write('module load Automake/1.15.1-GCCcore-6.3.0\n')
        fid.write('module load libtool/2.4.6-GCCcore-6.3.0\n')
        fid.write('module load CMake/3.9.1\n')
        fid.write('module load PETSc/3.8.0-intel-2017a-Python-2.7.13\n')
        fid.write('module load ParMETIS/4.0.3-intel-2017a\n')
        fid.write('cd %s/%s/\n\n' % (self.executionpath, dirname))
        if self.profiling == 1:
            fid.write('module load perf-report\n')
            fid.write('perf-report mpirun -np %i %s/%s %s %s/%s %s\n' % (self.np, self.codepath, executable, str(solution), self.executionpath, dirname, modelname))
        else:
            fid.write('mpirun -np %i %s/%s %s %s/%s %s\n' % (self.np, self.codepath, executable, str(solution), self.executionpath, dirname, modelname))
        fid.close()
    # }}}

    def UploadQueueJob(self, modelname, dirname, filelist):  # {{{
        #compress the input files into one tarball
        compressstring = 'tar -zcf %s.tar.gz ' % dirname
        for file in filelist:
            compressstring += ' %s' % file
        subprocess.call(compressstring, shell=True)

        print 'uploading input file and queueing script'
        issmscpout(self.name, self.executionpath, self.login, self.port, [dirname + '.tar.gz'])
    # }}}

    def LaunchQueueJob(self, modelname, dirname, filelist, restart, batch):  # {{{
        print 'launching solution sequence on remote cluster'
        if restart:
            launchcommand = 'cd %s && cd %s && sbatch %s.queue' % (self.executionpath, dirname, modelname)
        else:
            launchcommand = 'cd %s && rm -rf ./%s && mkdir %s && cd %s && mv ../%s.tar.gz ./ && tar -zxf %s.tar.gz && sbatch %s.queue' % (self.executionpath, dirname, dirname, dirname, dirname, dirname, modelname)
        issmssh(self.name, self.login, self.port, launchcommand)
    # }}}

    def Download(self, dirname, filelist):  # {{{
        #copy files from cluster to current directory
        directory = '%s/%s/' % (self.executionpath, dirname)
        issmscpin(self.name, self.login, self.port, directory, filelist)
    # }}}
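

# Minimal usage sketch (not part of the class itself): the option names mirror the
# fields set in __init__, the values are placeholders, and fram_settings.py must be on
# sys.path since the constructor calls fram_settings() to fill in user-specific fields
# such as login, accountname, codepath and executionpath.
if __name__ == '__main__':
    cluster = fram('numnodes', 4, 'time', 4 * 60, 'queue', 'devel')
    print cluster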