source: issm/trunk-jpl/src/m/classes/clusters/hexagon.py@20784

Last change on this file since 20784 was 20784, checked in by bdef, 9 years ago

Add and update UiB clusters

File size: 5.5 KB
import subprocess
from fielddisplay import fielddisplay
from EnumToString import EnumToString
from pairoptions import pairoptions
from issmssh import issmssh
from issmscpin import issmscpin
from issmscpout import issmscpout
from QueueRequirements import QueueRequirements
from IssmConfig import IssmConfig
import datetime
try:
    from hexagon_settings import hexagon_settings
except ImportError:
    print 'You need hexagon_settings.py to proceed, check its presence and your sys.path'

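# hexagon_settings.py is a user-provided file; for reference, a minimal sketch
# (the values below are placeholders, not part of this repository) could be:
#
#     def hexagon_settings(cluster):
#         cluster.login = 'username'
#         cluster.accountname = 'nnXXXXk'
#         cluster.codepath = '/work/username/trunk-jpl/bin'
#         cluster.executionpath = '/work/username/execution'
#         return cluster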
class hexagon(object):
    """
    Hexagon cluster class definition
    Hexagon nodes are built from two 16-core CPUs. Nodes are dedicated to a single job, so the most efficient usage is 32 procs per node (16 per CPU), as a full node is billed regardless.
    You can reduce this number if you run out of memory, since the total node memory is shared by the procs on the node.
    Usage:
        cluster=hexagon();
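        #fields may also be set as name/value pairs at construction (values here are placeholders):
        cluster=hexagon('numnodes',3,'time',4*60);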
    """

    def __init__(self,*args):
        # {{{
        self.name = 'hexagon'
        self.login = ''
        self.numnodes = 2
        self.procspernodes = 32
        self.mem = 32000
        self.queue = 'batch'
        self.time = 2*60
        self.codepath = ''
        self.executionpath = ''
        self.interactive = 0
        self.port = []
        self.accountname = ''

        #use provided options to change fields
        options=pairoptions(*args)

        #initialize cluster using user settings if provided
        self=hexagon_settings(self)
        self.np=self.numnodes*self.procspernodes
        #OK get other fields
        self=options.AssignObjectFields(self)

        # }}}
    def __repr__(self):
        # {{{
        # display the object
        s = "class hexagon object:"
        s = "%s\n%s"%(s,fielddisplay(self,'name','name of the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'login','login'))
        s = "%s\n%s"%(s,fielddisplay(self,'numnodes','number of nodes'))
        s = "%s\n%s"%(s,fielddisplay(self,'procspernodes','number of MPI procs per node (default and optimal is 32)'))
        s = "%s\n%s"%(s,fielddisplay(self,'mem','total node memory in MB'))
        s = "%s\n%s"%(s,fielddisplay(self,'queue','name of the queue'))
        s = "%s\n%s"%(s,fielddisplay(self,'time','walltime requested in minutes'))
        s = "%s\n%s"%(s,fielddisplay(self,'codepath','code path on the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'executionpath','execution path on the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'interactive','interactive mode (not implemented)'))
        s = "%s\n%s"%(s,fielddisplay(self,'accountname','your cluster account'))
        return s
        # }}}
    def checkconsistency(self,md,solution,analyses):
        # {{{
        #mem should not be over 32000 MB per node
        #numprocs should not be over 4096
        #we have cpupernodes*numberofcpus=mppwidth and mppnppn=cpupernodes
        #Miscellaneous
        if not self.login:
            md = md.checkmessage('login empty')
        if not self.codepath:
            md = md.checkmessage('codepath empty')
        if not self.executionpath:
            md = md.checkmessage('executionpath empty')
        if self.interactive==1:
            md = md.checkmessage('interactive mode not implemented')
        if self.mem>32000:
            md = md.checkmessage('asking too much memory, max is 32000 MB per node')
        if self.np>4096:
            md = md.checkmessage('asking for too many procs, max is 4096')
        return self
        # }}}
    def BuildQueueScript(self,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota):
        # {{{

        executable='issm.exe'
        if isdakota:
            version=IssmConfig('_DAKOTA_VERSION_')[0:2]
            version=float(version)
            if version>=6:
                executable='issm_dakota.exe'

        #write queuing script
        shortname=modelname[0:min(12,len(modelname))]
        fid=open(modelname+'.queue','w')
        fid.write('#!/bin/bash\n')
        fid.write('#PBS -N %s \n' % shortname)
        #Cray mpp syntax: mppwidth is the total number of PEs, mppnppn the PEs per node
        fid.write('#PBS -l mppwidth=%i,mppnppn=%i\n' % (self.np,self.procspernodes))
        timestring= str(datetime.timedelta(minutes=self.time))
        fid.write('#PBS -l walltime=%s\n' % timestring) #walltime is hh:mm:ss
        #mppmem is the memory per PE: total node memory divided by procs per node
        fid.write('#PBS -l mppmem=%imb\n' % int(self.mem/self.procspernodes))
        fid.write('#PBS -A %s\n' % self.accountname)
        fid.write('#PBS -o %s/%s/%s.outlog \n' % (self.executionpath,dirname,modelname))
        fid.write('#PBS -e %s/%s/%s.errlog \n\n' % (self.executionpath,dirname,modelname))
        fid.write('export ISSM_DIR="%s/../"\n' % self.codepath)
        fid.write('export CRAY_ROOTFS=DSL\n')
        fid.write('module swap PrgEnv-cray/5.2.40 PrgEnv-gnu\n')
        fid.write('module load cray-petsc\n')
        fid.write('module load cray-tpsl\n')
        fid.write('module load cray-mpich\n')
        fid.write('module load gsl\n')
        fid.write('cd %s/%s/\n\n' % (self.executionpath,dirname))
        #aprun -B reuses the batch reservation's mpp settings (width, nppn, mem)
        fid.write('aprun -B %s/%s %s %s/%s %s\n' % (self.codepath,executable,str(EnumToString(solution)[0]),self.executionpath,dirname,modelname))
        fid.close()

        # }}}
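    # For illustration, with the default fields above (2 nodes * 32 procs, 120
    # minutes, 32000 MB nodes) BuildQueueScript emits a script of roughly this
    # shape; account name, paths and solution enum are placeholders:
    #
    #     #!/bin/bash
    #     #PBS -N md
    #     #PBS -l mppwidth=64,mppnppn=32
    #     #PBS -l walltime=2:00:00
    #     #PBS -l mppmem=1000mb
    #     #PBS -A nnXXXXk
    #     ...
    #     aprun -B /path/to/bin/issm.exe SolutionEnum /path/to/execution/run-dir md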
    def UploadQueueJob(self,modelname,dirname,filelist):
        # {{{

        #compress the files into one tar.gz archive
        compressstring='tar -zcf %s.tar.gz ' % dirname
        for filename in filelist:
            compressstring += ' %s' % filename
        subprocess.call(compressstring,shell=True)

        print 'uploading input file and queueing script'
        issmscpout(self.name,self.executionpath,self.login,self.port,[dirname+'.tar.gz'])

        # }}}
    def LaunchQueueJob(self,modelname,dirname,filelist,restart,batch):
        # {{{

        print 'launching solution sequence on remote cluster'
        if restart:
            #execution directory already exists, just resubmit the queue script
            launchcommand='cd %s && cd %s && qsub %s.queue' % (self.executionpath,dirname,modelname)
        else:
            #fresh launch: recreate the execution directory, unpack the archive and submit
            launchcommand='cd %s && rm -rf ./%s && mkdir %s && cd %s && mv ../%s.tar.gz ./ && tar -zxf %s.tar.gz && qsub %s.queue' % (self.executionpath,dirname,dirname,dirname,dirname,dirname,modelname)
        issmssh(self.name,self.login,self.port,launchcommand)

        # }}}
    def Download(self,dirname,filelist):
        # {{{
        #copy files from cluster to current directory
        directory='%s/%s/' % (self.executionpath,dirname)
        issmscpin(self.name,self.login,self.port,directory,filelist)
        # }}}
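
# The methods above are normally driven by ISSM's solve pipeline; a manual
# sequence would look roughly like this (model and file names are placeholders):
#
#     cluster = hexagon()
#     cluster.UploadQueueJob('md','run-dir',['md.bin','md.queue'])
#     cluster.LaunchQueueJob('md','run-dir',[],False,False)
#     # ...once the job has finished...
#     cluster.Download('run-dir',['md.outbin'])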