source: issm/trunk-jpl/src/m/classes/clusters/hexagon.py@ 21415

Last change on this file since 21415 was 21415, checked in by bdef, 8 years ago

BUG:Fixing np initialisation

File size: 5.4 KB
import subprocess
from fielddisplay import fielddisplay
from pairoptions import pairoptions
from issmssh import issmssh
from issmscpin import issmscpin
from issmscpout import issmscpout
from QueueRequirements import QueueRequirements
from IssmConfig import IssmConfig  # needed for the Dakota version check in BuildQueueScript
import datetime
try:
    from hexagon_settings import hexagon_settings
except ImportError:
    print 'You need hexagon_settings.py to proceed, check presence and sys.path'
class hexagon(object):
    """
    Hexagon cluster class definition

    Hexagon nodes are built of 2*16 CPUs (two 16-core processors). Nodes are dedicated to a single job, so the best usage is 32 procs per node, which is what is billed anyway.
    You can reduce this number if you run out of memory, as the total node memory is divided by the number of procs.

    Usage:
        cluster=hexagon()

    See also the illustrative usage sketch at the end of this file.
    """

    def __init__(self,*args):
        # {{{
        self.name          = 'hexagon'
        self.login         = ''
        self.numnodes      = 2
        self.procspernodes = 32
        self.mem           = 32000
        self.queue         = 'batch'
        self.time          = 2*60
        self.codepath      = ''
        self.executionpath = ''
        self.interactive   = 0
        self.port          = []
        self.accountname   = ''

        #use provided options to change fields
        options=pairoptions(*args)

        #initialize cluster using user settings if provided
        self=hexagon_settings(self)

        #OK get other fields
        self=options.AssignObjectFields(self)
        self.np=self.numnodes*self.procspernodes
        # }}}
    def __repr__(self):
        # {{{
        # display the object
        s = "class hexagon object:"
        s = "%s\n%s"%(s,fielddisplay(self,'name','name of the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'login','login'))
        s = "%s\n%s"%(s,fielddisplay(self,'numnodes','number of nodes'))
        s = "%s\n%s"%(s,fielddisplay(self,'procspernodes','number of MPI procs per node (default and optimal is 32)'))
        s = "%s\n%s"%(s,fielddisplay(self,'mem','total node memory in MB'))
        s = "%s\n%s"%(s,fielddisplay(self,'queue','name of the queue'))
        s = "%s\n%s"%(s,fielddisplay(self,'time','walltime requested in minutes'))
        s = "%s\n%s"%(s,fielddisplay(self,'codepath','code path on the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'executionpath','execution path on the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'interactive',''))
        s = "%s\n%s"%(s,fielddisplay(self,'accountname','your cluster account'))
        return s
        # }}}
    def checkconsistency(self,md,solution,analyses):
        # {{{
        #mem should not be over 32000 MB
        #numprocs should not be over 4096
        #we have cpupernodes*numberofcpus=mppwidth and mppnppn=cpupernodes
        #Miscellaneous
        if not self.login:
            md = md.checkmessage('login empty')
        if not self.codepath:
            md = md.checkmessage('codepath empty')
        if not self.executionpath:
            md = md.checkmessage('executionpath empty')
        if self.interactive==1:
            md = md.checkmessage('interactive mode not implemented')
        if self.mem>32000:
            md = md.checkmessage('asking too much memory, max is 32000 MB per node')
        return self
        # }}}
    def BuildQueueScript(self,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota):
        # {{{

        executable='issm.exe'
        if isdakota:
            version=IssmConfig('_DAKOTA_VERSION_')[0:2]
            version=float(version)
            if version>=6:
                executable='issm_dakota.exe'

        #write queuing script
        shortname=modelname[0:min(12,len(modelname))]
        fid=open(modelname+'.queue','w')
        fid.write('#!/bin/bash\n')
        fid.write('#PBS -N %s \n' % shortname)
        fid.write('#PBS -l mppwidth=%i,mppnppn=%i\n' % (self.np,self.procspernodes))
        timestring= str(datetime.timedelta(minutes=self.time))
        fid.write('#PBS -l walltime=%s\n' % timestring) #walltime is hh:mm:ss
        fid.write('#PBS -l mppmem=%imb\n' % int(self.mem/self.procspernodes))
        fid.write('#PBS -A %s\n' % self.accountname)
        fid.write('#PBS -o %s/%s/%s.outlog \n' % (self.executionpath,dirname,modelname))
        fid.write('#PBS -e %s/%s/%s.errlog \n\n' % (self.executionpath,dirname,modelname))
        fid.write('export ISSM_DIR="%s/../"\n' % self.codepath)
        fid.write('export CRAY_ROOTFS=DSL\n')
        fid.write('module swap PrgEnv-cray/5.2.40 PrgEnv-gnu\n')
        fid.write('module load cray-petsc\n')
        fid.write('module load cray-tpsl\n')
        fid.write('module load cray-mpich\n')
        fid.write('module load gsl\n')
        fid.write('cd %s/%s/\n\n' % (self.executionpath,dirname))
        fid.write('aprun -B %s/%s %s %s/%s %s\n' % (self.codepath,executable,str(solution),self.executionpath,dirname,modelname))
        fid.close()

        # }}}
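    # For reference, a sketch of the .queue file written above, with the default
    # numnodes=2, procspernodes=32, mem=32000 and time=120; the paths, account and
    # model names are hypothetical placeholders:
    #
    #   #!/bin/bash
    #   #PBS -N squaremodel
    #   #PBS -l mppwidth=64,mppnppn=32
    #   #PBS -l walltime=2:00:00
    #   #PBS -l mppmem=1000mb
    #   #PBS -A nn1234k
    #   #PBS -o /work/user/exec/squaremodel/squaremodel.outlog
    #   #PBS -e /work/user/exec/squaremodel/squaremodel.errlog
    #
    #   export ISSM_DIR="/home/user/issm/bin/../"
    #   export CRAY_ROOTFS=DSL
    #   (module swap/load lines as written above)
    #   cd /work/user/exec/squaremodel/
    #   aprun -B /home/user/issm/bin/issm.exe StressbalanceSolution /work/user/exec/squaremodel squaremodel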
    def UploadQueueJob(self,modelname,dirname,filelist):
        # {{{

        #compress the input files into one gzipped tarball
        compressstring='tar -zcf %s.tar.gz ' % dirname
        for file in filelist:
            compressstring += ' %s' % file
        subprocess.call(compressstring,shell=True)

        print 'uploading input file and queueing script'
        issmscpout(self.name,self.executionpath,self.login,self.port,[dirname+'.tar.gz'])

        # }}}
    def LaunchQueueJob(self,modelname,dirname,filelist,restart,batch):
        # {{{

        print 'launching solution sequence on remote cluster'
        if restart:
            launchcommand='cd %s && cd %s && qsub %s.queue' % (self.executionpath,dirname,modelname)
        else:
            launchcommand='cd %s && rm -rf ./%s && mkdir %s && cd %s && mv ../%s.tar.gz ./ && tar -zxf %s.tar.gz && qsub %s.queue' % (self.executionpath,dirname,dirname,dirname,dirname,dirname,modelname)
        issmssh(self.name,self.login,self.port,launchcommand)

        # }}}
    def Download(self,dirname,filelist):
        # {{{
        #copy files from cluster to current directory
        directory='%s/%s/' % (self.executionpath,dirname)
        issmscpin(self.name,self.login,self.port,directory,filelist)
        # }}}
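
# Illustrative usage sketch (comments only, not executed). The option values and
# the model/solve calls below are hypothetical examples, not part of this class;
# hexagon_settings.py must also be present for instantiation to succeed:
#
#   from hexagon import hexagon
#   cluster = hexagon('numnodes',3,'time',180,'accountname','nn1234k')
#   md.cluster = cluster        # attach to an existing model 'md'
#   md = solve(md,'Stressbalance')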