source: issm/trunk-jpl/src/m/classes/clusters/fram.py@ 22203

Last change on this file since 22203 was 22203, checked in by bdef, 7 years ago

adding Fram cluster file

File size: 6.2 KB
import subprocess
import numpy as np
from fielddisplay import fielddisplay
from pairoptions import pairoptions
from issmssh import issmssh
from issmscpin import issmscpin
from issmscpout import issmscpout
from QueueRequirements import QueueRequirements
from IssmConfig import IssmConfig
import datetime
try:
    from fram_settings import fram_settings
except ImportError:
    print('You need fram_settings.py to proceed, check presence and sys.path')

class fram(object):
    """
    Fram cluster class definition
    This is a SLURM queue
    Priorities are based on a point system: a job earns 1 point per minute of wait and gets a reservation when it reaches 20000 points.
        -Devel queue starts at 19990
        -Normal starts at 19940
        -Normal unpri starts at 19400

    Jobs can be:
        -normal (4 to 30 nodes, more on request, 48 h max walltime, 60 GB per node)
        -bigmem for big memory nodes (8 nodes with 512 GB and 2 nodes with 6 TB, shared nodes, 14 days max walltime)
    Usage:
        cluster=fram()
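        cluster=fram('numnodes',4,'time',12*60)   (optional 'field',value pairs override the defaults; values here are illustrative)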
30 """
31
    def __init__(self,*args):
        # {{{
        self.name          = 'fram'
        self.login         = ''
        self.numnodes      = 2
        self.cpuspernode   = 20
        self.mem           = 1.6
        self.queue         = 'normal'
        self.time          = 2*60
        self.codepath      = ''
        self.executionpath = ''
        self.interactive   = 0
        self.port          = []
        self.accountname   = ''
        self.profiling     = 0
        #use provided options to change fields
        options=pairoptions(*args)

        #initialize cluster using user settings if provided
        self=fram_settings(self)
        #OK get other fields
        self=options.AssignObjectFields(self)
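        #total number of MPI tasks is nodes times cores per node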
        self.np=self.numnodes*self.cpuspernode
        # }}}

    def __repr__(self):
        # {{{
        # display the object
        s = "class fram object:"
        s = "%s\n%s"%(s,fielddisplay(self,'name','name of the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'login','login'))
        s = "%s\n%s"%(s,fielddisplay(self,'numnodes','number of nodes'))
        s = "%s\n%s"%(s,fielddisplay(self,'cpuspernode','number of CPUs per node'))
        s = "%s\n%s"%(s,fielddisplay(self,'mem','memory per CPU in GB'))
        s = "%s\n%s"%(s,fielddisplay(self,'queue','name of the queue (normal (D), devel)'))
        s = "%s\n%s"%(s,fielddisplay(self,'time','walltime requested in minutes'))
        s = "%s\n%s"%(s,fielddisplay(self,'codepath','code path on the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'executionpath','execution path on the cluster'))
        s = "%s\n%s"%(s,fielddisplay(self,'interactive',''))
        s = "%s\n%s"%(s,fielddisplay(self,'accountname','your cluster account'))
        s = "%s\n%s"%(s,fielddisplay(self,'profiling','enable profiling if 1 (default is 0)'))
        return s
        # }}}
    def checkconsistency(self,md,solution,analyses):
        # {{{
        #Queue dictionary gives queue name as key and [max walltime (min), max cpus] as values
        queuedict = {'normal':[2*24*60,2048],
                     'devel':[4*60,2048]}
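        #check that the requested queue exists and that np and time fit within its limits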
        QueueRequirements(queuedict,self.queue,self.np,self.time)

        #Miscellaneous
        if not self.login:
            md = md.checkmessage('login empty')
        if not self.codepath:
            md = md.checkmessage('codepath empty')
        if not self.executionpath:
            md = md.checkmessage('executionpath empty')
        if self.interactive==1:
            md = md.checkmessage('interactive mode not implemented')
        return self
        # }}}
    def BuildQueueScript(self,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling):
        # {{{

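        #select the executable: Dakota >= 6 and ocean coupling use dedicated binaries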
        executable='issm.exe'
        if isdakota:
            version=IssmConfig('_DAKOTA_VERSION_')[0]  #IssmConfig returns a tuple; first entry holds the version string
            version=float(version[0:3])                #keep 'major.minor' only, e.g. '6.2'
            if version>=6:
                executable='issm_dakota.exe'
        if isoceancoupling:
            executable='issm_ocean.exe'
        #write queuing script
        shortname=modelname[0:min(12,len(modelname))]
        fid=open(modelname+'.queue','w')

        fid.write('#!/bin/bash -l\n')
        fid.write('#SBATCH --job-name=%s \n' % shortname)
        fid.write('#SBATCH --partition %s \n' % self.queue)
        fid.write('#SBATCH --nodes=%i \n' % self.numnodes)
        fid.write('#SBATCH --ntasks-per-node=%i \n' % self.cpuspernode)
        fid.write('#SBATCH --time=%s\n' % self.time) #walltime is in minutes
        fid.write('#SBATCH --mem-per-cpu=%iGB\n' % self.mem) #mem is in GB
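        #only request an explicit total task count when np is a multiple of both 16 and 20
        #(i.e. fills whole nodes of either core count; this check looks inherited from the Stallo cluster file)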
        if (np.mod(self.np,16)+np.mod(self.np,20))==0:
            fid.write('#SBATCH --ntasks=%i\n' % self.np)
        fid.write('#SBATCH --account=%s\n' % self.accountname)
        fid.write('#SBATCH --output %s/%s/%s.outlog \n' % (self.executionpath,dirname,modelname))
        fid.write('#SBATCH --error %s/%s/%s.errlog \n\n' % (self.executionpath,dirname,modelname))

        fid.write('export ISSM_DIR="%s/../"\n' % self.codepath)
        fid.write('module restore system\n')
        fid.write('module load Automake/1.15.1-GCCcore-6.3.0\n')
        fid.write('module load libtool/2.4.6-GCCcore-6.3.0\n')
        fid.write('module load CMake/3.9.1\n')
        fid.write('module load PETSc/3.8.0-intel-2017a-Python-2.7.13\n')
        fid.write('module load ParMETIS/4.0.3-intel-2017a\n')
        fid.write('cd %s/%s/\n\n' % (self.executionpath,dirname))
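        #wrap the run in perf-report when profiling is requested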
        if self.profiling==1:
            fid.write('module load perf-report\n')
            fid.write('perf-report mpirun -np %i %s/%s %s %s/%s %s\n' % (self.np,self.codepath,executable,str(solution),self.executionpath,dirname,modelname))
        else:
            fid.write('mpirun -np %i %s/%s %s %s/%s %s\n' % (self.np,self.codepath,executable,str(solution),self.executionpath,dirname,modelname))
        fid.close()

        # }}}
    def UploadQueueJob(self,modelname,dirname,filelist):
        # {{{
        #compress the input files into one tar.gz archive
        compressstring='tar -zcf %s.tar.gz ' % dirname
        for file in filelist:
            compressstring += ' %s' % file
        subprocess.call(compressstring,shell=True)

        print('uploading input file and queueing script')
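        #copy the archive to the execution path on the cluster (scp wrapper)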
        issmscpout(self.name,self.executionpath,self.login,self.port,[dirname+'.tar.gz'])

        # }}}
    def LaunchQueueJob(self,modelname,dirname,filelist,restart,batch):
        # {{{
        print('launching solution sequence on remote cluster')
        if restart:
            launchcommand='cd %s && cd %s && sbatch %s.queue' % (self.executionpath,dirname,modelname)
        else:
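            #fresh start: wipe any previous run directory, unpack the uploaded archive and submit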
            launchcommand='cd %s && rm -rf ./%s && mkdir %s && cd %s && mv ../%s.tar.gz ./ && tar -zxf %s.tar.gz && sbatch %s.queue' % (self.executionpath,dirname,dirname,dirname,dirname,dirname,modelname)
        issmssh(self.name,self.login,self.port,launchcommand)

        # }}}
    def Download(self,dirname,filelist):
        # {{{
        #copy files from cluster to current directory
        directory='%s/%s/' % (self.executionpath,dirname)
        issmscpin(self.name,self.login,self.port,directory,filelist)
        # }}}