#!/usr/bin/env python2.3
"""
mpdboot for LSF
[-f | --hostfile hostfile]
[-i | --ifhn=alternate_interface_hostname_of_ip_address
-f | --hostfile hostfile]
[-h]
"""
import re
import string
import time
import sys
import getopt
from time import ctime
from os import environ, path
from sys import argv, exit, stdout
from popen2 import Popen4
from socket import gethostname, gethostbyname
def _read_output(cmd):
    """Run cmd through Popen4 and return the list of lines it printed.

    The child's pipes are closed and the child is reaped before returning,
    so the polling loops in mpdboot() do not accumulate open descriptors
    or unreaped children.
    """
    child = Popen4(cmd, 0)
    try:
        return child.fromchild.readlines()
    finally:
        child.tochild.close()
        child.fromchild.close()
        child.wait()


def _parse_hostfile(fileName, ifhn, localHost, localIp):
    """Read the host file and return a {host: ifhn_ip_or_None} table.

    Text after a '#' is ignored, as are blank lines.  A line of the form
    'hostname ifhn=a.b.c.d' maps the host to that address; any other line
    maps its first word to None.  Entries naming the local host (by name
    or by resolved address) are left out.

    When ifhn is not None every line must be in the 'hostname ifhn=...'
    form; otherwise an error is printed and the process exits with -1.
    IOError from opening or reading the file propagates to the caller.
    """
    # Raw string, and \S+ rather than \w+, so host names containing '-'
    # or '.' are recognised as well.
    pifhn = re.compile(r"(\S+)\s+ifhn=(\d+\.\d+\.\d+\.\d+)")
    hostTab = {}
    machinefile = open(fileName, "r")
    try:
        for line in machinefile:
            line = line.split('#', 1)[0].strip()
            if not line:
                continue
            matched = pifhn.match(line)
            if matched:
                # multiple blaunch-es
                host = matched.group(1)
                ip = matched.group(2)
            else:
                # should not have --ifhn option
                if ifhn is not None:
                    print("host file %s not valid for --ifhn" % (fileName))
                    print("host file should contain 'hostname ifhn=ip_address'")
                    sys.exit(-1)
                host = line.split()[0]
                ip = None
            if host == localHost or gethostbyname(host) == localIp:
                continue
            hostTab[host] = ip
    finally:
        machinefile.close()
    return hostTab


def mpdboot():
    """Start an mpd on the local host and join the job's other hosts to it.

    Steps, in order:
      1. Require LSB_MCPU_HOSTS, LSB_DJOB_HOSTFILE and LSF_BINDIR in the
         environment; otherwise print "not running in LSF" and exit -1.
      2. Parse -h/--help, -f/--hostfile and -i/--ifhn from sys.argv.
         --ifhn is rejected unless a host file is given; without
         --hostfile the file named by LSB_DJOB_HOSTFILE is used.
      3. Read the host file (see _parse_hostfile).
      4. Launch mpd locally and poll 'mpdtrace -l' up to MAX_WAIT times,
         one second apart, for a 'hostname_port (ip)' line.
      5. If there are remote hosts, launch mpd on them through blaunch
         (one blaunch per host when --ifhn is used, a single
         'blaunch -z' otherwise) and poll 'mpdtrace' until it lists one
         line per host or the timeout expires.

    Exits through sys.exit(-1) on any failure and sys.exit(0) when the
    local host is the only host; returns None once the ring is reported
    complete.
    """
    # change me
    MPI_ROOTDIR = "/opt/mpich2"
    #
    mpdCmd = "%s/bin/mpd" % MPI_ROOTDIR
    mpdtraceCmd = "%s/bin/mpdtrace" % MPI_ROOTDIR
    mpdtraceCmd2 = "%s/bin/mpdtrace -l" % MPI_ROOTDIR
    MAX_WAIT = 5

    hostfile = environ.get('LSB_DJOB_HOSTFILE')
    binDir = environ.get('LSF_BINDIR')
    if environ.get('LSB_MCPU_HOSTS') is None \
            or hostfile is None \
            or binDir is None:
        print("not running in LSF")
        sys.exit(-1)
    rshCmd = binDir + "/blaunch"
    # hostname_portnumber (IP address); \S+ so that host names with '-'
    # or '.' are accepted too.
    p = re.compile(r"\S+_\d+\s+\(\d+\.\d+\.\d+\.\d+")

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:i:",
                                   ["help", "hostfile=", "ifhn="])
    except getopt.GetoptError:
        # sys.exc_info() instead of 'except X, err' keeps this line valid
        # on both old and new interpreters.
        print(str(sys.exc_info()[1]))
        usage()
        sys.exit(-1)

    fileName = None
    ifhn = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-f", "--hostfile"):
            fileName = a
        elif o in ("-i", "--ifhn"):
            ifhn = a
        else:
            print("option %s unrecognized" % o)
            usage()
            sys.exit(-1)

    if fileName is None:
        if ifhn is not None:
            print("--ifhn requires a host file containing 'hostname ifhn=alternate_interface_hostname_of_ip_address'\n")
            sys.exit(-1)
        # use LSB_DJOB_HOSTFILE
        fileName = hostfile

    localHost = gethostname()
    localIp = gethostbyname(localHost)
    try:
        hostTab = _parse_hostfile(fileName, ifhn, localHost, localIp)
    except IOError:
        print(str(sys.exc_info()[1]))
        sys.exit(-1)
    # One mpd per distinct host plus the local one.  Counting host-file
    # lines instead would overcount when a host is listed more than once,
    # and the final wait below could then never be satisfied.
    nHosts = 1 + len(hostTab)

    # launch an mpd on localhost
    if ifhn is not None:
        cmd = mpdCmd + " --ifhn=%s " % (ifhn)
    else:
        cmd = mpdCmd
    print("Starting an mpd on localhost: %s" % cmd)
    Popen4(cmd, 0)

    # wait til 5 seconds at max
    found = False
    basehost = baseport = None
    for _attempt in range(MAX_WAIT):
        time.sleep(1)
        lines = _read_output(mpdtraceCmd2)
        if not lines or not p.match(lines[0]):
            continue
        token = lines[0].split()[0]
        # The port follows the LAST underscore; the host name itself may
        # contain underscores.
        sep = token.rfind('_')
        basehost = token[:sep]
        baseport = token[sep + 1:]
        found = True
        break
    if not found:
        print("Cannot start mpd on localhost")
        sys.exit(-1)
    print("Done starting an mpd on localhost")

    # launch mpd on the rest of hosts
    if nHosts < 2:
        sys.exit(0)
    print("Constructing an mpd ring ...")
    if ifhn is not None:
        # Each host needs its own --ifhn value, hence one blaunch per host.
        for host, ip in hostTab.items():
            cmd = "%s %s %s -h %s -p %s --ifhn=%s" % (
                rshCmd, host, mpdCmd, basehost, baseport, ip)
            Popen4(cmd, 0)
    else:
        hostList = " ".join(hostTab.keys())
        cmd = "%s -z '%s' %s -h %s -p %s" % (
            rshCmd, hostList, mpdCmd, basehost, baseport)
        Popen4(cmd, 0)

    # wait till all mpds are started
    MAX_TIMEOUT = 300 + 0.1 * (nHosts)
    t1 = 0
    started = False
    while t1 < MAX_TIMEOUT:
        time.sleep(1)
        if len(_read_output(mpdtraceCmd)) >= nHosts:
            started = True
            break
        t1 += 1
    if not started:
        print("Failed to construct an mpd ring")
        sys.exit(-1)
    print("Done constructing an mpd ring at  %s" % ctime())
def usage():
    """Write the module docstring (the command-line synopsis) to stdout."""
    sys.stdout.write("%s\n" % (__doc__,))
# Script entry point: boot the mpd ring only when executed directly,
# not when this file is imported as a module.
if "__main__" == __name__:
    mpdboot()