This one started off quite simply and turned into a monster by the gradual addition of features. We use it to reboot and power up or down our various development machines. Here’s what it does:
- uses native SNMP to talk to APC networked power bars, turning machines on/off or rebooting them
- alternatively (or additionally) uses ipmitool to talk to the machine's IPMI management controller, doing the same thing directly at the machine
- finally, also talks to a conserver to make sure that it doesn’t do something nasty to a machine that someone is currently using
It probably doesn’t make much sense outside ETH in its current form, but there are bits of code in there that might prove useful. An obvious first step might be factoring them out into modules (and moving the configuration out of the program!).
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Andrew Baumann andrewb@inf.ethz.ch, 2008/02/27 2008/11/17 2009/07/02

"""APC powerbar / IPMI control utility.

Switches development machines on or off (or power-cycles them) either
through SNMP-controlled APC power bars or through ipmitool, after asking
conserver whether somebody else currently holds the machine's console.
Without a command it prints a status table instead.
"""

import os
import socket
import subprocess
import sys
import time
from optparse import OptionParser

from pysnmp.entity.rfc3413.oneliner import cmdgen
from pysnmp.proto import rfc1902

# delay in seconds to wait before querying IPMI status after sending a command
IPMI_DELAY = 4

# enable verbose/debug output
debug_enable = False


# command constants
class commands:
    ON = 1
    OFF = 2
    RESET = 3


class config:
    """Site configuration: SNMP community, power bars, outlets and IPMI hosts."""

    community = cmdgen.CommunityData('my-agent', 'private', 0)

    try:
        snmp_port = socket.getservbyname('snmp', 'udp')
    except socket.error:
        # no 'snmp' entry in the services database: use the well-known port
        snmp_port = 161
    power1 = ('power1', snmp_port)
    power2 = ('power2', snmp_port)

    ports = {
        # host/console: [(powerbar, outlet)]
        'nos1': [(power1, 16)],
        'nos2': [(power2, 16)],
        'nos3': [(power1, 15)],
        'nos4': [(power2, 15)],
        'nos5': [(power1, 14)],
        'nos6': [(power2, 14)],
        'gruyere': [(power1, 10), (power1, 11), (power1, 12), (power1, 13)],
        'sbrinz1': [(power2, 7), (power2, 8)],
        'sbrinz2': [(power2, 9), (power2, 10)],
    }

    ipmi = {
        # host/console: (ipmi-host, user, password)
        'gruyere': ('gruyere-mgmt', 'foo', 'bar'),
        'sbrinz1': ('sbrinz1-mgmt', 'foo', 'bar'),
        'sbrinz2': ('sbrinz2-mgmt', 'foo', 'bar'),
    }


def debug(msg):
    """Print msg, prefixed with the program name, if debug output is enabled."""
    if debug_enable:
        print(os.path.basename(sys.argv[0]) + ": " + msg)


def runcmd(cmdline):
    """Run a process to completion and return its stdout as a list of lines.

    cmdline is either a string, which is run through the shell, or a
    list/tuple of arguments, which is executed directly without a shell.
    Lines keep their trailing newline.

    Anything written to stderr is reported through debug().  Raises
    Exception if the process returns a non-zero status.
    """
    use_shell = not isinstance(cmdline, (list, tuple))
    if use_shell:
        display = cmdline
    else:
        display = ' '.join(cmdline)

    child = subprocess.Popen(cmdline, shell=use_shell,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             universal_newlines=True)
    # communicate() drains both pipes concurrently; reading stdout to EOF
    # before touching stderr can deadlock once the stderr pipe fills up
    out_text, err_text = child.communicate()
    ret = child.returncode
    out = out_text.splitlines(True)
    err = err_text.splitlines(True)

    if ret != 0 or err:
        msg = "'%s' exited %d" % (display, ret)
        if err:
            msg = msg + ', stderr follows:\n' + '\n'.join(err)
        debug(msg)

        # only raise an exception if they returned non-zero
        if ret != 0:
            raise Exception(msg)

    return out


class conserver_client:
    """Snapshot of console ownership, taken from 'console -i' at construction."""

    def __init__(self):
        self.consoles = self.__getstate()

    def who_owns(self, consolename):
        """Return the user attached read-write to consolename, or None."""
        return self.consoles.get(consolename)

    def __getstate(self):
        """Parse 'console -i' output into a {console name: writer or None} dict.

        Each line is split on ':' and the first six fields are taken as
        name, child, type, details, users and state.  The users field is a
        comma-separated list of '@'-separated entries starting with mode and
        user name; a user whose mode contains 'w' is recorded as the owner.
        """
        ret = {}
        for line in runcmd('console -i'):
            parts = line.strip().split(':')
            if len(parts) < 6:
                # blank or malformed line: nothing we can learn from it
                if line.strip():
                    debug('ignoring unparseable conserver line: ' + line.strip())
                continue
            conname = parts[0]
            users = parts[4]
            ret[conname] = None
            if users:
                for userinfo in users.split(','):
                    fields = userinfo.split('@')
                    if len(fields) < 2:
                        continue
                    mode, username = fields[:2]
                    if 'w' in mode:
                        ret[conname] = username
        return ret


class ipmi_client:
    """Chassis power control through the ipmitool command-line program."""

    command_map = {
        commands.ON: 'on',
        commands.OFF: 'off',
        commands.RESET: 'reset',
    }

    def _mkcmd(self, controller, cmd):
        """Build the ipmitool argument list for a 'power <cmd>' request.

        controller is an (ipmi-host, user, password) tuple as stored in
        config.ipmi.  An argument list (not a shell string) is returned so
        that credentials are never interpreted by a shell.
        NOTE(review): the password is still visible in the process list and
        in debug output; consider ipmitool's environment/file options.
        """
        host, user, password = controller
        return ['ipmitool', '-H', host, '-U', user, '-P', password,
                'power', cmd]

    def get(self, controller):
        """Return the last word of 'ipmitool ... power status' output.

        main() compares the result against 'on' and 'off'.
        Raises Exception if ipmitool fails or prints nothing.
        """
        lines = runcmd(self._mkcmd(controller, 'status'))
        words = lines[0].split() if lines else []
        if not words:
            raise Exception('no output from ipmitool power status')
        return words[-1]

    def set(self, controller, cmd):
        """Send the power command cmd (a commands.* constant) via ipmitool."""
        runcmd(self._mkcmd(controller, self.command_map[cmd]))


class apc_control:
    """Outlet control on APC networked power bars over SNMP."""

    def __init__(self):
        self.cg = cmdgen.CommandGenerator()

    # per-outlet control column; the outlet number is appended as the index
    # NOTE(review): presumably rPDUOutletControlOutletCommand from APC's
    # PowerNet MIB -- confirm against the MIB
    port_control_oid = (1, 3, 6, 1, 4, 1, 318, 1, 1, 12, 3, 3, 1, 1, 4)

    # names for the state values 1, 2 and 3 read back from the power bar
    state_names = ('on', 'off', 'rebooting')

    def state_to_string(self, state):
        """Map a numeric outlet state (1..3) to 'on', 'off' or 'rebooting'.

        Raises Exception for any other value; in particular 0 and negative
        numbers must not wrap around to the end of the table.
        """
        if not 1 <= state <= len(self.state_names):
            raise Exception('Invalid state %d' % state)
        return self.state_names[state - 1]

    command_map = {
        commands.ON: 1,     # immediateOn
        commands.OFF: 2,    # immediateOff
        commands.RESET: 3,  # immediateReboot
        #: 4, # delayedOn
        #: 5, # delayedOff
        #: 6, # delayedReboot
        #: 7, # cancelPendingCommand
    }

    def _single_varbind(self, ret):
        """Validate an SNMP reply and return its only (oid, value) pair.

        Raises Exception if the reply carries an error or does not consist
        of exactly one variable binding.
        """
        errorIndication, errorStatus, errorIndex, varBinds = ret
        # explicit checks rather than assert, which is stripped under -O
        if errorIndication or errorStatus:
            raise Exception('SNMP request failed: %s'
                            % (errorIndication or errorStatus))
        try:
            [(obj, retval)] = varBinds
        except (TypeError, ValueError):
            raise Exception("unexpected data returned from SNMP command")
        return obj, retval

    def get(self, outlet):
        """Return the state of outlet as 'on', 'off' or 'rebooting'.

        outlet is a (powerbar, outlet number) pair as stored in config.ports,
        where powerbar is a (host, port) transport address.
        Raises Exception on SNMP errors or unexpected replies.
        """
        dst, portnum = outlet
        # construct a get request
        target = cmdgen.UdpTransportTarget(dst)
        oid = self.port_control_oid + (portnum,)

        ret = self.cg.getCmd(config.community, target, oid)
        obj, retval = self._single_varbind(ret)
        if not (obj == oid) or retval is None:
            raise Exception("unexpected data returned from SNMP command")
        return self.state_to_string(int(retval))

    def set(self, outlet, cmd):
        """Send the power command cmd (a commands.* constant) to outlet.

        outlet is a (powerbar, outlet number) pair as stored in config.ports.
        Raises Exception on SNMP errors or if the power bar does not echo
        back the value that was written.
        """
        dst, portnum = outlet
        # construct a set request
        target = cmdgen.UdpTransportTarget(dst)
        oid = self.port_control_oid + (portnum,)
        val = rfc1902.Integer32(self.command_map[cmd])

        ret = self.cg.setCmd(config.community, target, (oid, val))
        obj, retval = self._single_varbind(ret)
        if not (retval == val):
            raise Exception("unexpected data returned from SNMP command")


def parse_args():
    """Parse the command line and return (victim, options).

    victim is the single positional host name or None.  options.cmd is a
    commands.* constant or None, options.ipmi is False when -i was given,
    and options.verbose reflects -v.  Exits through OptionParser.error()
    on an unknown victim, more than one victim, or a command without one.
    """
    p = OptionParser(usage='%prog [options] [victim]',
                     description='APC powerbar / IPMI control utility')

    p.add_option('-u', action='store_const', dest='cmd', const=commands.ON,
                 help='switch outlet on')
    p.add_option('-d', action='store_const', dest='cmd', const=commands.OFF,
                 help='switch outlet off')
    p.add_option('-r', action='store_const', dest='cmd', const=commands.RESET,
                 help='power cycle (reboot) if already on, switch outlet on if off')
    p.add_option('-i', action='store_false', dest='ipmi', default=True,
                 help="don't use IPMI, force use of the power bar")
    p.add_option('-v', action='store_true', dest='verbose', default=False,
                 help="verbose output")
    p.set_defaults(cmd=None)

    options, args = p.parse_args()
    if len(args) == 0:
        victim = None
    elif len(args) == 1:
        victim = args[0]
        if not (victim in config.ports or victim in config.ipmi):
            p.error('unknown victim %s' % victim)
    else:
        p.error('more than one victim specified')
    if options.cmd is not None and victim is None:
        p.error('no victim specified for command')
    return victim, options


def _run_command(victim, options, apc, conserver, ipmi):
    """Carry out options.cmd on victim; return the process exit status."""
    cmd = options.cmd

    # check for console ownership
    owner = conserver.who_owns(victim)
    # .get(): an unset LOGNAME must not crash; it simply never matches
    if owner and owner != os.environ.get('LOGNAME'):
        sys.stderr.write(
            "Error: according to conserver %s currently owns this console\n"
            "If you really need to do this, force them off first\n" % owner)
        return 1

    apccfg = config.ports.get(victim)
    ipmicfg = config.ipmi.get(victim)

    # find status of powerbar
    if apccfg:
        apcstate = [apc.get(outlet) for outlet in apccfg]
    else:
        apcstate = None

    if apcstate:
        # NOTE(review): the separator was lost in the published listing;
        # a single space is assumed
        debug("current APC status: " + " ".join(apcstate))

    # try to use IPMI if enabled and the port is switched on
    if options.ipmi and ipmicfg and (apcstate is None or 'on' in apcstate):
        # get current status
        status = ipmi.get(ipmicfg)
        debug("using IPMI: current status is %s" % status)

        # if they asked for a reset but the outlet is off, turn it on
        if status == 'off' and cmd == commands.RESET:
            cmd = commands.ON

        # do it
        debug("sending IPMI %s command..." % ipmi_client.command_map[cmd])
        ipmi.set(ipmicfg, cmd)

        # make sure it really happened
        debug("waiting for %d seconds to check status" % IPMI_DELAY)
        time.sleep(IPMI_DELAY)
        status = ipmi.get(ipmicfg)
        debug("IPMI status is now %s" % status)
        if ((cmd == commands.OFF and status == 'on')
                or (cmd in [commands.ON, commands.RESET] and status == 'off')):
            print("Warning: IPMI status is still %s, trying again" % status)
            if cmd == commands.RESET:
                # the machine is off, so a second reset cannot help
                ipmi.set(ipmicfg, commands.ON)
            else:
                ipmi.set(ipmicfg, cmd)
        return 0

    if not apccfg:
        # IPMI-only victim with IPMI disabled: there is nothing to switch
        sys.stderr.write(
            "Error: no power bar outlets configured for %s\n" % victim)
        return 1

    # use APC on every configured port
    for outlet in apccfg:
        debug("APC: port %d on %s" % (outlet[1], outlet[0][0]))
        apc.set(outlet, cmd)
    return 0


def _print_status(victim, options, apc, conserver, ipmi):
    """Print the power/IPMI/owner table for victim, or for every known host."""
    if victim:
        victims = [victim]
    else:
        victims = sorted(set(config.ports) | set(config.ipmi))

    formatstr = "%-10s %-15s %-4s %s"
    print(formatstr % ('VICTIM', 'POWER', 'IPMI', 'OWNER'))
    for name in victims:
        owner = conserver.who_owns(name) or ""

        apccfg = config.ports.get(name)
        if apccfg:
            apcstate = [apc.get(outlet) for outlet in apccfg]
        else:
            apcstate = []

        ipmicfg = config.ipmi.get(name)
        # query IPMI when the outlets are on, or when there are no outlets
        # at all (same rule as the command path)
        if options.ipmi and ipmicfg and (not apccfg or 'on' in apcstate):
            try:
                ipmistate = ipmi.get(ipmicfg)
            except Exception:
                # best effort: one unreachable controller must not abort
                # the whole listing
                ipmistate = "ERR"
        else:
            ipmistate = ""

        # NOTE(review): join separator reconstructed as a single space
        print(formatstr % (name, " ".join(apcstate), ipmistate, owner))
    return 0


def main():
    """Program entry point; returns the process exit status (0 or 1)."""
    global debug_enable
    victim, options = parse_args()
    debug_enable = options.verbose

    apc = apc_control()
    c = conserver_client()
    i = ipmi_client()

    if options.cmd:
        return _run_command(victim, options, apc, c, i)
    return _print_status(victim, options, apc, c, i)


if __name__ == '__main__':
    sys.exit(main())