#!/usr/bin/python
# -*- coding: latin-1 -*-
#
# check_cpq
# Nagios check that reads the health status of a server through the HP MIBs.
#
# Version: 1.0 - angeloxx@angeloxx.it
#   First draft
# Version: 1.1 - angeloxx@angeloxx.it
#   Added the "HEALTH" feature
#
# Query results can be:
# 1 -> other     Status detection is not supported by this system or driver.
# 2 -> ok        Device is operating properly
# 3 -> degraded  A redundant device is not operating properly.
# 4 -> failed    A non-redundant device is not operating properly.
#
# Device FAN,PSU,TEMP starts from 1 to X
# Device CPU,LDEV,PDEV starts from 0 to X
#

import sys
import threading
import StringIO
from optparse import OptionParser
from pysnmp import asn1, v1, v2c, role

# Exit codes reported back to Nagios.
NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_CRITICAL = 2
NAGIOS_UNKNOWN = 3

# Event-log OIDs used by the HEALTH check.
# CPQHLTH-MIB::cpqHeEventLogEntrySeverity.0 = INTEGER: informational(2)
# CPQHLTH-MIB::cpqHeEventLogErrorDesc.7 = STRING: "System Power Supplies Not Redundant"
HEALTH_SEVERITY_OID = '.1.3.6.1.4.1.232.6.2.11.3.1.2.'
HEALTH_DESCRIPTION_OID = '.1.3.6.1.4.1.232.6.2.11.3.1.8.'

# Per-device query: name -> (OID prefix, index of the first instance).
DEVICE_QUERIES = {
    # CPQHLTH-MIB::cpqHeFltTolPowerSupplyCondition.0.X = INTEGER: ok(2)
    'PSU': ('.1.3.6.1.4.1.232.6.2.9.3.1.4.0.', 1),
    # CPQHLTH-MIB::cpqHeTemperatureCondition.1.X = INTEGER: ok(2)
    'TEMP': ('.1.3.6.1.4.1.232.6.2.6.8.1.6.1.', 1),
    # CPQHLTH-MIB::cpqHeFltTolFanCondition.1.X = INTEGER: ok(2)
    'FAN': ('.1.3.6.1.4.1.232.6.2.6.7.1.9.1.', 1),
    # CPQSTDEQ-MIB::cpqSeCpuStatus.X = INTEGER: ok(2)
    'CPU': ('.1.3.6.1.4.1.232.1.2.2.1.1.6.', 0),
    # CPQIDA-MIB::cpqDaLogDrvStatus.1.X = INTEGER: ok(2)
    'LDEV': ('.1.3.6.1.4.1.232.3.2.3.1.1.4.1.', 0),
    # CPQIDA-MIB::cpqDaPhyDrvStatus.1.X = INTEGER: ok(2)
    # (written with the same leading dot as every other OID in this script)
    'PDEV': ('.1.3.6.1.4.1.232.3.2.5.1.1.6.1.', 0),
}

uso = "%prog [-c community] -s host -d (FAN|PSU|TEMP|LDEV|PDEV|CPU|HEALTH)"
parser = OptionParser(uso)
parser.add_option("-c", "--community", dest="community",
                  help="SNMP community", default="public")
parser.add_option("-s", "--server", dest="host",
                  help="remote host", default='')
parser.add_option("-d", "--device", dest="device",
                  help="monitored device", default='PSU')
(options, args) = parser.parse_args()

if not options.host:
    # parser.error() always exits with status 2, which Nagios reads as
    # CRITICAL. Emit the same usage/error text on stderr but exit UNKNOWN.
    parser.print_usage(sys.stderr)
    parser.exit(NAGIOS_UNKNOWN,
                "%s: error: option host is mandatory\n" % parser.get_prog_name())

# Any device name that is not in DEVICE_QUERIES (including 'HEALTH') falls
# back to walking the event-log severity entries.
ishealth = options.device not in DEVICE_QUERIES
if ishealth:
    qry = HEALTH_SEVERITY_OID
    deviceid = 0
else:
    (qry, deviceid) = DEVICE_QUERIES[options.device]

client = role.manager((options.host, 161))
req = v1.GETREQUEST()
rsp = v1.GETRESPONSE()


def snmp_get(oid):
    """Send a GET for `oid` and return the list of decoded values.

    The reply is decoded into the module-level `rsp` object, so
    rsp['error_status'] describes this request until the next call.
    Exceptions raised by client.send_and_receive() are not caught here.
    """
    (answer, src) = client.send_and_receive(
        req.encode(community=options.community,
                   encoded_oids=[asn1.OBJECTID().encode(oid)]))
    # Decode SNMP response
    rsp.decode(answer)
    return [asn1.decode(encoded)[0]() for encoded in rsp['encoded_vals']]


# Accumulated per-instance status text and the overall Nagios result.
statuses = ""
status = NAGIOS_OK
response = "OK"

try:
    walk = True
    while walk:
        vals = snmp_get(qry + str(deviceid))
        # Remember the outcome of this request now: the HEALTH branch may
        # issue a second request that overwrites `rsp`.
        lookup_failed = rsp['error_status']

        if not ishealth:
            if vals[0] == 1:
                statuses = "%s [%s is %s]" % (statuses, deviceid, 'unknown')
            elif vals[0] == 2:
                statuses = "%s [%s is %s]" % (statuses, deviceid, 'ok')
            elif vals[0] == 3:
                statuses = "%s [%s is %s]" % (statuses, deviceid, 'degraded')
                # Raise the status only if it is still OK, never lower it.
                if status == NAGIOS_OK:
                    status = NAGIOS_WARNING
                    response = "WARN"
            elif vals[0] == 4:
                statuses = "%s [%s is %s]" % (statuses, deviceid, 'failed')
                status = NAGIOS_CRITICAL
                response = "CRIT"

            if lookup_failed and deviceid > 10:
                # End of the walk: the agent reported an error for this index.
                # Indexes up to 10 are always probed because, for disks, the
                # objects of absent drives seem not to be returned at all
                # (an "unknown" would have been nicer), so an early error
                # does not necessarily mean there is nothing further on.
                walk = False
        else:
            # Possible severity values:
            #   informational (2)
            #   infoWithAlert (3)
            #   repaired      (6)
            #   caution       (9)
            #   critical      (15)
            # For 9 or 15 the error description is fetched from
            # .1.3.6.1.4.1.232.6.2.11.3.1.8.X
            if lookup_failed and deviceid > 0:
                walk = False

            if vals[0] == 9 or vals[0] == 15:
                desc = snmp_get(HEALTH_DESCRIPTION_OID + str(deviceid))
                statuses = "%s [%s]" % (statuses, desc[0])
                status = NAGIOS_CRITICAL
                response = "CRIT"

        deviceid = deviceid + 1
except role.NetworkError:
    print("KO: Unable to contact or resolve name of remote HP server (%s)" % options.host)
    sys.exit(NAGIOS_CRITICAL)

if statuses == "":
    statuses = "[no additional information]"

print("%s: %s status is %s" % (response, options.host, statuses))
sys.exit(status)