afsmon/afsmon/__init__.py

241 lines
8.6 KiB
Python

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import logging
import io
import sys
import subprocess
import re
from datetime import datetime
from enum import Enum
from prettytable import PrettyTable
logger = logging.getLogger("afsmon")
#
# Fileserver
#
class FileServerStatus(Enum):
NORMAL = 0
TEMPORARY_DISABLED = 1
DISABLED = 2
UNKNOWN = 3
NO_CONNECTION = 4
Partition = collections.namedtuple(
'Partition', 'partition, used, free, total, percent_used')
Volume = collections.namedtuple(
'Voume', 'volume, id, perms, used, quota, percent_used')
class FileServerStats:
'''AFS fileserver status
Call ``get_stats()`` to populate the statistics for the server.
Note most attributes are only set if ``status`` is NORMAL
Args:
hostname (str): The hostname of server to query
i.e. argument to ``-server`` for cmd line tools
Attributes:
status (FileServerStatus): enum of possible status
timestamp(:obj:`datetime.datetime`): time statistics retrieved
restart (:obj:`datetime.datetime`): time of last restart
uptime (:obj:`datetime.timedelta`): current uptime
partitions (:obj:`list`): list of :obj:`Partition` tuples for each
partition on the server
calls_waiting (:obj:`int`): number of calls waiting for a thread
idle_threads (:obj:`int`): number of currently idle threads
volumes (:obj:`list`): list of :obj:`Volume` tuples for each
volume present on the server
table (:obj:`PrettyTable`): a printable PrettyTable representation
'''
def _get_volumes(self):
cmd = ["vos", "listvol", "-long", "-server", self.hostname]
logger.debug("Running: %s" % cmd)
output = subprocess.check_output(
cmd, stderr=subprocess.STDOUT).decode('ascii')
# Matching:
# mirror.yum-puppetlabs.readonly 536871036 RO 63026403 K On-line
vol_regex = re.compile(
'^(?P<vol>[^\s]+)\s+(?P<id>\d+)\s(?P<perms>R[OW])\s+(?P<used>\d+) K'
)
# Read the output into chunks where each chunk is the info for
# one volume.
chunks = []
lines = io.StringIO(output)
while True:
line = lines.readline()
if not line:
break
chunk = ''
if "On-line" in line: # chunks start with this
chunk += line
# read in the next 9 lines of status
for i in range(8):
chunk += lines.readline()
# convert it to a Volume()
# todo: there's a bunch more we could extract...
m = vol_regex.search(chunk)
q = re.search('MaxQuota\s+(?P<quota>\d+) K', chunk)
used = int(m['used'])
quota = int(q['quota'])
percent_used = round(float(used) / float(quota) * 100, 2)
self.volumes.append(
Volume(m['vol'], m['id'], m['perms'],
used, quota, percent_used))
def _get_calls_waiting(self):
cmd = ["rxdebug", self.hostname, "7000", "-rxstats", "-noconns"]
logger.debug("Running: %s" % cmd)
output = subprocess.check_output(
cmd, stderr=subprocess.STDOUT).decode('ascii')
for line in output.split('\n'):
m = re.search('(?P<waiting>\d+) calls waiting for a thread', line)
if m:
self.calls_waiting = int(m['waiting'])
m = re.search('(?P<idle>\d+) threads are idle', line)
if m:
self.idle_threads = int(m['idle'])
def _get_partition_stats(self):
cmd = ["vos", "partinfo", self.hostname, "-noauth"]
logger.debug("Running: %s" % cmd)
output = subprocess.check_output(
cmd, stderr=subprocess.STDOUT).decode('ascii')
for line in output.split('\n'):
m = re.search(
'Free space on partition '
'/vicep(?P<partition>[a-z][a-z]?): '
'(?P<free>\d+) K blocks out of total (?P<total>\d+)', line)
if m:
part = 'vicep%s' % m['partition']
# (used, free, total, %age)
used = int(m['total']) - int(m['free'])
self.partitions.append(
Partition(part, used, int(m['free']), int(m['total']),
round(float(used) / float(m['total']) * 100, 2)))
def _get_fs_stats(self):
cmd = ["bos", "status", self.hostname, "-long", "-noauth"]
logger.debug("Running: %s" % cmd)
try:
output = subprocess.check_output(
cmd, stderr=subprocess.STDOUT).decode('ascii')
except subprocess.CalledProcessError:
logger.debug(" ... failed!")
self.status = FileServerStatus.NO_CONNECTION
return
if re.search('currently running normally', output):
self.status = FileServerStatus.NORMAL
m = re.search(
r'last started at (?P<date>\w+ \w+ \w+ \d+:\d+:\d+ \d+)',
output)
self.restart = datetime.strptime(m['date'], '%a %b %d %H:%M:%S %Y')
self.uptime = self.timestamp - self.restart
elif re.search('temporarily disabled, currently shutdown', output):
self.status = FileServerStatus.TEMPORARILY_DISABLED
elif re.search('disabled, currently shutdown', output):
self.status = FileServerStatus.DISABLED
else:
logger.debug(output)
self.status = FileServerStatus.UNKNOWN
def get_stats(self):
'''Get the complete stats set for the fileserver'''
self.timestamp = datetime.now()
self._get_fs_stats()
if self.status == FileServerStatus.NORMAL:
self._get_partition_stats()
self._get_calls_waiting()
self._get_volumes()
self.table = PrettyTable()
self.table.field_names = ["Metric", "Value"]
self.table.align["Metric"] = "l"
self.table.align["Value"] = "l"
self.table.add_row(["Hostname", self.hostname])
self.table.add_row(["Timestamp", self.timestamp])
self.table.add_row(["Status", self.status])
self.table.add_row(["Uptime", self.uptime])
self.table.add_row(["Last Restart", self.restart])
self.table.add_row(["Calls Waiting", self.calls_waiting])
self.table.add_row(["Idle Threads", self.idle_threads])
for p in self.partitions:
n = "/%s" % p.partition
self.table.add_row(["%s used" % n, p.used])
self.table.add_row(["%s free" % n, p.free])
self.table.add_row(["%s total" % n, p.total])
self.table.add_row(["%s %%used" % n,
"%s%%" % p.percent_used])
for v in self.volumes:
# Only add the RW volumes to the table as for now we're
# mostly just worried about viewing the quota.
if v.perms == 'RW':
n = v.volume
self.table.add_row(["%s used" % n, v.used])
self.table.add_row(["%s quota" % n, v.quota])
self.table.add_row(["%s %%used" % n,
"%s%%" % v.percent_used])
def __str__(self):
return str(self.table)
def __init__(self, hostname):
self.hostname = hostname
self.timestamp = None
self.restart = None
self.uptime = None
self.partitions = []
self.volumes = []
self.calls_waiting = None
self.idle_threads = None
def get_fs_addresses(cell):
'''Get the fileservers associated with a cell
:arg str cell: The cell (e.g. ``openstack.org``)
:returns: list of fileservers for the cell
'''
fs = []
cmd = ["vos", "listaddrs", "-noauth", "-cell", cell]
logger.debug("Running: %s" % cmd)
try:
output = subprocess.check_output(
cmd, stderr=subprocess.STDOUT).decode('ascii')
except subprocess.CalledProcessError:
logger.debug(" ... failed!")
return []
for line in output.split('\n'):
if line.strip():
fs.append(line)
return fs