monasca-agent/monasca_agent/collector/checks_d/vcenter.py


# (C) Copyright 2016-2017 Hewlett Packard Enterprise Development LP
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""VCenter Only.
Generic VCenter check. This check allows you to specify particular metrics that
you want from vCenter in your configuration.
"""
import traceback

from oslo_vmware import api
from oslo_vmware import vim_util

from monasca_agent.collector.checks import AgentCheck

CLUSTER_COMPUTE_PROPERTIES = ["name", "host", "datastore"]

CLUSTER_HOST_PROPERTIES = ['runtime.connectionState',
                           'runtime.inMaintenanceMode',
                           'summary.hardware.numCpuThreads',
                           'summary.hardware.memorySize',
                           'summary.hardware.cpuMhz']

CLUSTER_PERF_COUNTERS = [
    "cpu.usagemhz.average",
    "cpu.totalmhz.average",
    "mem.usage.average",
    "mem.consumed.average",
    "mem.totalmb.average"]

STORAGE_VOLUME_PROPERTIES = ["host",
                             "summary.capacity",
                             "summary.freeSpace",
                             "summary.name",
                             "summary.multipleHostAccess",
                             "summary.accessible",
                             "summary.maintenanceMode",
                             "summary.type"]

CPU_TOTAL_MHZ = "vcenter.cpu.total_mhz"
CPU_USED_MHZ = "vcenter.cpu.used_mhz"
CPU_USED_PERCENT = "vcenter.cpu.used_perc"
CPU_TOTAL_LOGICAL_CORES = "vcenter.cpu.total_logical_cores"
MEMORY_TOTAL_MB = "vcenter.mem.total_mb"
MEMORY_USED_MB = "vcenter.mem.used_mb"
MEMORY_USED_PERCENT = "vcenter.mem.used_perc"
DISK_TOTAL_SPACE_MB = "vcenter.disk.total_space_mb"
DISK_TOTAL_USED_SPACE_MB = "vcenter.disk.total_used_space_mb"
DISK_TOTAL_USED_SPACE_PERCENT = "vcenter.disk.total_used_space_perc"

PERF_MANAGER_TYPE = "PerformanceManager"
PERF_RESOURCE = 'ClusterResource'
NUM_SAMPLES = 15
SAMPLE_RATE = 300
ALLOWED_DATASTORE_TYPES = ['VMFS', 'NFS']
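
# Note on the constants above: _get_perf_stats() queries the 300-second
# (5-minute) performance rollup interval (SAMPLE_RATE) and asks for at most
# NUM_SAMPLES recent samples, which _get_sample() then averages or sums per
# counter. Each vcenter.* metric name is reported as a gauge with the
# dimensions built in _get_dims(): the vCenter IP and an esx_cluster_id of
# the form "<cluster moid>.<vCenter instance UUID>".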


class VCenterCheck(AgentCheck):

    def __init__(self, name, init_config, agent_config):
        AgentCheck.__init__(self, name, init_config, agent_config)
        self._max_objects = 1000
        self.session = None
        self.is_new_session = True
        self._resource_moid_dict = {}

    def stop(self):
        """To be executed when the agent is being stopped to clean resources.
        """
        if self.session is not None:
            self.session.logout()
            self.session = None
            self.is_new_session = True

    def _propset_dict(self, propset):
        """Turns a propset list into a dictionary

        PropSet is an optional attribute on ObjectContent objects
        that are returned by the VMware API.

        :param propset: a property 'set' from ObjectContent
        :return: dictionary representing property set
        """
        if propset is None:
            return {}
        return {prop.name: prop.val for prop in propset}

    def _get_api_session(self):
        api_session = api.VMwareAPISession(
            self.vcenter_ip,
            self.user,
            self.password,
            3,    # retry count
            0.5,  # task_poll_interval
            port=self.port,
            scheme="https")
        return api_session

    def _find_entity_mor(self, entity_list, entity_name):
        # Cluster names from the config may already be text; only decode when
        # a bytes value is passed (an unconditional decode fails on Python 3
        # str objects).
        if isinstance(entity_name, bytes):
            entity_name = entity_name.decode('utf-8')
        for object_contents in entity_list:
            for obj_content in object_contents[1]:
                for dyn_props in obj_content.propSet:
                    if dyn_props.val == entity_name:
                        return obj_content

    def _get_resource_dict_by_name(self, resource_name):
        """Return the cached resource dictionary for the named cluster."""
        self._get_cluster_info_dict()
        self.log.debug("Loaded MORs for clusters %s" %
                       str(self._resource_moid_dict.keys()))
        return self._resource_moid_dict.get(resource_name)

    def _build_resource_dict(self, object_content):
        """Build the resource dictionary for a cluster.

        The cluster name is the key; the value holds the cluster moid and
        the moids of its hosts and datastores, e.g.:
        {'cluster_name': {
            'moid': 'cluster1',
            'host': ['h1', 'h2'],
            'datastore': ['ds1', 'ds2']
            }
        }
        """
        cluster_moid = str(object_content.obj.value)
        # extract host and datastore moids
        host_names = []
        datastore_names = []
        dict_key = None
        propSet = object_content.propSet
        for dynamic_prop in propSet:
            mor_array = dynamic_prop.val
            if dynamic_prop.name == 'name':
                dict_key = dynamic_prop.val
            else:
                for obj in mor_array:
                    for mor in obj[1]:
                        if dynamic_prop.name == 'datastore':
                            datastore_names.append(str(mor.value))
                        elif dynamic_prop.name == 'host':
                            host_names.append(str(mor.value))
        if dict_key:
            self._resource_moid_dict[str(dict_key)] = {
                'moid': cluster_moid,
                'host': host_names,
                'datastore': datastore_names,
            }

    def _get_cluster_info_dict(self):
        """Load configured cluster moids from the vCenter"""
        self.log.debug("Inside _get_cluster_info_dict")
        if self.session is None:
            self.session = self._get_api_session()
        result = self.session.invoke_api(vim_util,
                                         "get_objects",
                                         self.session.vim,
                                         "ClusterComputeResource",
                                         self._max_objects,
                                         CLUSTER_COMPUTE_PROPERTIES)
        for name in self.clusters:
            mor = self._find_entity_mor(result, name)
            if mor:
                self._build_resource_dict(mor)

    def _get_sample(self, samples, counter_name, is_summation=False):
        """Aggregate the sample values collected for a perf counter.

        Values of -1 (no data available) are skipped. By default the
        remaining samples are averaged; with is_summation=True they are
        summed instead.
        """
        res = 0
        num_samples = 0
        for cn in samples:
            if cn.startswith(counter_name):
                vals = samples[cn]
                if vals:
                    for val in vals:
                        i_val = int(val)
                        if i_val != -1:
                            res += i_val
                            num_samples += 1
        if not is_summation and num_samples:
            res /= num_samples
        return res

    def _get_shared_datastores(self, datastore_stats, managed_cluster):
        """Find the datastores shared by all hosts of the cluster.

        This also covers a cluster with a single host and a non-shared
        datastore, since that host set is trivially a subset of the
        datastore's host mounts.
        """
        shared_datastores_ids = []
        res_dict = self._get_resource_dict_by_name(managed_cluster)
        host_set = set(res_dict.get('host'))
        for object_contents in datastore_stats:
            for object_content in object_contents[1]:
                ds_mor = object_content.obj.value
                datastore_host_ids = []
                if ds_mor in res_dict.get('datastore'):
                    propSets = object_content.propSet
                    for propSet in propSets:
                        if propSet.name == 'host':
                            ds_hm_array = propSet.val
                            host_mounts = ds_hm_array[0]
                            for host_mount in host_mounts:
                                datastore_host_ids.append(
                                    host_mount.key.value)
                    if host_set.issubset(set(datastore_host_ids)):
                        shared_datastores_ids.append(ds_mor)
                    self.log.debug("Cluster host list ==" + str(host_set))
                    self.log.debug("Datastore host list ==" +
                                   str(datastore_host_ids))
        self.log.debug("shared_datastores_ids==" +
                       str(shared_datastores_ids))
        return shared_datastores_ids

    def _is_valid_datastore(self, propSets):
        propdict = self._propset_dict(propSets)
        return (propdict.get('summary.accessible') and
                (propdict.get('summary.maintenanceMode') is None or
                 propdict.get('summary.maintenanceMode') == 'normal') and
                propdict['summary.type'] in ALLOWED_DATASTORE_TYPES)

    def _process_storage_data(self, datastore_stats, managed_cluster):
        shared_ds = self._get_shared_datastores(datastore_stats,
                                                managed_cluster)
        capacity = 0
        freeSpace = 0
        self.log.info("Polling for the datastores: " + str(shared_ds))
        for object_contents in datastore_stats:
            for object_content in object_contents[1]:
                ds_mor = object_content.obj.value
                if ds_mor in shared_ds:
                    propSets = object_content.propSet
                    if self._is_valid_datastore(propSets):
                        for propSet in propSets:
                            if propSet.name == 'summary.capacity':
                                self.log.debug("Calculating capacity "
                                               "of datastore: %s in "
                                               "cluster: %s" %
                                               (ds_mor, managed_cluster))
                                capacity += int(propSet.val)
                            elif propSet.name == 'summary.freeSpace':
                                self.log.debug("Calculating freeSpace of "
                                               "datastore: %s in cluster: %s"
                                               % (ds_mor, managed_cluster))
                                freeSpace += int(propSet.val)
        usedSpace = capacity - freeSpace
        self.log.debug("Total capacity:" + str(capacity) +
                       " used:" + str(usedSpace) + " free:" + str(freeSpace))
        return (capacity, usedSpace)

    def _get_properties_for_a_collection_of_objects(self, vim, type, obj_list,
                                                    properties):
        """Gets the list of properties for the collection of
        objects of the type specified.
        """
        client_factory = vim.client.factory
        if len(obj_list) == 0:
            return []
        prop_spec = vim_util.build_property_spec(client_factory,
                                                 type,
                                                 properties)
        lst_obj_specs = []
        for obj in obj_list:
            lst_obj_specs.append(vim_util.build_object_spec(
                client_factory,
                obj, []))
        prop_filter_spec = vim_util.build_property_filter_spec(client_factory,
                                                               [prop_spec],
                                                               lst_obj_specs)
        options = client_factory.create('ns0:RetrieveOptions')
        options.maxObjects = self._max_objects
        return vim.RetrievePropertiesEx(
            vim.service_content.propertyCollector,
            specSet=[prop_filter_spec],
            options=options)

    def _is_valid_host(self, propSets):
        propdict = self._propset_dict(propSets)
        return ((propdict.get('runtime.inMaintenanceMode') is not True) and
                propdict.get('runtime.connectionState') == 'connected')

    def _process_host_data(self, host_stats, managed_cluster):
        shared_hosts = self._find_entity_mor(host_stats, managed_cluster)
        cluster_hosts_data = {'numCpuThreads': 0,
                              'memorySizeMb': 0,
                              'cpuMhz': 0}
        self.log.debug("Polling for the hosts: %s" % (str(shared_hosts)))
        if shared_hosts:
            propSets = shared_hosts.propSet
            for propSet in propSets:
                if propSet.name == 'host':
                    host_ret = propSet.val
                    host_mors = host_ret.ManagedObjectReference
                    if host_mors:
                        method_call = \
                            '_get_properties_for_a_collection_of_objects'
                        result = self.session.invoke_api(
                            self,
                            method_call,
                            self.session.vim,
                            "HostSystem",
                            host_mors,
                            CLUSTER_HOST_PROPERTIES)
                        for obj_contents in result:
                            for obj_content in obj_contents[1]:
                                propSets = obj_content.propSet
                                if self._is_valid_host(propSets):
                                    for propSet in propSets:
                                        if (propSet.name ==
                                                "summary.hardware."
                                                "numCpuThreads"):
                                            cluster_hosts_data[
                                                'numCpuThreads'] += \
                                                propSet.val
                                        elif (propSet.name ==
                                                'summary.hardware.'
                                                'memorySize'):
                                            cluster_hosts_data[
                                                'memorySizeMb'] += int(
                                                    (propSet.val) /
                                                    (1024 * 1024))
                                        elif (propSet.name ==
                                                'summary.hardware.cpuMhz'):
                                            cluster_hosts_data[
                                                'cpuMhz'] += propSet.val
        return cluster_hosts_data

    def check(self, instance):
        """Collect and report metrics for each configured cluster."""
        try:
            if self.is_new_session:
                self.instance = instance
                self.vcenter_ip = instance.get('vcenter_ip', None)
                self.user = instance.get('username', None)
                self.password = instance.get('password', None)
                self.port = instance.get('port', 443)
                self.clusters = instance.get('clusters', None)
                if not self.vcenter_ip:
                    self.log.warn("vCenter not configured")
                    return
                if not self.clusters:
                    self.log.warn("No clusters configured to monitor")
                    return
                self.session = self._get_api_session()
                self.vc_uuid = (self.session.vim.service_content.
                                about.instanceUuid)
                self._ops = VcenterOperations(self.session,
                                              self._max_objects,
                                              self.log)
                self._ops._properties_updated_event()
                self.perf_query_specs = self._ops._get_perf_query_spec(
                    CLUSTER_PERF_COUNTERS,
                    SAMPLE_RATE,
                    NUM_SAMPLES)
                self.is_new_session = False
            for managed_cluster in self.clusters:
                try:
                    res_dict = self._get_resource_dict_by_name(
                        managed_cluster)
                    if not res_dict:
                        self.log.error("Configured cluster not found "
                                       "in vCenter")
                        continue
                    self.log.debug("=" * 30 + " Start of cycle" + "=" * 30)
                    self.log.debug("Collecting for cluster: %s (%s)" %
                                   (managed_cluster,
                                    str(res_dict.get('moid'))))
                    cluster_samples = self._ops.\
                        _get_perf_stats(res_dict,
                                        self.perf_query_specs)
                    if not cluster_samples:
                        self.log.error("Activated cluster not found in"
                                       " vCenter")
                        continue
                    host_stats = self._ops._get_host_stats_from_cluster()
                    cluster_host_stats = self._process_host_data(
                        host_stats,
                        managed_cluster)
                    cpu_total_logical_cores = cluster_host_stats.get(
                        'numCpuThreads')
                    cpu_usage = self._get_sample(cluster_samples,
                                                 'cpu.usagemhz.average')
                    cpu_total = self._get_sample(cluster_samples,
                                                 'cpu.totalmhz.average')
                    cpu_usage_percent = float(0.0)
                    if cpu_total > 0:
                        cpu_usage_percent = round(
                            (float(cpu_usage) / cpu_total) * 100, 2)
                    # Considering host level stats for RAM
                    mem_mb_total = cluster_host_stats.get('memorySizeMb')
                    mem_kb_consumed = self._get_sample(cluster_samples,
                                                       'mem.consumed.average')
                    # convert this to MB
                    mem_mb_consumed = mem_kb_consumed / 1024
                    mem_used_percent = float(0.0)
                    if mem_mb_total > 0:
                        mem_used_percent = round(
                            (float(mem_mb_consumed) / mem_mb_total) * 100, 2)
                    datastore_stats = self._ops._read_storage_data()
                    (capacity_bytes, usedSpace_bytes) = \
                        self._process_storage_data(
                            datastore_stats,
                            managed_cluster)
                    storage_used_percent = float(0.0)
                    if capacity_bytes > 0:
                        storage_used_percent = round(
                            float(usedSpace_bytes) / capacity_bytes, 2) * 100
                    capacity_mb = capacity_bytes / (1024 * 1024)
                    usedSpace_mb = usedSpace_bytes / (1024 * 1024)
                    self.log.debug(managed_cluster +
                                   ", CPU used (avg MHz): %s" %
                                   str(cpu_usage))
                    self.log.debug(managed_cluster +
                                   ", CPU total (avg MHz): %s" %
                                   str(cpu_total))
                    self.log.debug(managed_cluster +
                                   ", Memory total (MB): %s" %
                                   str(mem_mb_total))
                    self.log.debug(managed_cluster +
                                   ", Memory consumed (MB): %s" %
                                   str(mem_mb_consumed))
                    self.log.debug(managed_cluster +
                                   ", Storage total (MB): %s" %
                                   str(capacity_mb))
                    self.log.debug(managed_cluster +
                                   ", Storage used (MB): %s" %
                                   str(usedSpace_mb))
                    self.log.debug(managed_cluster +
                                   ", CPU cores : %s" %
                                   str(cpu_total_logical_cores))
                    self.log.debug(managed_cluster +
                                   ", CPU cores: %s" %
                                   str(cpu_total_logical_cores) +
                                   ", CPU usage percent: %s" %
                                   str(cpu_usage_percent) +
                                   ", Memory usage percent: %s" %
                                   str(mem_used_percent) +
                                   ", Storage usage percent: %s" %
                                   str(storage_used_percent))
                    data = {
                        CPU_TOTAL_MHZ: cpu_total,
                        CPU_USED_MHZ: cpu_usage,
                        CPU_USED_PERCENT: cpu_usage_percent,
                        CPU_TOTAL_LOGICAL_CORES: cpu_total_logical_cores,
                        MEMORY_TOTAL_MB: mem_mb_total,
                        MEMORY_USED_MB: mem_mb_consumed,
                        MEMORY_USED_PERCENT: mem_used_percent,
                        DISK_TOTAL_SPACE_MB: capacity_mb,
                        DISK_TOTAL_USED_SPACE_MB: usedSpace_mb,
                        DISK_TOTAL_USED_SPACE_PERCENT: storage_used_percent
                    }
                    self._set_metrics(managed_cluster,
                                      data,
                                      res_dict.get("moid"))
                    self.log.debug("=" * 30 + " End of cycle" + "=" * 30)
                except Exception:
                    self.log.error(traceback.format_exc())
                    self.log.error("Exception occurred while polling for %s"
                                   % managed_cluster)
        except Exception as e:
            self.is_new_session = False
            self.log.error("Exception: %s" % str(e))
            try:
                self.session.close()
            except Exception:
                self.log.error(traceback.format_exc())
            raise e

    def _set_metrics(self, managed_cluster, data, mor_id):
        dimensions = self._get_dims(managed_cluster, mor_id)
        for key in data.keys():
            self.gauge(key, data.get(key), dimensions=dimensions)
            self.log.debug("Post metric data for %s, %s: %d" %
                           (managed_cluster, key, data.get(key)))

    def _get_dims(self, cluster, mor_id):
        cluster_id = mor_id + "." + self.vc_uuid
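        # For illustration (hypothetical values): with a cluster moid of
        # "domain-c7" and a vCenter instanceUuid of "d7e9...", the
        # esx_cluster_id dimension becomes "domain-c7.d7e9...".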
        local_dimensions = {
            "vcenter_ip": self.vcenter_ip,
            "esx_cluster_id": cluster_id
        }
        final_dimensions = self._set_dimensions(local_dimensions,
                                                self.instance)
        return final_dimensions

    @staticmethod
    def parse_agent_config(agentConfig):
        if not agentConfig.get('vcenter_ip'):
            return False
        vcenter_ip = agentConfig.get('vcenter_ip')
        user = agentConfig.get('username')
        paswd = agentConfig.get('password')
        config = {
            'instances': [{
                'vcenter_ip': vcenter_ip,
                'username': user,
                'password': paswd,
            }]
        }
        return config


class VcenterOperations(object):
    """Class to invoke vCenter API calls.

    vCenter API calls are required by various pollsters, collecting data from
    VMware infrastructure.
    """

    def __init__(self, session, max_objects, logger):
        self.session = session
        self.max_objects = max_objects
        self.log = logger
        self.counters = {}

    def _get_perf_counters(self):
        try:
            self.log.info("Loading VmWare performance counters")
            perf_counters = []
            property_dict = {'perf_counter': ['perfCounter']}
            prop_collector = \
                self.session.vim.service_content.propertyCollector
            self.client_factory = self.session.vim.client.factory
            perf_manager = self.session.vim.service_content.perfManager
            options = self.client_factory.create('ns0:RetrieveOptions')
            options.maxObjects = 1
            prop_spec = vim_util.build_property_spec(
                self.client_factory,
                PERF_MANAGER_TYPE,
                property_dict.get('perf_counter'))
            obj_spec = vim_util.build_object_spec(
                self.client_factory,
                perf_manager,
                None)
            filter_spec = vim_util.build_property_filter_spec(
                self.client_factory, [prop_spec], [obj_spec])
            object_contents = self.session.invoke_api(
                self.session.vim,
                "RetrievePropertiesEx",
                prop_collector,
                specSet=[filter_spec],
                options=options)
            if object_contents is not None:
                for object_content in object_contents:
                    dynamic_properties = object_content[1][0].propSet
                    for dynamic_property in dynamic_properties:
                        perf_counters = dynamic_property.val.PerfCounterInfo
            return perf_counters
        except Exception as ex:
            self.log.error("Exception in _get_perf_counters: %s"
                           % str(ex))

    def _properties_updated_event(self):
        perfCounters = self._get_perf_counters()
        if perfCounters is None:
            return
        for perfCounter in perfCounters:
            counter_id = perfCounter.key
            counter_group = perfCounter.groupInfo.key
            counter_name = perfCounter.nameInfo.key
            counter_rollup_type = str(perfCounter.rollupType)
            full_counter_name = counter_group + "." + \
                counter_name + "." + counter_rollup_type
            if full_counter_name in CLUSTER_PERF_COUNTERS:
                self.counters[full_counter_name] = perfCounter
                self.counters[counter_id] = full_counter_name

    def _get_perf_counter_ids(self, counter_names):
        res = []
        for counter_name in counter_names:
            perf_counter_info = self.counters.get(counter_name)
            if perf_counter_info is not None:
                res.append(perf_counter_info.key)
        return res

    def _get_perf_query_spec(self,
                             counter_names,
                             sample_rate,
                             num_samples):
        counter_ids = self._get_perf_counter_ids(counter_names)
        perf_metric_ids = []
        for cid in counter_ids:
            metric = self.client_factory.create('ns0:PerfMetricId')
            metric.counterId = cid
            metric.instance = '*'
            perf_metric_ids.append(metric)
        return perf_metric_ids

    def _get_perf_stats(self, entity, perf_metric_ids):
        perf_query_spec = self.client_factory.create('ns0:PerfQuerySpec')
        perf_query_spec.entity = entity.get('moid')
        perf_query_spec.metricId = perf_metric_ids
        perf_query_spec.intervalId = SAMPLE_RATE
        perf_query_spec.maxSample = NUM_SAMPLES
        perf_manager = self.session.vim.service_content.perfManager
        perf_entity_metric_base = self.session.invoke_api(
            self.session.vim,
            'QueryPerf',
            perf_manager,
            querySpec=[perf_query_spec])
        perf_result = {}
        if perf_entity_metric_base:
            for i in range(0, len(perf_entity_metric_base)):
                perf_entity_metric_csv = perf_entity_metric_base[i]
                perf_metric_series_csvs = perf_entity_metric_csv.value
                if perf_metric_series_csvs is None or \
                        len(perf_metric_series_csvs) == 0:
                    continue
                for j in range(0, len(perf_metric_series_csvs)):
                    perf_metric_series_csv = perf_metric_series_csvs[j]
                    name = self.counters[perf_metric_series_csv.id.counterId]
                    instance = perf_metric_series_csv.id.instance
                    if (instance is not None and
                            len(instance) > 0 and
                            instance != "*"):
                        name += "." + instance
                    perf_result[name] = perf_metric_series_csv.value
        return perf_result

    def _read_storage_data(self):
        datastore_stats = self.session.invoke_api(
            vim_util,
            "get_objects",
            self.session.vim,
            "Datastore",
            self.max_objects,
            STORAGE_VOLUME_PROPERTIES
        )
        return datastore_stats

    def _get_host_stats_from_cluster(self):
        prop_dict = self.session.invoke_api(vim_util,
                                            "get_objects",
                                            self.session.vim,
                                            "ClusterComputeResource",
                                            self.max_objects,
                                            ['host', 'name'])
        return prop_dict