# Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import datetime
import json

from pyspark.sql import SQLContext

from monasca_transform.component import Component
from monasca_transform.component.component_utils import ComponentUtils
from monasca_transform.component.setter import SetterComponent
from monasca_transform.transform.transform_utils import InstanceUsageUtils


class RollupQuantityException(Exception):
    """Exception thrown when doing quantity rollup.

    Attributes:
        value: string representing the error
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class RollupQuantity(SetterComponent):

    @staticmethod
    def _supported_rollup_operations():
        return ["sum", "max", "min", "avg"]

    @staticmethod
    def _is_valid_rollup_operation(operation):
        return operation in RollupQuantity._supported_rollup_operations()
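
    # A brief illustration (hypothetical values, not used anywhere in the
    # pipeline): _is_valid_rollup_operation("avg") returns True, while an
    # unsupported operation such as "median" returns False and causes
    # _rollup_quantity() below to raise RollupQuantityException.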

    @staticmethod
    def _rollup_quantity(instance_usage_df,
                         transform_spec_df,
                         setter_rollup_group_by_list,
                         setter_rollup_operation):

        instance_usage_data_json_list = []

        # check if operation is valid
        if not RollupQuantity.\
                _is_valid_rollup_operation(setter_rollup_operation):
            raise RollupQuantityException(
                "Operation %s is not supported" % setter_rollup_operation)

        # map each column to the operation that will be called on the
        # grouped data, e.g. sum, max, min or avg
        agg_operations_map = {
            "quantity": str(setter_rollup_operation),
            "firstrecord_timestamp_unix": "min",
            "lastrecord_timestamp_unix": "max",
            "record_count": "sum"}

        # do a group by
        grouped_data = instance_usage_df.groupBy(
            *setter_rollup_group_by_list)
        rollup_df = grouped_data.agg(agg_operations_map)

        for row in rollup_df.collect():

            # first record timestamp
            earliest_record_timestamp_unix = getattr(
                row, "min(firstrecord_timestamp_unix)",
                Component.DEFAULT_UNAVAILABLE_VALUE)
            earliest_record_timestamp_string = \
                datetime.datetime.fromtimestamp(
                    earliest_record_timestamp_unix).strftime(
                    '%Y-%m-%d %H:%M:%S')

            # last record timestamp
            latest_record_timestamp_unix = getattr(
                row, "max(lastrecord_timestamp_unix)",
                Component.DEFAULT_UNAVAILABLE_VALUE)
            latest_record_timestamp_string = \
                datetime.datetime.fromtimestamp(
                    latest_record_timestamp_unix).strftime(
                    '%Y-%m-%d %H:%M:%S')

            # record count
            record_count = getattr(row, "sum(record_count)", 0.0)

            # quantity: build the name of the rolled up quantity column
            # that Spark generates, e.g. "avg(quantity)", and use it to
            # select the quantity from the rolled up data
            select_quant_str = "".join((setter_rollup_operation,
                                        "(quantity)"))
            quantity = getattr(row, select_quant_str, 0.0)

            # create a new instance usage dict
            instance_usage_dict = {
                "tenant_id": getattr(row, "tenant_id", "all"),
                "user_id": getattr(row, "user_id", "all"),
                "resource_uuid": getattr(row, "resource_uuid", "all"),
                "geolocation": getattr(row, "geolocation", "all"),
                "region": getattr(row, "region", "all"),
                "zone": getattr(row, "zone", "all"),
                "host": getattr(row, "host", "all"),
                "project_id": getattr(row, "tenant_id", "all"),
                "aggregated_metric_name": getattr(
                    row, "aggregated_metric_name", "all"),
                "quantity": quantity,
                "firstrecord_timestamp_unix": earliest_record_timestamp_unix,
                "firstrecord_timestamp_string":
                    earliest_record_timestamp_string,
                "lastrecord_timestamp_unix": latest_record_timestamp_unix,
                "lastrecord_timestamp_string":
                    latest_record_timestamp_string,
                "record_count": record_count,
                "service_group": getattr(row, "service_group", "all"),
                "service_id": getattr(row, "service_id", "all"),
                "usage_date": getattr(row, "usage_date", "all"),
                "usage_hour": getattr(row, "usage_hour", "all"),
                "usage_minute": getattr(row, "usage_minute", "all"),
                "aggregation_period": getattr(
                    row, "aggregation_period", "all")}

            instance_usage_data_json = json.dumps(instance_usage_dict)
            instance_usage_data_json_list.append(instance_usage_data_json)

        # convert the json strings to an rdd on the same Spark context
        spark_context = instance_usage_df.rdd.context
        return spark_context.parallelize(instance_usage_data_json_list)
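
    # Illustrative note (assumed values, not part of the pipeline): with
    # setter_rollup_operation set to "avg", the dict passed to Spark's
    # DataFrame.agg() above is equivalent to
    #
    #     grouped_data.agg({"quantity": "avg",
    #                       "firstrecord_timestamp_unix": "min",
    #                       "lastrecord_timestamp_unix": "max",
    #                       "record_count": "sum"})
    #
    # which yields columns named "avg(quantity)",
    # "min(firstrecord_timestamp_unix)" and so on; the getattr() calls
    # above look each collected Row up by those generated column names.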

    @staticmethod
    def setter(transform_context, instance_usage_df):

        transform_spec_df = transform_context.transform_spec_df_info

        # get fields we want to group by for a rollup
        agg_params = transform_spec_df.select(
            "aggregation_params_map.setter_rollup_group_by_list").\
            collect()[0].asDict()
        setter_rollup_group_by_list = agg_params["setter_rollup_group_by_list"]

        # get aggregation period
        agg_params = transform_spec_df.select(
            "aggregation_params_map.aggregation_period").collect()[0].asDict()
        aggregation_period = agg_params["aggregation_period"]
        group_by_period_list = \
            ComponentUtils._get_instance_group_by_period_list(
                aggregation_period)

        # group by columns list
        group_by_columns_list = group_by_period_list + \
            setter_rollup_group_by_list

        # get rollup operation (sum, max, avg, min)
        agg_params = transform_spec_df.select(
            "aggregation_params_map.setter_rollup_operation").\
            collect()[0].asDict()
        setter_rollup_operation = agg_params["setter_rollup_operation"]

        # perform rollup operation
        instance_usage_json_rdd = RollupQuantity._rollup_quantity(
            instance_usage_df,
            transform_spec_df,
            group_by_columns_list,
            str(setter_rollup_operation))

        sql_context = SQLContext.getOrCreate(instance_usage_df.rdd.context)
        instance_usage_trans_df = InstanceUsageUtils.create_df_from_json_rdd(
            sql_context,
            instance_usage_json_rdd)

        return instance_usage_trans_df
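
# A minimal usage sketch (hypothetical names; assumes a transform context
# whose transform_spec_df_info DataFrame carries the aggregation_params_map
# fields read above, and an instance usage DataFrame produced by earlier
# stages of the pipeline):
#
#     rolled_up_df = RollupQuantity.setter(transform_context,
#                                          instance_usage_df)
#     rolled_up_df.show()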