# Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from oslo_config import cfg

from monasca_transform.component.insert import InsertComponent
from tests.functional.messaging.adapter import DummyAdapter


class DummyInsertPreHourly(InsertComponent):
    """Insert component that writes pre-hourly metric data to kafka queue."""

    @staticmethod
    def insert(transform_context, instance_usage_df):
        """Write instance usage data to kafka.

        :param transform_context: transform context whose
            ``transform_spec_df_info`` DataFrame carries the aggregation
            parameters; only ``metric_id`` is read here.
        :param instance_usage_df: DataFrame of instance usage rows to be
            converted to metrics and published.
        :returns: the ``instance_usage_df`` passed in, unmodified, so the
            caller can continue chaining transformations.
        """
        # Extract the metric_id for this transform from the spec DataFrame.
        transform_spec_df = transform_context.transform_spec_df_info
        agg_params = transform_spec_df.select(
            "metric_id").collect()[0].asDict()
        metric_id = agg_params['metric_id']

        # Route metric output through the in-memory DummyAdapter rather
        # than a real kafka producer -- this is a test-only component.
        # NOTE(review): the override path says 'tests.unit.messaging...'
        # while the import above is 'tests.functional.messaging...' --
        # confirm which module the MessageAdapter actually loads.
        cfg.CONF.set_override(
            'adapter',
            'tests.unit.messaging.adapter:DummyAdapter',
            group='messaging')

        # NOTE: foreachPartition is the recommended way to publish an RDD
        # without creating one producer per element, but it caused cPickle
        # serialization failures (kafka.SimpleProducer and oslo_config.cfg
        # are not picklable), so we fall back to collect()-and-iterate.
        # Acceptable here since the adapter is an in-memory test double.
        for instance_usage_row in instance_usage_df.collect():
            instance_usage_dict = (
                InsertComponent._get_instance_usage_pre_hourly(
                    instance_usage_row, metric_id))
            DummyAdapter.send_metric(instance_usage_dict)
        return instance_usage_df