Merge "HBase storage driver, initial version."

2013-03-05 10:38:38 +00:00 · 2013-03-05 10:38:38 +00:00 · 37bc0a7915
parent d9c4de87af 6f7525755f
commit 37bc0a7915
4 changed files with 954 additions and 0 deletions
--- a/ceilometer/storage/impl_hbase.py
+++ b/ceilometer/storage/impl_hbase.py
@ -0,0 +1,661 @@
+# -*- encoding: utf-8 -*-
+#
+# Copyright © 2012, 2013 Dell Inc.
+#
+# Author: Stas Maksimov <Stanislav_M@dell.com>
+# Author: Shengjie Min <Shengjie_Min@dell.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+"""Openstack Ceilometer HBase storage backend
+
+.. note::
+  This driver is designed to enable Ceilometer store its data in HBase.
+  The implementation is using HBase Thrift interface so it's necessary to have
+  the HBase Thrift server installed and started:
+  (https://ccp.cloudera.com/display/CDHDOC/HBase+Installation)
+
+  This driver has been tested against HBase 0.92.1/CDH 4.1.1,
+  HBase 0.94.4/HDP 1.2 and HBase 0.94.5/Apache.
+  Versions earlier than 0.92.1 are not supported due to feature
+  incompatibility.
+
+  Due to limitations of HBase the driver implements its own data aggregations
+  which may harm its performance. It is likely that the performance could be
+  improved if co-processors were used, however at the moment the co-processor
+  support is not exposed through Thrift API.
+
+  The following four tables are expected to exist in HBase:
+    create 'project', {NAME=>'f'}
+    create 'user', {NAME=>'f'}
+    create 'resource', {NAME=>'f'}
+    create 'meter', {NAME=>'f'}
+
+  The driver is using HappyBase which is a wrapper library used to interact
+  with HBase via Thrift protocol:
+  http://happybase.readthedocs.org/en/latest/index.html#
+
+"""
+
+from urlparse import urlparse
+import json
+import hashlib
+import copy
+import datetime
+import happybase
+from collections import defaultdict
+
+from oslo.config import cfg
+
+from ceilometer.openstack.common import log, timeutils
+from ceilometer.storage import base
+
+LOG = log.getLogger(__name__)
+
+
+class HBaseStorage(base.StorageEngine):
+    """Put the data into a HBase database
+
+    Collections:
+
+    - user
+      - { _id: user id
+          source: [ array of source ids reporting for the user ]
+          }
+    - project
+      - { _id: project id
+          source: [ array of source ids reporting for the project ]
+          }
+    - meter
+      - the raw incoming data
+    - resource
+      - the metadata for resources
+      - { _id: uuid of resource,
+          metadata: metadata dictionaries
+          timestamp: datetime of last update
+          user_id: uuid
+          project_id: uuid
+          meter: [ array of {counter_name: string, counter_type: string} ]
+        }
+    """
+
+    OPTIONS = [
+        cfg.StrOpt('table_prefix',
+                   default='',
+                   help='Database table prefix',
+                   ),
+    ]
+
+    def register_opts(self, conf):
+        """Register any configuration options used by this engine.
+        """
+        conf.register_opts(self.OPTIONS)
+
+    @staticmethod
+    def get_connection(conf):
+        """Return a Connection instance based on the configuration settings.
+        """
+        return Connection(conf)
+
+
+class Connection(base.Connection):
+    """HBase connection.
+    """
+
+    def __init__(self, conf):
+        '''
+        Hbase Connection Initialization
+        '''
+        opts = self._parse_connection_url(conf.database_connection)
+        opts['table_prefix'] = conf.table_prefix
+        self.conn = self._get_connection(opts)
+        self.conn.open()
+        self.project = self.conn.table('project')
+        self.user = self.conn.table('user')
+        self.resource = self.conn.table('resource')
+        self.meter = self.conn.table('meter')
+
+    def upgrade(self, version=None):
+        """Do nothing; this driver performs no schema migration.
+
+        :param version: ignored
+        """
+        pass
+
+    def clear(self):
+        """Do nothing; this driver does not remove any stored data here."""
+        pass
+
+    @staticmethod
+    def _get_connection(conf):
+        """Return a connection to the database.
+
+        .. note::
+
+          The tests use a subclass to override this and return an
+          in-memory connection.
+        """
+        LOG.debug('connecting to HBase on %s:%s', conf['host'], conf['port'])
+        return happybase.Connection(host=conf['host'], port=conf['port'],
+                                    table_prefix=conf['table_prefix'])
+
+    @staticmethod
+    def _parse_connection_url(url):
+        """Parse connection parameters from a database url.
+
+        .. note::
+
+        HBase Thrift does not support authentication and there is no
+        database name, so we are not looking for these in the url.
+        """
+        opts = {}
+        result = urlparse(url)
+        opts['dbtype'] = result.scheme
+        if ':' in result.netloc:
+            opts['host'], port = result.netloc.split(':')
+        else:
+            opts['host'] = result.netloc
+            port = 9090
+        opts['port'] = port and int(port) or 9090
+        return opts
+
+    def record_metering_data(self, data):
+        """Write the data to the backend storage system.
+
+        :param data: a dictionary such as returned by
+                     ceilometer.meter.meter_message_from_counter
+        """
+        # Make sure we know about the user and project
+        if data['user_id']:
+            user = self.user.row(data['user_id'])
+            sources = _load_hbase_list(user, 's')
+            # Update if source is new
+            if data['source'] not in sources:
+                user['f:s_%s' % data['source']] = "1"
+                self.user.put(data['user_id'], user)
+
+        project = self.project.row(data['project_id'])
+        sources = _load_hbase_list(project, 's')
+        # Update if source is new
+        if data['source'] not in sources:
+            project['f:s_%s' % data['source']] = "1"
+            self.project.put(data['project_id'], project)
+
+        # Record the updated resource metadata.
+        received_timestamp = timeutils.utcnow()
+
+        resource = self.resource.row(data['resource_id'])
+        new_meter = "%s!%s!%s" % (
+            data['counter_name'], data['counter_type'], data['counter_unit'])
+        new_resource = {'f:resource_id': data['resource_id'],
+                        'f:project_id': data['project_id'],
+                        'f:user_id': data['user_id'],
+                        'f:timestamp': timeutils.strtime(data['timestamp']),
+                        'f:received_timestamp': timeutils.strtime(
+                            received_timestamp),
+                        'f:metadata': json.dumps(data['resource_metadata']),
+                        'f:source': data["source"],
+                        'f:m_%s' % new_meter: "1",
+                        }
+        # Update if resource has new information
+        if new_resource != resource:
+            meters = _load_hbase_list(resource, 'm')
+            if new_meter not in meters:
+                new_resource['f:m_%s' % new_meter] = "1"
+
+            self.resource.put(data['resource_id'], new_resource)
+
+        # Rowkey consists of reversed timestamp, meter and an md5 of
+        # user+resource+project for purposes of uniqueness
+        m = hashlib.md5()
+        m.update("%s%s%s" % (data['user_id'], data['resource_id'],
+                             data['project_id']))
+
+        # We use reverse timestamps in rowkeys as they are sorted
+        # alphabetically.
+        rts = reverse_timestamp(data['timestamp'])
+        row = "%s_%d_%s" % (data['counter_name'], rts, m.hexdigest())
+
+        # Convert timestamp to string as json.dumps won't
+        ts = timeutils.strtime(data['timestamp'])
+
+        record = {'f:timestamp': ts,
+                  'f:counter_name': data['counter_name'],
+                  'f:counter_type': data['counter_type'],
+                  'f:counter_volume': str(data['counter_volume']),
+                  'f:counter_unit': data['counter_unit'],
+                  # TODO(shengjie) consider using QualifierFilter
+                  # keep dimensions as column qualifier for quicker look up
+                  # TODO(shengjie) extra dimensions need to be added as CQ
+                  'f:user_id': data['user_id'],
+                  'f:project_id': data['project_id'],
+                  'f:resource_id': data['resource_id'],
+                  'f:source': data['source'],
+                  # add in reversed_ts here for time range scan
+                  'f:rts': str(rts)
+                  }
+        # Don't want to be changing the original data object
+        data = copy.copy(data)
+        data['timestamp'] = ts
+        # Save original event
+        record['f:message'] = json.dumps(data)
+        self.meter.put(row, record)
+
+    def get_users(self, source=None):
+        """Return an iterable of user id strings.
+
+        :param source: Optional source filter.
+        """
+        LOG.debug("source: %s" % source)
+        scan_args = {}
+        if source:
+            scan_args['columns'] = ['f:s_%s' % source]
+        return sorted(key for key, ignored in self.user.scan(**scan_args))
+
+    def get_projects(self, source=None):
+        """Return an iterable of project id strings.
+
+        :param source: Optional source filter.
+        """
+        LOG.debug("source: %s" % source)
+        scan_args = {}
+        if source:
+            scan_args['columns'] = ['f:s_%s' % source]
+        return (key for key, ignored in self.project.scan(**scan_args))
+
+    def get_resources(self, user=None, project=None, source=None,
+                      start_timestamp=None, end_timestamp=None,
+                      metaquery={}):
+        """Return an iterable of dictionaries containing resource information.
+
+        :type end_timestamp: object
+        { 'resource_id': UUID of the resource,
+          'project_id': UUID of project owning the resource,
+          'user_id': UUID of user owning the resource,
+          'timestamp': UTC datetime of last update to the resource,
+          'metadata': most current metadata for the resource,
+          'meter': list of the meters reporting data for the resource,
+          }
+
+        :param user: Optional ID for user that owns the resource.
+        :param project: Optional ID for project that owns the resource.
+        :param source: Optional source filter.
+        :param start_timestamp: Optional modified timestamp start range.
+        :param end_timestamp: Optional modified timestamp end range.
+        """
+        q, start_row, end_row = make_query(user=user,
+                                           project=project,
+                                           source=source,
+                                           start=start_timestamp,
+                                           end=end_timestamp,
+                                           require_meter=False)
+        LOG.debug("q: %s" % q)
+        # TODO implement metaquery support
+        if len(metaquery) > 0:
+            raise NotImplementedError('metaquery not implemented')
+
+        resource_ids = {}
+        if start_timestamp or end_timestamp:
+            # Look for resources matching the above criteria and with
+            # events in the time range we care about, then change the
+            # resource query to return just those resources by id.
+            g = self.meter.scan(filter=q, row_start=start_row,
+                                row_stop=end_row)
+            for ignored, data in g:
+                resource_ids[data['f:resource_id']] = data['f:resource_id']
+
+        q = make_query(user=user, project=project, source=source,
+                       query_only=True, require_meter=False)
+        LOG.debug("q: %s" % q)
+        for resource_id, data in self.resource.scan(filter=q):
+            if not resource_ids or resource_id in resource_ids:
+                r = {'resource_id': resource_id,
+                     'metadata': json.loads(data['f:metadata']),
+                     'project_id': data['f:project_id'],
+                     'received_timestamp': data['f:received_timestamp'],
+                     'source': data['f:source'],
+                     'timestamp':
+                     timeutils.parse_strtime(data['f:timestamp']),
+                     'user_id': data['f:user_id'],
+                     'meter': []}
+
+                for m in data:
+                    if m.startswith('f:m_'):
+                        name, type, unit = m[4:].split("!")
+                        r['meter'].append({"counter_name": name,
+                                           "counter_type": type,
+                                           "counter_unit": unit})
+
+                yield r
+
+    def get_meters(self, user=None, project=None, resource=None, source=None,
+                   metaquery={}):
+        """Return an iterable of dictionaries containing meter information.
+
+        { 'name': name of the meter,
+          'type': type of the meter (guage, counter),
+          'unit': unit of the meter,
+          'resource_id': UUID of the resource,
+          'project_id': UUID of project owning the resource,
+          'user_id': UUID of user owning the resource,
+          }
+
+        :param user: Optional ID for user that owns the resource.
+        :param project: Optional ID for project that owns the resource.
+        :param resource: Optional resource filter.
+        :param source: Optional source filter.
+        :param metaquery: Optional dict with metadata to match on.
+        """
+        q, ignored, ignored = make_query(user=user, project=project,
+                                         resource=resource, source=source,
+                                         require_meter=False)
+        LOG.debug("q: %s" % q)
+        # TODO implement metaquery support
+        if len(metaquery) > 0:
+            raise NotImplementedError('metaquery not implemented')
+
+        gen = self.resource.scan(filter=q)
+
+        for ignored, data in gen:
+            # Meter columns are stored like this:
+            # "m_{counter_name}|{counter_type}|{counter_unit}" => "1"
+            # where 'm' is a prefix (m for meter), value is always set to 1
+            meter = None
+            for m in data:
+                if m.startswith('f:m_'):
+                    meter = m
+                    break
+            if meter is None:
+                continue
+            name, type, unit = meter[4:].split("!")
+            m = {'name': name,
+                 'type': type,
+                 'unit': unit,
+                 'resource_id': data['f:resource_id'],
+                 'project_id': data['f:project_id'],
+                 'user_id': data['f:user_id'],
+                 }
+            yield m
+
+    def get_raw_events(self, event_filter):
+        """Return an iterable of raw event data as created by
+        :func:`ceilometer.meter.meter_message_from_counter`.
+        """
+        q, start, stop = make_query_from_filter(event_filter,
+                                                require_meter=False)
+        LOG.debug("q: %s" % q)
+
+        gen = self.meter.scan(filter=q, row_start=start, row_stop=stop)
+        meters = []
+        for ignored, meter in gen:
+            meter = json.loads(meter['f:message'])
+            meter['timestamp'] = timeutils.parse_strtime(meter['timestamp'])
+            meters.append(meter)
+        return meters
+
+    def _update_meter_stats(self, stat, meter):
+        """Do the stats calculation on a requested time bucket in stats dict
+
+        :param stats: dict where aggregated stats are kept
+        :param index: time bucket index in stats
+        :param meter: meter record as returned from HBase
+        :param start_time: query start time
+        :param period: length of the time bucket
+        """
+        vol = int(meter['f:counter_volume'])
+        ts = timeutils.parse_strtime(meter['f:timestamp'])
+        stat['min'] = min(vol, stat['min'] or vol)
+        stat['max'] = max(vol, stat['max'])
+        stat['sum'] = vol + (stat['sum'] or 0)
+        stat['count'] += 1
+        stat['avg'] = (stat['sum'] / float(stat['count']))
+        stat['duration_start'] = min(ts, stat['duration_start'] or ts)
+        stat['duration_end'] = max(ts, stat['duration_end'] or ts)
+        stat['duration'] = \
+            timeutils.delta_seconds(stat['duration_start'],
+                                    stat['duration_end'])
+
+    def get_meter_statistics(self, event_filter, period=None):
+        """Return a dictionary containing meter statistics.
+        described by the query parameters.
+
+        The filter must have a meter value set.
+
+        { 'min':
+          'max':
+          'avg':
+          'sum':
+          'count':
+          'period':
+          'period_start':
+          'period_end':
+          'duration':
+          'duration_start':
+          'duration_end':
+          }
+
+        .. note::
+
+        Due to HBase limitations the aggregations are implemented
+        in the driver itself, therefore this method will be quite slow
+        because of all the Thrift traffic it is going to create.
+        """
+        q, start, stop = make_query_from_filter(event_filter)
+
+        meters = list(meter for (ignored, meter) in
+                      self.meter.scan(filter=q,
+                                      row_start=start,
+                                      row_stop=stop)
+                      )
+
+        start_time = event_filter.start \
+            or timeutils.parse_strtime(meters[-1]['f:timestamp'])
+        end_time = event_filter.end \
+            or timeutils.parse_strtime(meters[0]['f:timestamp'])
+
+        results = []
+
+        if not period:
+            period = 0
+            period_start = start_time
+            period_end = end_time
+
+        # As our HBase meters are stored as newest-first, we need to iterate
+        # in the reverse order
+        for meter in meters[::-1]:
+            ts = timeutils.parse_strtime(meter['f:timestamp'])
+            if period:
+                offset = int(timeutils.delta_seconds(
+                    start_time, ts) / period) * period
+                period_start = start_time + datetime.timedelta(0, offset)
+
+            if not len(results) or not results[-1]['period_start'] == \
+                    period_start:
+                if period:
+                    period_end = period_start + datetime.timedelta(
+                        0, period)
+                results.append({'count': 0,
+                                'min': 0,
+                                'max': 0,
+                                'avg': 0,
+                                'sum': 0,
+                                'period': period,
+                                'period_start': period_start,
+                                'period_end': period_end,
+                                'duration': None,
+                                'duration_start': None,
+                                'duration_end': None,
+                                })
+            self._update_meter_stats(results[-1], meter)
+        return list(results)
+
+    def get_volume_sum(self, event_filter):
+        """Return the sum of the volume field for the events
+        described by the query parameters.
+        """
+        q, start, stop = make_query_from_filter(event_filter)
+        LOG.debug("q: %s" % q)
+        gen = self.meter.scan(filter=q, row_start=start, row_stop=stop)
+        results = defaultdict(int)
+        for ignored, meter in gen:
+            results[meter['f:resource_id']] \
+                += int(meter['f:counter_volume'])
+
+        return ({'resource_id': k, 'value': v}
+                for (k, v) in results.iteritems())
+
+    def get_volume_max(self, event_filter):
+        """Return the maximum of the volume field for the events
+        described by the query parameters.
+        """
+
+        q, start, stop = make_query_from_filter(event_filter)
+        LOG.debug("q: %s" % q)
+        gen = self.meter.scan(filter=q, row_start=start, row_stop=stop)
+        results = defaultdict(int)
+        for ignored, meter in gen:
+            results[meter['f:resource_id']] = \
+                max(results[meter['f:resource_id']],
+                    int(meter['f:counter_volume']))
+        return ({'resource_id': k, 'value': v}
+                for (k, v) in results.iteritems())
+
+    def get_event_interval(self, event_filter):
+        """Return the min and max timestamps from events,
+        using the event_filter to limit the events seen.
+
+        ( datetime.datetime(), datetime.datetime() )
+        """
+        q, start, stop = make_query_from_filter(event_filter)
+        LOG.debug("q: %s" % q)
+        gen = self.meter.scan(filter=q, row_start=start, row_stop=stop)
+        a_min = None
+        a_max = None
+        for ignored, meter in gen:
+            timestamp = timeutils.parse_strtime(meter['f:timestamp'])
+            if a_min is None:
+                a_min = timestamp
+            else:
+                if timestamp < a_min:
+                    a_min = timestamp
+            if a_max is None:
+                a_max = timestamp
+            else:
+                if timestamp > a_max:
+                    a_max = timestamp
+
+        return a_min, a_max
+
+
+#################################################
+# Here be various HBase helpers
+def reverse_timestamp(dt):
+    """Reverse timestamp so that newer timestamps are represented by smaller
+    numbers than older ones.
+
+    Reverse timestamps is a technique used in HBase rowkey design. When period
+    queries are required the HBase rowkeys must include timestamps, but as
+    rowkeys in HBase are ordered lexicographically, the timestamps must be
+    reversed.
+    """
+    epoch = datetime.datetime(1970, 1, 1)
+    td = dt - epoch
+    ts = (td.microseconds +
+          (td.seconds + td.days * 24 * 3600) * 100000) / 100000
+    return 0x7fffffffffffffff - ts
+
+
+def make_query(user=None, project=None, meter=None,
+               resource=None, source=None, start=None, end=None,
+               require_meter=True, query_only=False):
+    """Return a filter query based on the selected parameters.
+    :param user: Optional user-id
+    :param project: Optional project-id
+    :param meter: Optional counter-name
+    :param resource: Optional resource-id
+    :param source: Optional source-id
+    :param start: Optional start timestamp
+    :param end: Optional end timestamp
+    :param require_meter: If true and the filter does not have a meter,
+            raise an error.
+    :param query_only: If true only returns the filter query,
+            otherwise also returns start and stop rowkeys
+    """
+    q = []
+
+    if user:
+        q.append("SingleColumnValueFilter ('f', 'user_id', =, 'binary:%s')"
+                 % user)
+    if project:
+        q.append("SingleColumnValueFilter ('f', 'project_id', =, 'binary:%s')"
+                 % project)
+    if resource:
+        q.append("SingleColumnValueFilter ('f', 'resource_id', =, 'binary:%s')"
+                 % resource)
+    if source:
+        q.append("SingleColumnValueFilter "
+                 "('f', 'source', =, 'binary:%s')" % source)
+    # when start_time and end_time is provided,
+    #    if it's filtered by meter,
+    #         rowkey will be used in the query;
+    #    if it's non meter filter query(eg. project_id, user_id etc),
+    #         SingleColumnValueFilter against rts will be appended to the query
+    #    query other tables should have no start and end passed in
+    stopRow, startRow = "", ""
+    rts_start = str(reverse_timestamp(start) + 1) if start else ""
+    rts_end = str(reverse_timestamp(end) + 1) if end else ""
+
+    if meter:
+        # if it's meter filter without start and end,
+        # startRow = meter while stopRow = meter + MAX_BYTE
+        if not rts_start:
+            rts_start = chr(127)
+        stopRow = "%s_%s" % (meter, rts_start)
+        startRow = "%s_%s" % (meter, rts_end)
+    elif require_meter:
+        raise RuntimeError('Missing required meter specifier')
+    else:
+        if rts_start:
+            q.append("SingleColumnValueFilter ('f', 'rts', <=, 'binary:%s')" %
+                     rts_start)
+        if rts_end:
+            q.append("SingleColumnValueFilter ('f', 'rts', >=, 'binary:%s')" %
+                     rts_end)
+
+    query_filter = None
+    if len(q):
+        query_filter = " AND ".join(q)
+    if query_only:
+        return query_filter
+    else:
+        return query_filter, startRow, stopRow
+
+
+def make_query_from_filter(event_filter, require_meter=True):
+    """Return a query dictionary based on the settings in the filter.
+
+    :param filter: EventFilter instance
+    :param require_meter: If true and the filter does not have a meter,
+                          raise an error.
+    """
+    if event_filter.metaquery is not None and len(event_filter.metaquery) > 0:
+        raise NotImplementedError('metaquery not implemented')
+
+    return make_query(event_filter.user, event_filter.project,
+                      event_filter.meter, event_filter.resource,
+                      event_filter.source, event_filter.start,
+                      event_filter.end, require_meter)
+
+
+def _load_hbase_list(d, prefix):
+    """Deserialise dict stored as HBase column family
+    """
+    ret = []
+    prefix = 'f:%s_' % prefix
+    for key in (k for k in d if k.startswith(prefix)):
+        ret.append(key[len(prefix):])
+    return ret
--- a/setup.py
+++ b/setup.py
@ -129,6 +129,7 @@ setuptools.setup(
    postgresql = ceilometer.storage.impl_sqlalchemy:SQLAlchemyStorage
    sqlite = ceilometer.storage.impl_sqlalchemy:SQLAlchemyStorage
    test = ceilometer.storage.impl_test:TestDBStorage
+    hbase = ceilometer.storage.impl_hbase:HBaseStorage

    [ceilometer.compute.virt]
    libvirt = ceilometer.compute.virt.libvirt.inspector:LibvirtInspector
--- a/tests/storage/test_impl_hbase.py
+++ b/tests/storage/test_impl_hbase.py
@ -0,0 +1,291 @@
+# -*- encoding: utf-8 -*-
+#
+# Copyright © 2012, 2013 Dell Inc.
+#
+# Author: Stas Maksimov <Stanislav_M@dell.com>
+# Author: Shengjie Min <Shengjie_Min@dell.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+"""Tests for ceilometer/storage/impl_hbase.py
+
+.. note::
+  To run the tests using in-memory mocked HappyBase API,
+  set the environment variable CEILOMETER_TEST_LIVE=0 (this is the default
+  value)
+
+  In order to run the tests against real HBase server set the environment
+  variable CEILOMETER_TEST_LIVE=1 and set HBASE_URL below to
+  point to that HBase instance before running the tests. Make sure the Thrift
+  server is running on that server.
+
+"""
+
+from time import sleep
+import logging
+
+import os
+import copy
+import re
+
+from oslo.config import cfg
+
+from tests.storage import base
+from ceilometer.storage import impl_hbase
+
+from ceilometer.storage.impl_hbase import _load_hbase_list
+
+LOG = logging.getLogger(__name__)
+
+CEILOMETER_TEST_LIVE = bool(int(os.environ.get('CEILOMETER_TEST_LIVE', 0)))
+
+# Export this variable before running tests against real HBase
+# e.g. export CEILOMETER_TEST_HBASE_URL = hbase://192.168.1.100:9090
+CEILOMETER_TEST_HBASE_URL = os.environ.get('CEILOMETER_TEST_HBASE_URL')
+if CEILOMETER_TEST_LIVE:
+    if not CEILOMETER_TEST_HBASE_URL:
+        raise RuntimeError("CEILOMETER_TEST_LIVE is on, but "
+                           "CEILOMETER_TEST_HBASE_URL is not defined")
+PROJECT_TABLE = "project"
+USER_TABLE = "user"
+RESOURCE_TABLE = "resource"
+METER_TABLE = "meter"
+
+TABLES = [PROJECT_TABLE, USER_TABLE, RESOURCE_TABLE, METER_TABLE]
+
+
+class TestConnection(impl_hbase.Connection):
+
+    def __init__(self, conf):
+        if CEILOMETER_TEST_LIVE:
+            super(TestConnection, self).__init__(conf)
+        else:
+            self.conn = MConnection()
+            self.project = self.conn.table('project')
+            self.user = self.conn.table('user')
+            self.resource = self.conn.table('resource')
+            self.meter = self.conn.table('meter')
+
+    def create_schema(self):
+        LOG.debug('Creating HBase schema...')
+        self.conn.create_table(PROJECT_TABLE, {'f': dict()})
+        self.conn.create_table(USER_TABLE, {'f': dict()})
+        self.conn.create_table(RESOURCE_TABLE, {'f': dict()})
+        self.conn.create_table(METER_TABLE, {'f': dict()})
+        # Real HBase needs some time to propagate create_table changes
+        if CEILOMETER_TEST_LIVE:
+            sleep(10)
+
+    def drop_schema(self):
+        LOG.debug('Dropping HBase schema...')
+        for table in TABLES:
+            try:
+                self.conn.disable_table(table)
+            except:
+                None
+            try:
+                self.conn.delete_table(table)
+            except:
+                None
+            # Real HBase needs some time to propagate delete_table changes
+            if CEILOMETER_TEST_LIVE:
+                sleep(10)
+
+
+class HBaseEngine(base.DBEngineBase):
+
+    def get_connection(self):
+        self.conf = cfg.CONF
+
+        self.conf.database_connection = CEILOMETER_TEST_HBASE_URL
+        # use prefix so we don't affect any existing tables
+        self.conf.table_prefix = 't'
+
+        self.conn = TestConnection(self.conf)
+
+        self.conn.drop_schema()
+        self.conn.create_schema()
+
+        self.conn.upgrade()
+        return self.conn
+
+    def clean_up(self):
+        pass
+
+    def get_sources_by_project_id(self, id):
+        project = self.conn.project.row(id)
+        return _load_hbase_list(project, 's')
+
+    def get_sources_by_user_id(self, id):
+        user = self.conn.user.row(id)
+        return _load_hbase_list(user, 's')
+
+
+class HBaseEngineTestBase(base.DBTestBase):
+    """Mixin that makes the shared storage tests run on HBaseEngine."""
+
+    # NOTE(review): the parameter is named cls although the method is not
+    # decorated as a classmethod - confirm how base.DBTestBase calls it.
+    def get_engine(cls):
+        """Return a new HBaseEngine instance."""
+        return HBaseEngine()
+
+
+# The classes below add no test code of their own: each one only combines
+# a test case class from tests.storage.base with HBaseEngineTestBase.
+class UserTest(base.UserTest, HBaseEngineTestBase):
+    pass
+
+
+class ProjectTest(base.ProjectTest, HBaseEngineTestBase):
+    pass
+
+
+class ResourceTest(base.ResourceTest, HBaseEngineTestBase):
+    pass
+
+
+class MeterTest(base.MeterTest, HBaseEngineTestBase):
+    pass
+
+
+class RawEventTest(base.RawEventTest, HBaseEngineTestBase):
+    pass
+
+
+class TestGetEventInterval(base.TestGetEventInterval,
+                           HBaseEngineTestBase):
+    pass
+
+
+class SumTest(base.SumTest, HBaseEngineTestBase):
+    pass
+
+
+class MaxProjectTest(base.MaxProjectTest, HBaseEngineTestBase):
+    pass
+
+
+class MaxResourceTest(base.MaxResourceTest, HBaseEngineTestBase):
+    pass
+
+
+class StatisticsTest(base.StatisticsTest, HBaseEngineTestBase):
+    pass
+
+
+###############
+# This is a very crude version of "in-memory HBase", which implements just
+# enough functionality of HappyBase API to support testing of our driver.
+#
+class MTable():
+    """HappyBase.Table mock
+    """
+    def __init__(self, name, families):
+        self.name = name
+        self.families = families
+        self.rows = {}
+
+    def row(self, key):
+        return self.rows[key] if key in self.rows else {}
+
+    def put(self, key, data):
+        self.rows[key] = data
+
+    def scan(self, filter=None, columns=[], row_start=None, row_stop=None):
+        sorted_keys = sorted(self.rows)
+        # copy data into a sorted dict
+        rows = {}
+        for row in sorted_keys:
+            if row_start:
+                if row < row_start:
+                    continue
+            if row_stop:
+                if row > row_stop:
+                    break
+            rows[row] = copy.copy(self.rows[row])
+        if columns:
+            ret = {}
+            for row in rows.keys():
+                data = rows[row]
+                for key in data:
+                #    if all(key in columns for key in data):
+                    if key in columns:
+                        ret[row] = data
+            rows = ret
+        elif filter:
+            # TODO: we should really parse this properly, but at the moment we
+            # are only going to support AND here
+            filters = filter.split('AND')
+            for f in filters:
+                # Extract filter name and its arguments
+                g = re.search("(.*)\((.*),?\)", f)
+                fname = g.group(1).strip()
+                fargs = [s.strip().replace('\'', '').replace('\"', '')
+                         for s in g.group(2).split(',')]
+                m = getattr(self, fname)
+                if callable(m):
+                    # overwrite rows for filtering to take effect
+                    # in case of multiple filters
+                    rows = m(fargs, rows)
+                else:
+                    raise NotImplementedError("%s filter is not implemented, "
+                                              "you may want to add it!")
+        for k in sorted(rows):
+            yield k, rows[k]
+
+    def SingleColumnValueFilter(self, args, rows):
+        """This method is called from scan() when 'SingleColumnValueFilter'
+        is found in the 'filter' argument
+        """
+        op = args[2]
+        column = "%s:%s" % (args[0], args[1])
+        value = args[3]
+        if value.startswith('binary:'):
+            value = value[7:]
+        r = {}
+        for row in rows:
+            data = rows[row]
+
+            if op == '=':
+                if column in data and data[column] == value:
+                    r[row] = data
+            elif op == '<=':
+                if column in data and data[column] <= value:
+                    r[row] = data
+            elif op == '>=':
+                if column in data and data[column] >= value:
+                    r[row] = data
+            else:
+                raise NotImplementedError("In-memory "
+                                          "SingleColumnValueFilter "
+                                          "doesn't support the %s operation "
+                                          "yet" % op)
+        return r
+
+
+class MConnection():
+    """HappyBase.Connection mock
+    """
+    def __init__(self):
+        self.tables = {}
+
+    def open(self):
+        LOG.debug("Opening in-memory HBase connection")
+        return
+
+    def create_table(self, n, families={}):
+        if n in self.tables:
+            return self.tables[n]
+        t = MTable(n, families)
+        self.tables[n] = t
+        return t
+
+    def delete_table(self, name, use_prefix=True):
+        self.tables.remove(self.tables[name])
+
+    def table(self, name):
+        return self.create_table(name)
--- a/tools/pip-requires
+++ b/tools/pip-requires
@ -22,3 +22,4 @@ extras
 wsme>=0.5b1
 pyyaml
 http://tarballs.openstack.org/oslo-config/oslo-config-2013.1b4.tar.gz#egg=oslo-config
+happybase>=0.4