implementation of FacetSet for client side nested facets

This is a client-side implementation of facets on top of Elastic
Search results. It will let us get rid of a bunch of the
uniquify code in the graph and check_success scripts, and make
it simpler to analyze by other dimensions in web console additions.

Also make Hit implement __getitem__ for easier dynamic access to its
contents. This is useful for programmatically accessing tags.

Change-Id: Ib63ff887eb82cff0ba00109471ee48d210fda571
This commit is contained in:
Sean Dague 2013-12-03 15:21:55 -05:00
parent 8852f0d979
commit 32d98ae233
2 changed files with 68 additions and 0 deletions

View File

@ -14,6 +14,7 @@
"""Elastic search wrapper to make handling results easier."""
import copy
import pprint
import pyelasticsearch
@ -90,6 +91,40 @@ class ResultSet(list):
return self._results[attr]
class FacetSet(dict):
    """A dictionary-like collection for creating faceted ResultSets.

    Elastic Search doesn't support nested facets, which are incredibly
    useful for things like faceting by build_status then by build_uuid.

    This is a client side implementation that processes a ResultSet
    with an ordered list of facets, and turns it into a data structure
    which is FacetSet -> FacetSet ... -> ResultSet (arbitrary nesting
    of FacetSets with ResultSets as the leaves).

    Treat this basically like a dictionary (which it inherits from).
    """

    def detect_facets(self, results, facets):
        """Bucket ``results`` into this FacetSet by an ordered list of facets.

        :param results: iterable of hits; each hit is subscripted with the
            facet name (``hit[facet]``) to obtain the key of its bucket.
        :param facets: ordered sequence of facet names, outermost level
            first. The sequence is only read, never modified, so the
            caller can safely reuse it afterwards.

        When ``facets`` is empty nothing happens. With exactly one facet
        the values of this FacetSet are ResultSets; with more, each value
        is a nested FacetSet built from the remaining facets.
        """
        if not facets:
            return

        # Slice instead of pop(0): popping would silently shorten the list
        # the caller handed in.
        facet, remaining = facets[0], facets[1:]

        for hit in results:
            attr = hit[facet]
            if attr not in self:
                self[attr] = ResultSet()
            self[attr].append(hit)

        # if we still have more facets to go, recurse down
        if remaining:
            # Snapshot the items so buckets can be replaced while looping.
            for key, bucket in list(self.items()):
                nested = FacetSet()
                nested.detect_facets(bucket, remaining)
                self[key] = nested
class Hit(object):
def __init__(self, hit):
self._hit = hit
@ -97,6 +132,9 @@ class Hit(object):
def index(self):
    """Return the ``'_index'`` entry of the wrapped raw hit."""
    raw_hit = self._hit
    return raw_hit['_index']
def __getitem__(self, key):
    """Support ``hit[key]`` by delegating to ``__getattr__``.

    ``__getattr__`` is invoked directly, so subscript access uses that
    lookup even for names that also exist as regular attributes.
    """
    lookup = self.__getattr__
    return lookup(key)
def __getattr__(self, attr):
"""flatten out our attr space into a few key types

View File

@ -45,3 +45,33 @@ class TestBasicParsing(tests.TestCase):
for result in result_set:
self.assertEqual(result.build_status, "FAILURE")
def test_facet_one_level(self):
    """Facet sample result sets on a single field and check the buckets."""
    data = load_sample(1218391)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_uuid"])
    self.assertEqual(len(facets), 20)

    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status"])
    # On Python 3 keys() is a view object that never compares equal to a
    # list, so materialize the keys before comparing.
    self.assertEqual(list(facets), ['FAILURE'])

    data = load_sample(1226337)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status"])
    self.assertEqual(len(facets), 2)
    self.assertIn('FAILURE', facets)
    self.assertIn('SUCCESS', facets)
    self.assertEqual(len(facets['FAILURE']), 202)
    self.assertEqual(len(facets['SUCCESS']), 27)
def test_facet_multi_level(self):
    """Facet a sample by build_status, then build_uuid, and count keys."""
    sample = load_sample(1226337)
    hits = results.ResultSet(sample)
    by_status = results.FacetSet()
    by_status.detect_facets(hits, ["build_status", "build_uuid"])

    # Top level: one bucket per build status.
    self.assertEqual(len(by_status.keys()), 2)

    # Second level: number of distinct build_uuid keys under each status.
    expected_uuid_counts = (("FAILURE", 12), ("SUCCESS", 3))
    for status, uuid_count in expected_uuid_counts:
        self.assertEqual(len(by_status[status].keys()), uuid_count)