implementation of FacetSet for client side nested facets
This is a client-side implementation of facets on top of Elasticsearch results. It will let us get rid of a bunch of the uniquify code in the graph and check_success scripts, and make it simpler to analyze results by other dimensions in web console additions. Also make Hit implement __getitem__ for easier dynamic access to its contents, which is useful for programmatically accessing tags. Change-Id: Ib63ff887eb82cff0ba00109471ee48d210fda571
This commit is contained in:
parent
8852f0d979
commit
32d98ae233
|
@ -14,6 +14,7 @@
|
|||
|
||||
"""Elastic search wrapper to make handling results easier."""
|
||||
|
||||
import copy
|
||||
import pprint
|
||||
import pyelasticsearch
|
||||
|
||||
|
@ -90,6 +91,40 @@ class ResultSet(list):
|
|||
return self._results[attr]
|
||||
|
||||
|
||||
class FacetSet(dict):
    """A dictionary-like collection for creating faceted ResultSets.

    Elastic Search doesn't support nested facets, which are incredibly
    useful for things like faceting by build_status then by build_uuid.
    This is a client side implementation that processes a ResultSet
    with an ordered list of facets, and turns it into a data structure
    which is FacetSet -> FacetSet ... -> ResultSet (arbitrary nesting
    of FacetSets with ResultSets as the leaves).

    Treat this basically like a dictionary (which it inherits from).
    """

    def detect_facets(self, results, facets):
        """Bucket ``results`` into this FacetSet by an ordered list of facets.

        Hits are first grouped by the value of the first facet. If more
        facets remain, every bucket is replaced by a nested FacetSet built
        from that bucket's hits and the remaining facets.

        :param results: iterable of hits; each hit must support
            ``hit[facet]`` lookup for every requested facet.
        :param facets: ordered iterable of facet names, outermost first.
            It is copied, never modified, so callers can reuse it.
        """
        # Work on a private copy: popping from the caller's list would
        # silently empty it as a side effect of this call.
        pending = list(facets)
        if not pending:
            return

        facet = pending.pop(0)
        for hit in results:
            attr = hit[facet]
            if attr not in self:
                self[attr] = ResultSet()
            self[attr].append(hit)

        # if we still have more facets to go, recurse down
        if pending:
            nested = {}
            for key in self:
                fs = FacetSet()
                fs.detect_facets(self[key], pending)
                nested[key] = fs
            # Swap the leaf buckets for their nested FacetSets only after
            # the iteration over self has finished.
            self.update(nested)
|
||||
|
||||
|
||||
class Hit(object):
|
||||
def __init__(self, hit):
|
||||
self._hit = hit
|
||||
|
@ -97,6 +132,9 @@ class Hit(object):
|
|||
def index(self):
|
||||
return self._hit['_index']
|
||||
|
||||
def __getitem__(self, key):
    """Support ``hit[key]`` subscript access by delegating to __getattr__."""
    # Call __getattr__ directly (not getattr()) so subscript lookups go
    # straight to the flattened attribute resolution.
    value = self.__getattr__(key)
    return value
|
||||
|
||||
def __getattr__(self, attr):
|
||||
"""flatten out our attr space into a few key types
|
||||
|
||||
|
|
|
@ -45,3 +45,33 @@ class TestBasicParsing(tests.TestCase):
|
|||
|
||||
for result in result_set:
|
||||
self.assertEqual(result.build_status, "FAILURE")
|
||||
|
||||
def test_facet_one_level(self):
    """Facet sample result sets by a single field and check the buckets."""
    data = load_sample(1218391)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_uuid"])
    self.assertEqual(len(facets.keys()), 20)

    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status"])
    # dict.keys() is a view object on Python 3 and never compares equal
    # to a list, so materialize it before the comparison.
    self.assertEqual(list(facets.keys()), ['FAILURE'])

    data = load_sample(1226337)
    result_set = results.ResultSet(data)
    facets = results.FacetSet()
    facets.detect_facets(result_set, ["build_status"])
    self.assertEqual(len(facets.keys()), 2)
    self.assertIn('FAILURE', facets.keys())
    self.assertIn('SUCCESS', facets.keys())
    self.assertEqual(len(facets['FAILURE']), 202)
    self.assertEqual(len(facets['SUCCESS']), 27)
|
||||
|
||||
def test_facet_multi_level(self):
    """Facet a sample by build_status, then build_uuid, and count buckets."""
    hits = results.ResultSet(load_sample(1226337))
    nested = results.FacetSet()
    nested.detect_facets(hits, ["build_status", "build_uuid"])

    # Top level: one bucket per build status.
    self.assertEqual(len(nested.keys()), 2)

    # Second level: number of distinct build_uuid buckets per status.
    expected_uuid_counts = (('FAILURE', 12), ('SUCCESS', 3))
    for status, uuid_count in expected_uuid_counts:
        self.assertEqual(len(nested[status].keys()), uuid_count)
|
||||
|
|
Loading…
Reference in New Issue