Stats: break out job runtimes and counts by result

For every job completed, record the result of that job separately to statsd. For successful and failed jobs, record the runtimes of the jobs separately by result (others are not interesting). Also, substitute '_' for '.' in job names in statsd keys. This is backwards-incompatible with current statsd keys. Change-Id: I7b6152bcc7ea5ce6e37bf90ed41aee89baa29309
2013-07-27 17:44:32 -07:00 · 2013-07-27 17:44:32 -07:00 · 66eeebfa4d
parent 212f6127c3
commit 66eeebfa4d
4 changed files with 35 additions and 14 deletions
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -1,4 +1,4 @@
-Since 1.2.0:
+Since 1.3.0:

 * The Jenkins launcher is replaced with Gearman launcher.  An internal
  Gearman server is provided, and there is a Gearman plugin for
@@ -17,3 +17,10 @@ Since 1.2.0:
  the old behavior (which would wait until the conflicting change was
  at the head before dequeuing it), see the new "dequeue-on-conflict"
  option.
+
+* Some statsd keys have changed in a backwards incompatible way:
+  * The counters and timers of the form zuul.job.{name} are now split
+    into several keys of the form:
+    zuul.pipeline.{pipeline-name}.job.{job-name}.{result}
+  * Job names in statsd keys now have the '_' character substituted
+    for the '.' character.
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -1013,16 +1013,21 @@ class TestScheduler(testtools.TestCase):
                    print 'heads', queue.severed_heads
                self.assertEqual(len(queue.severed_heads), 0)

-    def assertReportedStat(self, key, value=None):
+    def assertReportedStat(self, key, value=None, kind=None):
        start = time.time()
        while time.time() < (start + 5):
            for stat in self.statsd.stats:
+                pprint.pprint(self.statsd.stats)
                k, v = stat.split(':')
                if key == k:
-                    if value is None:
-                        return
-                    if value == v:
+                    if value is None and kind is None:
                        return
+                    elif value:
+                        if value == v:
+                            return
+                    elif kind:
+                        if v.endswith('|' + kind):
+                            return
            time.sleep(0.1)

        pprint.pprint(self.statsd.stats)
@@ -1044,15 +1049,20 @@ class TestScheduler(testtools.TestCase):
        self.assertEqual(A.data['status'], 'MERGED')
        self.assertEqual(A.reported, 2)

-        self.assertReportedStat('gerrit.event.comment-added', '1|c')
-        self.assertReportedStat('zuul.pipeline.gate.current_changes', '1|g')
-        self.assertReportedStat('zuul.job.project-merge')
-        self.assertReportedStat('zuul.pipeline.gate.resident_time')
-        self.assertReportedStat('zuul.pipeline.gate.total_changes', '1|c')
+        self.assertReportedStat('gerrit.event.comment-added', value='1|c')
+        self.assertReportedStat('zuul.pipeline.gate.current_changes',
+                                value='1|g')
+        self.assertReportedStat('zuul.pipeline.gate.job.project-merge.SUCCESS',
+                                kind='ms')
+        self.assertReportedStat('zuul.pipeline.gate.job.project-merge.SUCCESS',
+                                value='1|c')
+        self.assertReportedStat('zuul.pipeline.gate.resident_time', kind='ms')
+        self.assertReportedStat('zuul.pipeline.gate.total_changes',
+                                value='1|c')
        self.assertReportedStat(
-            'zuul.pipeline.gate.org.project.resident_time')
+            'zuul.pipeline.gate.org.project.resident_time', kind='ms')
        self.assertReportedStat(
-            'zuul.pipeline.gate.org.project.total_changes', '1|c')
+            'zuul.pipeline.gate.org.project.total_changes', value='1|c')

    def test_duplicate_pipelines(self):
        "Test that a change matching multiple pipelines works"
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -534,6 +534,7 @@ class Build(object):
        self.start_time = None
        self.end_time = None
        self.estimated_time = None
+        self.pipeline = None
        self.parameters = {}

    def __repr__(self):
@@ -604,6 +605,7 @@ class QueueItem(object):

    def addBuild(self, build):
        self.current_build_set.addBuild(build)
+        build.pipeline = self.pipeline

    def setReportedResult(self, result):
        self.current_build_set.result = result
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -315,8 +315,10 @@ class Scheduler(threading.Thread):
        self.log.debug("Adding complete event for build: %s" % build)
        build.end_time = time.time()
        try:
-            if statsd:
-                key = 'zuul.job.%s' % build.job.name
+            if statsd and build.pipeline:
+                jobname = build.job.name.replace('.', '_')
+                key = 'zuul.pipeline.%s.job.%s.%s' % (build.pipeline.name,
+                                                      jobname, build.result)
                if build.result in ['SUCCESS', 'FAILURE'] and build.start_time:
                    dt = int((build.end_time - build.start_time) * 1000)
                    statsd.timing(key, dt)