From 00a66b00c083d80c1976ba434b26b9df5d98b660 Mon Sep 17 00:00:00 2001
From: Tom Fifield <tom@openstack.org>
Date: Tue, 5 Apr 2016 16:35:16 +0800
Subject: [PATCH] Initial import of scripts

Adds scripts to work with Ask OpenStack and Eavesdrop.
---
 README.md                      |   8 ++
 tools/get_active_moderator.py  |  79 +++++++++++++++++
 tools/get_active_wg_members.py | 152 +++++++++++++++++++++++++++++++++
 tools/get_meeting_data.sh      |  18 ++++
 4 files changed, 257 insertions(+)
 create mode 100644 tools/get_active_moderator.py
 create mode 100644 tools/get_active_wg_members.py
 create mode 100755 tools/get_meeting_data.sh

diff --git a/README.md b/README.md
index b8a9a89..0973817 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,10 @@
 # uc-recognition
 This repository contains scripts and useful references to track contributions to OpenStack by users
+
+Find active moderators on Ask OpenStack:
+* get_active_moderator.py
+
+Uses IRC logs to attempt to determine active working group members:
+* get_meeting_data.sh
+* get_active_wg_members.py 
+
diff --git a/tools/get_active_moderator.py b/tools/get_active_moderator.py
new file mode 100644
index 0000000..03dd32b
--- /dev/null
+++ b/tools/get_active_moderator.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import json
+import requests
+
+
+user_list = 'https://ask.openstack.org/en/api/v1/users/'
+# base query; get_user_data() expects results in descending reputation order
+params = {
+    'sort': 'reputation',
+    'page': 1,
+}
+
+
+def get_user_data(karma_level):
+    """
+    Page through the user list, which is requested sorted by reputation,
+    until a page ends below karma_level or no more users are returned.
+    Returns a list of user data dicts with reputation >= karma_level.
+    """
+    page = 1
+    session = requests.Session()
+    user_data = []
+    while True:
+        # query a copy so the module-level params dict is never mutated
+        query = dict(params, page=page)
+        response = session.get(user_list, params=query)
+        response.raise_for_status()
+        users = json.loads(response.text)['users']
+        user_data.extend(users)
+        # also stop on an empty page (assumed to mean the list is
+        # exhausted) so indexing [-1] can neither fail nor loop forever
+        if not users or users[-1]['reputation'] < karma_level:
+            break
+        page += 1
+        print("Getting page: %d" % page)
+    return [user for user in user_data if user['reputation'] >= karma_level]
+
+
+def get_active_users(user_data, last_active_days=180):
+    """
+    Given a list of user dicts, print the username and ID of every
+    user last seen within the past last_active_days days.
+    Nothing is returned; output is one line per active user.
+    """
+    cutoff = datetime.datetime.now() - datetime.timedelta(
+        days=last_active_days)
+    row = "{: <20} {: <20}"
+    for entry in user_data:
+        seen = datetime.datetime.fromtimestamp(int(entry['last_seen_at']))
+        if seen <= cutoff:
+            # not seen recently enough
+            continue
+        print(row.format(entry['username'], str(entry['id'])))
+
+
+def main():
+    """Print users with karma >= 200 who were seen in the last 180 days."""
+    get_active_users(get_user_data(karma_level=200), last_active_days=180)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/get_active_wg_members.py b/tools/get_active_wg_members.py
new file mode 100644
index 0000000..94e7841
--- /dev/null
+++ b/tools/get_active_wg_members.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+from datetime import timedelta
+import operator
+import os
+
+meeting_mappings = {
+    'uc': 'user_committee',
+    'product_team': 'product_working_group',
+    'large_deployments_team_monthly_meeting': 'large_deployment_team',
+    'large_deployments_team_october_2015_meeting': 'large_deployment_team',
+    'large_deployments_team_december_2015_meeting': 'large_deployment_team',
+    'large_deployments_team_january_2016_meeting': 'large_deployment_team',
+    'large_deployments_team_february_2016_meeting': 'large_deployment_team',
+}
+
+
+def get_recent_meets(log_dir, last_active_days=180):
+    """
+    Walk a directory hierarchy of meetbot txt summary files named
+    <meeting>.<YYYY-MM-DD-HH.MM>.txt and parse every meeting held
+    within the last last_active_days days. Returns a dictionary with
+    one entry per meeting category, each a list holding the result
+    of get_people_in_meeting() for every recent meeting. Files whose
+    names do not match the expected pattern are skipped.
+    """
+    meetings = {}
+    active_threshold = datetime.now() - timedelta(days=last_active_days)
+    for root, dirs, files in os.walk(log_dir):
+        for txt_summary in files:
+            meet_name, _, meet_date = txt_summary.partition('.')
+            if not meet_date.endswith('.txt'):
+                continue  # stray file, not a txt summary
+            try:
+                meet_timestamp = datetime.strptime(meet_date[:-4],
+                                                   "%Y-%m-%d-%H.%M")
+            except ValueError:
+                continue  # no parsable timestamp in the file name
+            if meet_timestamp > active_threshold:
+                # fold differently named meetings into one category
+                meet_name = meeting_mappings.get(meet_name, meet_name)
+                meet_file = os.path.join(root, txt_summary)
+                meetings.setdefault(meet_name, []).append(
+                    get_people_in_meeting(meet_file))
+    return meetings
+
+
+def get_people_in_meeting(meeting_txt):
+    """
+    takes the path of a meetbot summary file that has a section with the
+    following format and returns a list of (username, lines said) tuples,
+    with usernames normalised to lower case and lines said as a string
+
+    People present (lines said)
+    ---------------------------
+
+    * username (117)
+    * username2 (50)
+    """
+    meeting_people = []
+    in_people = False
+    with open(meeting_txt) as txt_file:
+        for line in txt_file:
+            entry = line.strip()  # tolerate CRLF / missing final newline
+            if entry == "People present (lines said)":
+                in_people = True
+            # only bullet lines after the header count; lines mentioning
+            # 'openstack' (presumably the meetbot's own nick) are ignored
+            elif (in_people and entry.startswith('*') and
+                    'openstack' not in entry):
+                ircnic, paren, linessaid = entry[1:].rpartition('(')
+                if paren:
+                    meeting_people.append((ircnic.strip(" _").lower(),
+                                           linessaid.rstrip(') ')))
+    return meeting_people
+
+
+def get_meeting_aggregates(meeting_data):
+    """
+    For each meeting category, tally per user how many meetings they
+    attended and the total number of lines they said.
+    Returns {category: {user: {'attendance_count', 'lines_said'}}}.
+    """
+    meeting_aggregate = {}
+    for category in meeting_data.keys():
+        totals = {}
+        for attendees in meeting_data[category]:
+            for person in attendees:
+                # every appearance counts as one attended meeting
+                stats = totals.setdefault(
+                    person[0], {'attendance_count': 0, 'lines_said': 0})
+                stats['attendance_count'] += 1
+                stats['lines_said'] += int(person[1])
+        meeting_aggregate[category] = totals
+    return meeting_aggregate
+
+
+def print_meet_stats(meeting_data):
+    """Print per-category user stats, most active attendees first."""
+    def rank(item):  # explicit key: dicts are not orderable on Python 3
+        return item[1]["attendance_count"], item[1]["lines_said"]
+    for name, users in meeting_data.items():
+        print("\n" + name + "\n=====================================\n")
+        for nick, stats in sorted(users.items(), key=rank, reverse=True):
+            print("{: <20} {: <20} {: <20}".format(
+                nick, stats["attendance_count"], stats["lines_said"]))
+
+
+def print_eligible_usernames(meeting_data, num_meetings=1, lines_said=1):
+    """Print users whose combined stats across all categories reach
+    num_meetings attended or lines_said lines. Input is not modified."""
+    user_aggregate = {}
+    for users in meeting_data.values():
+        for nick, stats in users.items():
+            # sum into a fresh dict so the caller's dicts are never mutated
+            total = user_aggregate.setdefault(
+                nick, {"attendance_count": 0, "lines_said": 0})
+            total["attendance_count"] += stats["attendance_count"]
+            total["lines_said"] += stats["lines_said"]
+    print("\n OVERALL STATS \n=====================================\n")
+    ranked = sorted(user_aggregate.items(), reverse=True, key=lambda u: (
+        u[1]["attendance_count"], u[1]["lines_said"]))
+    for nick, total in ranked:
+        met, said = total["attendance_count"], total["lines_said"]
+        if met >= num_meetings or said >= lines_said:
+            print("{: <20} {: <20} {: <20}".format(nick, met, said))
+
+
+def main():
+    stats = get_meeting_aggregates(get_recent_meets(
+        "./eavesdrop.openstack.org/meetings", 183))  # 183 days back
+    print_meet_stats(stats)
+    print_eligible_usernames(stats, num_meetings=2, lines_said=10)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/get_meeting_data.sh b/tools/get_meeting_data.sh
new file mode 100755
index 0000000..f41c550
--- /dev/null
+++ b/tools/get_meeting_data.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Downloads the TXT summary file from the meetbot records
+# at OpenStack for specific meetings
+
+MEETINGS=(
+  operators_ops_tools_monitoring ops_tags _operator_tags large_deployments_team_august_2015_meeting
+  large_deployment_team large_deployments_team large_deployment_team_january_2015_meeting
+  large_deployments_team_december_2015_meeting large_deployments_team_february_2016_meeting
+  large_deployments_team_january_2016_meeting large_deployments_team_monthly_meeting
+  large_deployments_team_october_2015_meeting large_deployments_team_september_2015_meeting
+  log_wg openstack_operators product_team product_work_group product_working_group
+  telcowg uc user_committee
+)
+# quote every expansion so a meeting name is always passed as one word
+for meeting in "${MEETINGS[@]}"; do
+  wget --no-parent --recursive --accept "*.txt" --reject="*.log.txt" "http://eavesdrop.openstack.org/meetings/${meeting}/"
+done