Add a file that can detail the requirements files

Following the ML discussion, there was a desire to provide a more detailed version of the requirements that includes information such as license, author, etc. This script gathers that information from the PyPI JSON API and writes it to corresponding files, which can be used by automated programs that want this information without having to query PyPI themselves. This can be useful for auditing requirement licenses, for example. To run: $ python detail.py global-requirements.txt -- this will gather the information about the requirements from PyPI (if possible) and then write it to a new file located at global-requirements.json. Change-Id: I710feb7038eec0c916593cce0837823e7f4fce15
2014-03-19 11:00:48 -07:00 · 2014-03-19 11:00:48 -07:00 · 10fb194613
parent eb52e76fd5
commit 10fb194613
1 changed files with 104 additions and 0 deletions
--- a/detail.py
+++ b/detail.py
@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+
+#    Copyright (C) 2014 Yahoo! Inc. All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from __future__ import print_function
+
+import contextlib
+import json
+import os
+import pkg_resources
+import sys
+import traceback
+import urllib
+
+# Base URL of the package index to query; the PYPI_LOCATION environment
+# variable, when set, overrides the default below.
+PYPI_LOCATION = os.environ.get('PYPI_LOCATION', 'http://pypi.python.org/pypi')
+
+
+# Keys of the index's 'info' mapping that main() copies into its output;
+# every other key is dropped.
+KEEP_KEYS = frozenset((
+    'author',
+    'author_email',
+    'maintainer',
+    'maintainer_email',
+    'license',
+    'summary',
+    'home_page',
+))
+
+
+def iter_names(req):
+    """Yield candidate spellings of a requirement's name.
+
+    For ``req.key`` and then ``req.project_name`` this yields, in order:
+    the name as-is, the name title-cased, the name with dashes replaced by
+    underscores, and that underscored form title-cased. Duplicates are not
+    filtered out.
+    """
+    for base in (req.key, req.project_name):
+        underscored = base.replace("-", "_")
+        variants = (base, base.title(), underscored, underscored.title())
+        for candidate in variants:
+            yield candidate
+
+
+def release_data(req):
+    """Fetch the package index's JSON metadata for a requirement.
+
+    Each candidate name produced by iter_names() is tried in turn against
+    ``PYPI_LOCATION/<name>/json``; the decoded JSON document of the first
+    URL that answers with HTTP 200 is returned.
+
+    :param req: parsed requirement exposing ``key`` and ``project_name``
+    :returns: the decoded JSON document
+    :raises IOError: if no candidate URL answered with HTTP 200 (the
+                     message lists every URL that was attempted), or if
+                     the request itself failed without an HTTP status
+    """
+    # Imported locally so the lookup works on both Python 2 and Python 3,
+    # where quote() and urlopen() live in different modules.
+    try:
+        from urllib.parse import quote
+        from urllib.request import urlopen
+    except ImportError:
+        from urllib import quote
+        from urllib import urlopen
+
+    # Try to find it with various names...
+    attempted = []
+    for name in iter_names(req):
+        url = PYPI_LOCATION + "/%s/json" % (quote(name))
+        if url in attempted:
+            continue
+        try:
+            uh = urlopen(url)
+        except IOError as exc:
+            # Python 3's urlopen() raises HTTPError (which carries the
+            # status in ``code``) for e.g. a 404 instead of returning the
+            # response; treat that like any other non-200 answer so the
+            # remaining names are still tried. Errors without a status
+            # code (such as network failures) are propagated unchanged.
+            if getattr(exc, 'code', None) is None:
+                raise
+            attempted.append(url)
+            continue
+        with contextlib.closing(uh):
+            if uh.getcode() != 200:
+                attempted.append(url)
+                continue
+            # read() returns bytes on Python 3; decode before parsing.
+            return json.loads(uh.read().decode('utf-8'))
+    attempted = [" * %s" % url for url in attempted]
+    raise IOError("Could not find '%s' on pypi\nAttempted urls:\n%s"
+                  % (req.key, "\n".join(attempted)))
+
+
+def main():
+    """Write a JSON details file for every requirements file on argv.
+
+    For each file named on the command line, every non-blank,
+    non-comment line is parsed as a requirement and looked up with
+    release_data(). The result is written next to the input file, with
+    the input's extension replaced by ``.json``, as a mapping of
+    requirement key to ``{'requirement': ..., 'info': ...}`` where
+    ``info`` holds only the KEEP_KEYS fields. A requirement whose lookup
+    raised IOError is recorded as ``None`` (the traceback is printed).
+
+    Exits with status 1 after printing a usage line when no file is given.
+    """
+    if len(sys.argv) == 1:
+        print("%s requirement-file ..." % (sys.argv[0]), file=sys.stderr)
+        sys.exit(1)
+    for filename in sys.argv[1:]:
+        print("Analyzing file: %s" % (filename))
+        details = {}
+        # Text mode: the lines are compared against str literals below,
+        # which fails with bytes on Python 3.
+        with open(filename, "r") as fh:
+            for line in fh.read().splitlines():
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue
+                req = pkg_resources.Requirement.parse(line)
+                print(" - processing: %s" % (req))
+                try:
+                    raw_req_data = release_data(req)
+                except IOError:
+                    # Keep going; a missing package is recorded as None.
+                    traceback.print_exc()
+                    details[req.key] = None
+                    continue
+                info = raw_req_data.get('info', {})
+                req_info = dict((k, v) for (k, v) in info.items()
+                                if k in KEEP_KEYS)
+                details[req.key] = {
+                    'requirement': str(req),
+                    'info': req_info,
+                }
+        base, _ext = os.path.splitext(filename)
+        # Text mode: json.dumps() returns str, which a binary-mode file
+        # rejects on Python 3.
+        with open("%s.json" % (base), "w") as fh:
+            fh.write(json.dumps(details, sort_keys=True, indent=4))
+
+
+# Run the command-line entry point only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == '__main__':
+    main()