Add a script that writes detailed information about requirements files

Following the mailing list (ML) discussion, there was a desire to
provide a more detailed version of the requirements that includes
information such as license and author. This script
gathers that information from the PyPI JSON API and
writes it to corresponding files that can be used by
automated programs that want to determine this information
without having to query PyPI themselves.

This can be useful, for example, for auditing the
licenses of requirements.

To run:

$ python detail.py global-requirements.txt

This will gather the information about the requirements
from pypi (if possible) and then write a new file located
at global-requirements.json with this information.

Change-Id: I710feb7038eec0c916593cce0837823e7f4fce15
This commit is contained in:
Joshua Harlow 2014-03-19 11:00:48 -07:00
parent eb52e76fd5
commit 10fb194613
1 changed files with 104 additions and 0 deletions

104
detail.py Normal file
View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2014 Yahoo! Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import contextlib
import json
import os
import pkg_resources
import sys
import traceback
import urllib
# Base URL of the PyPI index that is queried; the PYPI_LOCATION environment
# variable takes precedence over the built-in default when it is set.
PYPI_LOCATION = os.environ.get('PYPI_LOCATION', 'http://pypi.python.org/pypi')
# Names of the fields in a PyPI 'info' mapping that are retained in the
# generated details; any field not listed here is dropped.
KEEP_KEYS = frozenset((
    'license',
    'summary',
    'home_page',
    'author',
    'author_email',
    'maintainer',
    'maintainer_email',
))
def iter_names(req):
    """Yield the name spellings to try when looking up a requirement.

    For ``req.key`` and then ``req.project_name``, four variants are
    produced in order: the name itself, its title-cased form, the name with
    dashes replaced by underscores, and that underscored form title-cased.
    Duplicates are not filtered out.
    """
    for base in (req.key, req.project_name):
        underscored = base.replace("-", "_")
        variants = (base, base.title(), underscored, underscored.title())
        for variant in variants:
            yield variant
def release_data(req):
    """Fetch the JSON document describing a requirement from PyPI.

    Each spelling produced by ``iter_names(req)`` is turned into a
    ``<PYPI_LOCATION>/<quoted name>/json`` URL and requested in turn. The
    body of the first response with status code 200 is decoded with
    ``json.loads`` and returned.

    Raises:
        IOError: if none of the attempted URLs answered with status 200;
            the message lists every URL that was tried.
    """
    failed_urls = []
    for candidate in iter_names(req):
        url = "%s/%s/json" % (PYPI_LOCATION, urllib.quote(candidate))
        # Different spellings can collapse to the same URL; do not re-request
        # one that is already known to fail.
        if url in failed_urls:
            continue
        with contextlib.closing(urllib.urlopen(url)) as response:
            if response.getcode() == 200:
                return json.loads(response.read())
        failed_urls.append(url)
    bullet_list = "\n".join(" * %s" % url for url in failed_urls)
    raise IOError("Could not find '%s' on pypi\nAttempted urls:\n%s"
                  % (req.key, bullet_list))
def main():
    """Write a ``.json`` details file for each requirements file given.

    Every command line argument is treated as the path of a requirements
    file. Blank lines and lines starting with ``#`` are skipped; each
    remaining line is parsed with ``pkg_resources.Requirement.parse`` and
    looked up through ``release_data``. Only the ``info`` fields named in
    ``KEEP_KEYS`` are kept. When the lookup raises ``IOError`` the traceback
    is printed and the requirement is recorded as ``None``.

    The collected details are dumped as sorted, indented JSON to a file
    whose path is the input path with its extension replaced by ``.json``.

    Prints a usage line to stderr and exits with status 1 when no
    requirements file is supplied.
    """
    if len(sys.argv) == 1:
        print("%s requirement-file ..." % (sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    for filename in sys.argv[1:]:
        print("Analyzing file: %s" % (filename))
        details = {}
        with open(filename, "rb") as source:
            for raw_line in source.read().splitlines():
                entry = raw_line.strip()
                if not entry or entry.startswith("#"):
                    continue
                req = pkg_resources.Requirement.parse(entry)
                print(" - processing: %s" % (req))
                try:
                    pypi_doc = release_data(req)
                except IOError:
                    # Keep going with the remaining requirements; the None
                    # entry marks this one as not found.
                    traceback.print_exc()
                    details[req.key] = None
                    continue
                kept_info = dict(
                    (field, value)
                    for (field, value) in pypi_doc.get('info', {}).items()
                    if field in KEEP_KEYS
                )
                details[req.key] = {
                    'requirement': str(req),
                    'info': kept_info,
                }
        target = os.path.splitext(filename)[0] + ".json"
        with open(target, "wb") as sink:
            sink.write(json.dumps(details, sort_keys=True, indent=4))


if __name__ == '__main__':
    main()