diff --git a/docker.py b/docker.py deleted file mode 100644 index b4e0368f..00000000 --- a/docker.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -""" -Wrapper around the docker command -Copyright (C) 2017 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . -""" - -import os -import subprocess -import time - -CONCURRENT = 6 -DOCKER_IMAGE = 'libraryupgrader' -if os.path.exists('/srv/data'): - DATA_ROOT = '/srv/data' -else: - DATA_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data')) - - -def run(name: str, env: dict, mounts=None, rm=False, entrypoint=None, - extra_args=None, background=True): - """ - :param name: Name of container - :param env: Environment values - :param entrypoint: Entrypoint to use - :param extra_args: Args to pass onto the command - :param background: Run in background or not - """ - args = ['docker', 'run', '--name=' + name] - for key, value in env.items(): - args.extend(['--env', '%s=%s' % (key, value)]) - if rm: - args.append('--rm') - if entrypoint is not None: - args.extend(['--entrypoint', entrypoint]) - args.extend([ - '-v', DATA_ROOT + '/cache:/cache', - ]) - if mounts is not None: - for outside, inside in mounts.items(): - args.extend(['-v', '%s:%s' % (outside, inside)]) - if background: - args.append('-d') - args.append(DOCKER_IMAGE) - if extra_args is not None: - args.extend(extra_args) - subprocess.check_call(args) - - -def get_running_containers() -> list: - out = subprocess.check_output(['docker', 'ps', '-q']).decode().strip() - if not out: - return [] - return out.split('\n') - - -def wait_for_containers(count: int): - while len(get_running_containers()) > count: - print('Waiting...') - time.sleep(2) - - -def logs(name: str) -> str: - out = subprocess.check_output( - ['docker', 'logs', name], - stderr=subprocess.STDOUT - ) - return out.decode() - - -def remove_container(name: str): - subprocess.check_call(['docker', 'rm', name]) diff --git a/libup/__init__.py b/libup/__init__.py index 429a67c5..361c9fe2 100644 --- a/libup/__init__.py +++ b/libup/__init__.py @@ -1,25 +1,38 @@ """ Copyright (C) 2019 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ +from datetime import datetime import os +import requests if os.path.exists('/srv/data'): DATA_ROOT = '/srv/data' else: DATA_ROOT = os.path.abspath(os.path.join( os.path.dirname(os.path.dirname(__file__)), 'data')) LOGS = os.path.join(DATA_ROOT, 'logs') +MANAGERS = ['composer', 'npm'] +TYPES = ['deps', 'dev'] + +session = requests.Session() + + +def date_log_dir(): + log_dir = os.path.join(LOGS, datetime.utcnow().strftime('%Y-%m-%d')) + if not os.path.isdir(log_dir): + os.mkdir(log_dir) + return log_dir diff --git a/libup/data.py b/libup/data.py index eab3713a..f0225101 100644 --- a/libup/data.py +++ b/libup/data.py @@ -1,48 +1,62 @@ """ Copyright (C) 2019 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ +from collections import defaultdict import json import os -from . import DATA_ROOT +from . import DATA_ROOT, MANAGERS, TYPES +from .library import Library class Data: def __init__(self): self.current = os.path.join(DATA_ROOT, 'current') def find_files(self): for fname in os.listdir(self.current): if fname.endswith('.json'): yield os.path.join(self.current, fname) def get_data(self): data = {} for fname in self.find_files(): with open(fname) as f: j = json.load(f) data[j['repo']] = j return data def get_repo_data(self, repo): expected = os.path.join(self.current, repo.replace('/', '_') + '.json') # Sanity check? if expected not in set(self.find_files()): raise ValueError("Didn't find %s" % repo) with open(expected) as f: return json.load(f) + + def get_deps(self, info): + deps = defaultdict(lambda: defaultdict(list)) + for manager in MANAGERS: + if info['%s-deps' % manager]: + minfo = info['%s-deps' % manager] + for type_ in TYPES: + if minfo[type_]: + for name, version in minfo[type_].items(): + deps[manager][type_].append(Library(manager, name, version)) + + return deps diff --git a/gerrit.py b/libup/gerrit.py similarity index 92% rename from gerrit.py rename to libup/gerrit.py index ec46810d..2f026d09 100644 --- a/gerrit.py +++ b/libup/gerrit.py @@ -1,76 +1,75 @@ #!/usr/bin/env python3 """ Common functions for Gerrit things Copyright (C) 2017 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ import json -import requests import time -s = requests.Session() +from . import session def make_request(method, path, **kwargs): base = 'https://gerrit.wikimedia.org/r/' if 'auth' in kwargs: base += 'a/' - r = s.request(method, base + path, **kwargs) + r = session.request(method, base + path, **kwargs) r.raise_for_status() return json.loads(r.text[4:]) def list_projects(prefix=None): params = {} if prefix is not None: params['p'] = prefix data = make_request('GET', 'projects/', params=params) repos = set() for repo, info in data.items(): if info['state'] != 'ACTIVE': continue repos.add(repo) yield from sorted(repos) def zuul_queue_length(q='gate-and-submit'): # ?time is for cache busting, just like jQuery does - r = s.get('https://integration.wikimedia.org/zuul/status.json?' + str(time.time())) + r = session.get('https://integration.wikimedia.org/zuul/status.json?' + str(time.time())) r.raise_for_status() data = r.json() for pipeline in data['pipelines']: if pipeline['name'] != q: continue count = 0 for change_q in pipeline['change_queues']: if change_q['heads']: count += sum(len(head) for head in change_q['heads']) return count # We never found the gate-and-submit queue? return 0 def wait_for_zuul_test_gate(count: int): zuul = zuul_queue_length('gate-and-submit') + zuul_queue_length('test') while zuul > count: print('test+gate-and-submit has %s jobs, waiting...' % zuul) time.sleep(10) zuul = zuul_queue_length('gate-and-submit') + zuul_queue_length('test') diff --git a/libup/library.py b/libup/library.py index 866ea613..8d47ffff 100644 --- a/libup/library.py +++ b/libup/library.py @@ -1,130 +1,128 @@ """ Copyright (C) 2019 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ from distutils.version import LooseVersion import functools -import requests import semver - -s = requests.Session() +from . import session class Library: def __init__(self, manager: str, name: str, version: str): self.manager = manager self.name = name # TODO: should version be optional? self.version = version def __lt__(self, other): return self.name < other.name @property def link(self) -> str: return { 'composer': 'https://packagist.org/packages/%s', 'npm': 'https://www.npmjs.com/package/%s', }[self.manager] % self.name def _metadata(self) -> dict: return { 'composer': _get_composer_metadata, 'npm': _get_npm_metadata, }[self.manager](self.name) def latest_version(self) -> str: return self._metadata()['latest'] def description(self) -> str: return self._metadata()['description'] def safe_versions(self) -> list: safes = _get_good_releases() try: return safes[self.manager][self.name] except KeyError: return [] def is_safe_upgrade(self, version) -> bool: """whether the specified version is a good release""" return version in self.safe_versions() def is_latest_safe(self) -> bool: """is the latest version a good release""" return self.is_safe_upgrade(self.latest_version()) def is_newer(self) -> bool: """if a newer version is available""" # Try and detect some operators to see if the current is a constraint # TODO: I don't think semver supports ^ if any(True for x in '^><=|' if x in self.version): try: # Split on | since semver doesn't support that if any( semver.match(self.latest_version(), part) for part in self.version.split('|') ): return True except ValueError: pass return False # Just do a safer/more basic semver comparison return LooseVersion(self.latest_version()) > LooseVersion(self.version) # FIXME Don't use functools/lru_cache @functools.lru_cache() def _get_composer_metadata(package: str) -> dict: - r = s.get('https://packagist.org/packages/%s.json' % package) + r = session.get('https://packagist.org/packages/%s.json' % package) resp = r.json()['package'] normalized = set() for ver in resp['versions']: if not ver.startswith('dev-') and not ver.endswith('-dev'): if ver.startswith('v'): normalized.add(ver[1:]) else: normalized.add(ver) version = max(normalized) for normal in normalized: try: if LooseVersion(normal) > LooseVersion(version): version = normal except ValueError: pass # print('Latest %s: %s' % (package, version)) return { 'latest': version, 'description': resp['description'], } @functools.lru_cache() def _get_npm_metadata(package: str) -> dict: - r = s.get('https://registry.npmjs.org/%s' % package) + r = session.get('https://registry.npmjs.org/%s' % package) resp = r.json() # print('Latest %s: %s' % (package, version)) return { 'latest': resp['dist-tags']['latest'], 'description': resp['description'], } @functools.lru_cache() def _get_good_releases() -> dict: - r = s.get('https://www.mediawiki.org/w/index.php?title=Libraryupgrader/Good_releases.json&action=raw') + r = session.get('https://www.mediawiki.org/w/index.php?title=Libraryupgrader/Good_releases.json&action=raw') return r.json() diff --git a/mw.py b/libup/mw.py similarity index 77% rename from mw.py rename to libup/mw.py index 7e5a7fae..b284d574 100644 --- a/mw.py +++ b/libup/mw.py @@ -1,74 +1,68 @@ #!/usr/bin/env python3 """ Common functions for MediaWiki stuff things. Copyright (C) 2017-2018 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ -import json import wikimediaci_utils as ci +from .data import Data + BLACKLIST = [ # Per https://gerrit.wikimedia.org/r/375513 'mediawiki/extensions/MediaWikiFarm', ] def get_extension_list(library: str, version_match=None, exclude=[]): repos = set() skip = BLACKLIST + exclude for repo in ci.mw_things_repos(): if repo not in skip: repos.add(repo) yield from filter_repo_list(sorted(repos), library, version_match=version_match) def filter_repo_list(repos, library, version_match=None): for repo in repos: version = repo_info(repo, library) if version: # Skip codesniffer 19.x.0 if library == 'mediawiki/mediawiki-codesniffer' and version.startswith('19.'): continue elif library == 'mediawiki/mediawiki-phan-config' and version == '0.3.0': # Requires manual intervention to upgrade continue if not version_match or version_match != version: yield {'repo': repo, 'version': version} def repo_info(repo: str, library: str): - if library == 'npm-audit-fix': - return get_gerrit_file(repo, 'package.json') is not None - phab = get_gerrit_file(repo, 'composer.json') - if phab: - version = phab.get('require-dev', {}).get(library) - if version: - return version - if 'extra' in phab: - suffix = library.split('/')[-1] - version = phab['extra'].get(suffix) - if version: - return version - return None - - -def get_gerrit_file(gerrit_name: str, path: str): - content = ci.get_gerrit_file(gerrit_name, path) + data = Data() try: - return json.loads(content) + info = data.get_repo_data(repo) except ValueError: return None + deps = data.get_deps(info) + + if library == 'npm-audit-fix': + # Any npm deps + return bool(deps['npm']['dev'] or deps['npm']['deps']) + for lib in (deps['composer']['deps'] + deps['composer']['dev']): + if lib.name == library: + return lib.version + return None diff --git a/libup/run.py b/libup/run.py index 8a63a68c..a99e2847 100755 --- a/libup/run.py +++ b/libup/run.py @@ -1,38 +1,33 @@ #!/usr/bin/env python3 """ Builds a dashboard for PHPCS runs Copyright (C) 2017 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ -from datetime import datetime -import os import wikimediaci_utils as ci -from . import DATA_ROOT, LOGS +from . import DATA_ROOT, date_log_dir from .tasks import run_check def main(): - log_dir = os.path.join(LOGS, datetime.utcnow().strftime('%Y-%m-%d')) - if not os.path.isdir(log_dir): - os.mkdir(log_dir) for repo in sorted(ci.mw_things_repos()): print(repo) - run_check.delay(repo, DATA_ROOT, log_dir) + run_check.delay(repo, DATA_ROOT, date_log_dir()) if __name__ == '__main__': main() diff --git a/upgrade.py b/libup/upgrade.py similarity index 95% rename from upgrade.py rename to libup/upgrade.py index 81a7a2bc..0de68dc0 100755 --- a/upgrade.py +++ b/libup/upgrade.py @@ -1,154 +1,150 @@ #!/usr/bin/env python3 """ Upgrades libraries! Copyright (C) 2017 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ -import datetime import getpass import os import sys -import docker -import gerrit -import mw +from . import date_log_dir +from . import docker, gerrit, mw GERRIT_USER = 'libraryupgrader' CANARIES = [ 'mediawiki/extensions/Linter', 'mediawiki/extensions/MassMessage', 'mediawiki/extensions/VisualEditor', 'mediawiki/skins/MonoBook', 'oojs/ui', ] # Gerrit repos not under mediawiki/libs/ OTHER_LIBRARIES = [ 'AhoCorasick', 'CLDRPluralRuleParser', 'HtmlFormatter', 'IPSet', 'RelPath', 'RunningStat', 'VisualEditor/VisualEditor', 'WrappedString', 'at-ease', 'base-convert', 'cdb', 'css-sanitizer', 'integration/docroot', 'labs/tools/stewardbots', 'mediawiki/oauthclient-php', 'mediawiki/services/parsoid', 'mediawiki/tools/codesniffer', 'mediawiki/tools/minus-x', 'mediawiki/tools/phan', 'mediawiki/tools/phan/SecurityCheckPlugin', 'mediawiki/tools/phpunit-patch-coverage', 'oojs', 'oojs/ui', 'php-session-serializer', 'purtle', 'testing-access-wrapper', 'unicodejs', 'utfnormal', 'wikimedia/lucene-explain-parser', 'wikimedia/textcat', ] def run(repo: str, library: str, version: str, pw: str) -> str: env = { 'MODE': 'upgrade', 'REPO': repo, 'PACKAGE': library, 'VERSION': version, 'GERRIT_USER': GERRIT_USER, 'GERRIT_PW': pw, } name = repo.replace('/', '_') + library.split('/')[-1] docker.run(name, env) return name def get_safe_logs(name: str, pw: str) -> str: logs = docker.logs(name) # Prevent the password from accidentally leaking if pw: logs = logs.replace(pw, '') return logs def preprocess_filter(gen): for info in gen: yield info['repo'] def get_library_list(): yield from gerrit.list_projects('mediawiki/libs/') yield from OTHER_LIBRARIES def main(): if len(sys.argv) < 3: print('Usage: upgrade.py library version repo [limit]') sys.exit(1) library = sys.argv[1] version = sys.argv[2] repo = sys.argv[3] try: limit = int(sys.argv[4]) except IndexError: limit = None pw = getpass.getpass('HTTP Password for %s: ' % GERRIT_USER) if repo == 'extensions': repos = preprocess_filter( mw.get_extension_list(library, version_match=version, exclude=CANARIES) ) elif repo == 'canaries': repos = preprocess_filter( mw.filter_repo_list(CANARIES, library, version_match=version) ) elif repo == 'libraries': repos = preprocess_filter( mw.filter_repo_list(get_library_list(), library, version_match=version) ) else: repos = [repo] processed = set() - log_dir = os.path.join('logs', datetime.datetime.utcnow().strftime('%Y-%m-%d')) - if not os.path.isdir(log_dir): - os.mkdir(log_dir) + log_dir = date_log_dir() for repo in repos: name = run(repo, library, version, pw) processed.add(name) docker.wait_for_containers(count=0) logs = get_safe_logs(name, pw) with open(os.path.join(log_dir, name + '.log'), 'w') as f: f.write(logs) print('Saved logs to %s.log' % name) docker.remove_container(name) gerrit.wait_for_zuul_test_gate(count=3) if limit is not None and len(processed) > limit: print('Passed limit of %s, breaking' % limit) break if __name__ == '__main__': main() diff --git a/libup/web/__init__.py b/libup/web/__init__.py index 8be1b76d..fbbeca1e 100644 --- a/libup/web/__init__.py +++ b/libup/web/__init__.py @@ -1,184 +1,169 @@ """ Copyright (C) 2019 Kunal Mehta This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ from collections import defaultdict, OrderedDict from flask import Flask, render_template, make_response from flask_bootstrap import Bootstrap import json from markdown import markdown import os -from .. import LOGS +from .. import LOGS, MANAGERS, TYPES from ..data import Data -from ..library import Library - -MANAGERS = ['composer', 'npm'] -TYPES = ['deps', 'dev'] app = Flask(__name__) app.config['BOOTSTRAP_SERVE_LOCAL'] = True Bootstrap(app) SEVERITIES = ['critical', 'high', 'moderate', 'low', 'info'] # TODO: find some more colors? COLORS = ['danger', 'danger', 'warning', 'warning', 'info'] @app.context_processor def inject_to_templates(): return { 'sorted': sorted, 'len': len, } @app.route('/') def index(): count = len(set(Data().find_files())) return render_template('index.html', count=count) -def _get_deps(info): - deps = defaultdict(lambda: defaultdict(list)) - for manager in MANAGERS: - if info['%s-deps' % manager]: - minfo = info['%s-deps' % manager] - for type_ in TYPES: - if minfo[type_]: - for name, version in minfo[type_].items(): - deps[manager][type_].append(Library(manager, name, version)) - - return deps - - @app.route('/r/') def r(repo): + data = Data() try: - info = Data().get_repo_data(repo) + info = data.get_repo_data(repo) except ValueError: return make_response('Sorry, I don\'t know this repository.', 404) - deps = _get_deps(info) + deps = data.get_deps(info) return render_template( 'r.html', repo=repo, deps=deps, logs=find_logs(repo) ) @app.route('/library//') def library_(manager, name): if manager not in MANAGERS: return make_response('Unknown manager.', 404) used = {'deps': defaultdict(set), 'dev': defaultdict(set)} found = None - for repo, info in Data().get_data().items(): - deps = _get_deps(info) + data = Data() + for repo, info in data.get_data().items(): + deps = data.get_deps(info) if manager in deps: mdeps = deps[manager] for type_ in TYPES: for lib in mdeps[type_]: if lib.name == name: used[type_][lib.version].add(repo) found = lib if not found: return make_response('Unknown repository.', 404) return render_template( 'library.html', manager=manager, name=name, used=used, library=found, ) @app.route('/logs') def logs(): return 'Not yet implemented' def find_logs(repo): for date in os.listdir(LOGS): if date.startswith('.'): continue path = os.path.join(LOGS, date) files = os.listdir(path) old_repo = repo.replace('/', '_') yield from [os.path.join(path, x) for x in files if x.startswith(old_repo)] yield from _new_log_search( repo, [os.path.join(path, x) for x in files if x.endswith('.json')] ) def _new_log_search(repo, files): for fname in files: with open(fname) as f: if json.load(f)['repo'] == repo: yield fname @app.route('/vulns/npm') def vulns_npm(): data = Data().get_data() advisories = {} affected = defaultdict(dict) for repo, info in data.items(): if not info['npm-audit']: continue if 'error' in info['npm-audit']: # TODO: Use proper logging print(repo, info['npm-audit']) continue for a_id, a_info in info['npm-audit']['advisories'].items(): affected[int(a_id)][repo] = a_info if a_id not in advisories: advisories[a_id] = a_info advisories = OrderedDict(sorted( advisories.items(), key=lambda x: (SEVERITIES.index(x[1]['severity']), x[0]) )) def via(findings): ret = set() for finding in findings: for path in finding['paths']: ret.add(path.split('>', 1)[0]) return sorted(ret) return render_template( 'vulns_npm.html', advisories=advisories, affected=affected, markdown=markdown, SEVERITIES=SEVERITIES, COLORS=COLORS, dev_all=lambda x: all(y['dev'] for y in x), via=via, ) if __name__ == '__main__': app.run(debug=True) diff --git a/setup.py b/setup.py index 6b8e09fa..3cc054bc 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,30 @@ from setuptools import setup setup( name='libup', version='0.0.1', packages=['libup'], url='https://www.mediawiki.org/wiki/Libraryupgrader', license='AGPL-3.0-or-later', author='Kunal Mehta', author_email='legoktm@member.fsf.org', description='semi-automated tool that manages upgrades of libraries', include_package_data=True, install_requires=[ 'requests', 'wikimediaci-utils', 'flask', 'flask-bootstrap', 'gunicorn', 'markdown', 'semver', 'celery', ], entry_points={ 'console_scripts': [ - 'libup-run = libup.run:main' + 'libup-run = libup.run:main', + 'libup-upgrade = libup.upgrade:main', ] } )