diff --git a/app.py b/app.py index 1656e18..8b9e804 100644 --- a/app.py +++ b/app.py @@ -1,287 +1,286 @@ # -*- coding: utf-8 -*- import bs4 import decorator import flask import mwapi import mwoauth import os import random import requests import requests_oauthlib import string import toolforge import yaml app = flask.Flask(__name__) app.before_request(toolforge.redirect_to_https) toolforge.set_user_agent('speedpatrolling', email='mail@lucaswerkmeister.de') user_agent = requests.utils.default_user_agent() __dir__ = os.path.dirname(__file__) try: with open(os.path.join(__dir__, 'config.yaml')) as config_file: app.config.update(yaml.safe_load(config_file)) except FileNotFoundError: print('config.yaml file not found, assuming local development setup') app.secret_key = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(64)) if 'oauth' in app.config: consumer_token = mwoauth.ConsumerToken(app.config['oauth']['consumer_key'], app.config['oauth']['consumer_secret']) @decorator.decorator def memoize(func, *args, **kwargs): if args or kwargs: raise TypeError('only memoize functions with no arguments') key = '_memoize_' + func.__name__ if key not in flask.g: setattr(flask.g, key, func()) return getattr(flask.g, key) @app.template_global() def csrf_token(): if 'csrf_token' not in flask.session: flask.session['csrf_token'] = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(64)) return flask.session['csrf_token'] @app.template_global() def form_value(name): if 'repeat_form' in flask.g and name in flask.request.form: return (flask.Markup(r' value="') + flask.Markup.escape(flask.request.form[name]) + flask.Markup(r'" ')) else: return flask.Markup() @app.template_global() def form_attributes(name): return (flask.Markup(r' id="') + flask.Markup.escape(name) + flask.Markup(r'" name="') + flask.Markup.escape(name) + flask.Markup(r'" ') + form_value(name)) @app.template_filter() def user_link(user_name): return (flask.Markup(r'') + flask.Markup(r'') + flask.Markup.escape(user_name) + flask.Markup(r'') + flask.Markup(r'')) @app.template_global() def user_logged_in(): return 'oauth_access_token' in flask.session @app.template_global() def authentication_area(): if 'oauth' not in app.config: return flask.Markup() if not user_logged_in(): return (flask.Markup(r'Log in')) access_token = mwoauth.AccessToken(**flask.session['oauth_access_token']) identity = mwoauth.identify('https://www.wikidata.org/w/index.php', consumer_token, access_token) return (flask.Markup(r'Logged in as ') + user_link(identity['username']) + flask.Markup(r'')) @memoize def authenticated_session(): if 'oauth_access_token' in flask.session: access_token = mwoauth.AccessToken(**flask.session['oauth_access_token']) auth = requests_oauthlib.OAuth1(client_key=consumer_token.key, client_secret=consumer_token.secret, resource_owner_key=access_token.key, resource_owner_secret=access_token.secret) return mwapi.Session(host='https://www.wikidata.org', auth=auth, user_agent=user_agent) else: return None def unpatrolled_changes(): session = authenticated_session() for result in session.get(action='query', list='recentchanges', rcprop=['ids'], rcshow='unpatrolled', rctype=['edit'], # TODO consider including 'new' as well rclimit='max', continuation=True): for change in result['query']['recentchanges']: yield change['revid'] @memoize def user_rights(): session = authenticated_session() if session is None: return [] return session.get(action='query', meta='userinfo', uiprop='rights')['query']['userinfo']['rights'] @app.template_global() def user_can_patrol(): return 'patrol' in user_rights() @app.template_global() def user_can_rollback(): return 'rollback' in user_rights() @app.route('/') def index(): return flask.render_template('index.html') @app.route('/diff/') def any_diff(): if not user_logged_in(): return flask.redirect(flask.url_for('login')) skipped_ids = flask.session.get('skipped_ids', []) skipped_ids.sort(reverse=True) - print(skipped_ids) del skipped_ids[1000:] flask.session['skipped_ids'] = skipped_ids for id in unpatrolled_changes(): if id in skipped_ids: continue return flask.redirect(flask.url_for('diff', id=id)) @app.route('/diff//') def diff(id): session = authenticated_session() results = session.get(action='compare', fromrev=id, torelative='prev', prop=['title', 'user', 'parsedcomment', 'diff'], formatversion=2)['compare'] return flask.render_template('diff.html', id=id, title=results['totitle'], old_user=results['fromuser'], new_user=results['touser'], old_comment=fix_markup(results['fromparsedcomment']), new_comment=fix_markup(results['toparsedcomment']), body=fix_markup(results['body'])) @app.route('/diff//skip', methods=['POST']) def diff_skip(id): if not submitted_request_valid(): return 'CSRF error', 400 skipped_ids = flask.session.get('skipped_ids', []) skipped_ids.append(id) flask.session['skipped_ids'] = skipped_ids return flask.redirect(flask.url_for('any_diff')) @app.route('/diff//patrol', methods=['POST']) def diff_patrol(id): if not submitted_request_valid(): return 'CSRF error', 400 session = authenticated_session() token = session.get(action='query', meta='tokens', type='patrol')['query']['tokens']['patroltoken'] session.post(action='patrol', revid=id, token=token) return flask.redirect(flask.url_for('any_diff')) @app.route('/diff//rollback', methods=['POST']) def diff_rollback(id): if not submitted_request_valid(): return 'CSRF error', 400 session = authenticated_session() results = session.get(action='query', meta='tokens', type='rollback', revids=[str(id)], prop='revisions', rvprop='user', formatversion='2') token = results['query']['tokens']['rollbacktoken'] page = results['query']['pages'][0] pageid = page['pageid'] user = page['revisions'][0]['user'] session.post(action='rollback', pageid=pageid, user=user, token=token) return flask.redirect(flask.url_for('any_diff')) @app.route('/login') def login(): redirect, request_token = mwoauth.initiate('https://www.wikidata.org/w/index.php', consumer_token, user_agent=user_agent) flask.session['oauth_request_token'] = dict(zip(request_token._fields, request_token)) return flask.redirect(redirect) @app.route('/oauth/callback') def oauth_callback(): request_token = mwoauth.RequestToken(**flask.session['oauth_request_token']) access_token = mwoauth.complete('https://www.wikidata.org/w/index.php', consumer_token, request_token, flask.request.query_string, user_agent=user_agent) flask.session['oauth_access_token'] = dict(zip(access_token._fields, access_token)) return flask.redirect(flask.url_for('index')) def fix_markup(html): soup = bs4.BeautifulSoup(html, 'html.parser') for link in soup.select('a[href]'): href = link['href'] if href.startswith('/') and not href.startswith('//'): link['href'] = 'https://www.wikidata.org' + href return flask.Markup(str(soup)) def full_url(endpoint, **kwargs): scheme=flask.request.headers.get('X-Forwarded-Proto', 'http') return flask.url_for(endpoint, _external=True, _scheme=scheme, **kwargs) def submitted_request_valid(): """Check whether a submitted POST request is valid. If this method returns False, the request might have been issued by an attacker as part of a Cross-Site Request Forgery attack; callers MUST NOT process the request in that case. """ real_token = flask.session.pop('csrf_token', None) submitted_token = flask.request.form.get('csrf_token', None) if not real_token: # we never expected a POST return False if not submitted_token: # token got lost or attacker did not supply it return False if submitted_token != real_token: # incorrect token (could be outdated or incorrectly forged) return False if not flask.request.referrer.startswith(full_url('index')): # correct token but not coming from the correct page; for # example, JS running on https://tools.wmflabs.org/tool-a is # allowed to access https://tools.wmflabs.org/tool-b and # extract CSRF tokens from it (since both of these pages are # hosted on the https://tools.wmflabs.org domain), so checking # the Referer header is our only protection against attackers # from other Toolforge tools return False return True @app.after_request def deny_frame(response): """Disallow embedding the tool’s pages in other websites. If other websites can embed this tool’s pages, e. g. in