diff --git a/app.py b/app.py index d2aea5d..6ecb883 100644 --- a/app.py +++ b/app.py @@ -1,316 +1,317 @@ # -*- coding: utf-8 -*- import flask from io import StringIO import mwoauth import os import pymysql import random import requests import requests_oauthlib import string import toolforge import urllib.parse import yaml try: from pygments.lexers import ShExCLexer except ImportError: have_pygments = False else: import pygments from pygments.formatters import HtmlFormatter have_pygments = True from job import Job, null_job from job_store import SqlJobStore, LocalFileJobStore from job_runner import GridEngineJobRunner from job_manager import JobManager, RejectJobDueToBlocks, RejectJobDueToPendingJobs app = flask.Flask(__name__) app.before_request(toolforge.redirect_to_https) toolforge.set_user_agent('wd-shex-infer', email='mail@lucaswerkmeister.de') user_agent = requests.utils.default_user_agent() __dir__ = os.path.dirname(__file__) try: with open(os.path.join(__dir__, 'config.yaml'), 'r', encoding='utf-8') as config_file: app.config.update(yaml.safe_load(config_file)) except FileNotFoundError: print('config.yaml file not found, assuming local development setup') app.secret_key = 'fake secret key so we can still use flask.session' if 'oauth' in app.config: consumer_token = mwoauth.ConsumerToken(app.config['oauth']['consumer_key'], app.config['oauth']['consumer_secret']) else: consumer_token = None wikidata_url = 'https://www.wikidata.org/w/index.php' connection = pymysql.connect(charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor, **app.config['database']) job_store = LocalFileJobStore(SqlJobStore(connection), app.config['files']) # update require_connection() if implementations here change job_runner = GridEngineJobRunner(app.config['rdf2graph']) job_manager = JobManager(job_store, job_runner, app.config.get('blocks_directory')) connection.close() # this connection was only to initialize the database, web requests have their own connections @app.template_global() def csrf_token(): if '_csrf_token' not in flask.session: flask.session['_csrf_token'] = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(64)) return flask.session['_csrf_token'] @app.template_filter() def time_element(datetime, previous_datetime=None): date_format = '%d %b %Y' separator = ', ' time_format = '%H:%M' tz = ' UTC' if previous_datetime is not None: # no need to mention the time zone more than once tz = '' if datetime.date() == previous_datetime.date(): # no need to mention the same date again date_format = '' separator = '' text = (datetime.strftime(date_format) + separator + datetime.strftime(time_format) + tz) return (flask.Markup(r'')) @app.template_filter() def user_link(user_name): return (flask.Markup(r'') + flask.Markup(r'') + flask.Markup.escape(user_name) + flask.Markup(r'') + flask.Markup(r'')) @app.template_filter() def job_line(job): return (flask.Markup(r'') + flask.Markup.escape(job.title) + flask.Markup(r', started on ') + time_element(job.datetime_created) + flask.Markup(r' by ') + user_link(job.author_name)) def require_connection(function): def ping(*args, **kwargs): connection = pymysql.connect(charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor, **app.config['database']) job_store.job_store.connection = connection try: return function(*args, **kwargs) finally: connection.close() ping.__name__ = function.__name__ return ping def require_job(function): def return_if_not_job(id, *args, **kwargs): job = job_manager.get_by_id(id) if job is None: return 'no such job', 404 return function(job, *args, **kwargs) return_if_not_job = require_connection(return_if_not_job) return_if_not_job.__name__ = function.__name__ return return_if_not_job def require_finished_job(function): def return_if_not_finished(job, *args, **kwargs): if job.datetime_first_stopped is None: return 'not yet finished', 404 return function(job, *args, **kwargs) return_if_not_finished_job = require_job(return_if_not_finished) return_if_not_finished_job.__name__ = function.__name__ return return_if_not_finished_job +def render_template(template_name, add_manager_data=False, form_data=None, **kwargs): + if add_manager_data: + kwargs.setdefault('blocks', job_manager.get_blocks()) + kwargs.setdefault('pending_jobs', job_manager.get_pending_jobs()) + if form_data: + kwargs.setdefault('title', form_data.get('title')) + kwargs.setdefault('description', form_data.get('description')) + kwargs.setdefault('url', form_data.get('url')) + kwargs.setdefault('sparql', form_data.get('sparql')) + return flask.render_template(template_name, **kwargs) + + @app.route('/') @require_connection def index(): - return flask.render_template('index.html', - pending_jobs=job_manager.get_pending_jobs(), - finished_jobs=job_manager.get_finished_jobs()) + return render_template('index.html', + add_manager_data=True, + finished_jobs=job_manager.get_finished_jobs()) @app.route('/job/new', methods=['GET', 'POST']) @require_connection def new_job(): response = if_needs_oauth_redirect() if response: return response identity = identify() if not identity['confirmed_email']: return 'must have confirmed email', 403 if identity['blocked']: return 'must not be blocked', 403 if flask.request.method == 'GET': - return flask.render_template('new-job.html', - blocks=job_manager.get_blocks(), - pending_jobs=job_manager.get_pending_jobs()) + return render_template('new-job.html', + add_manager_data=True) form_data = flask.request.form response = if_needs_csrf_redirect(form_data) if response: return response job = null_job._replace(author_name=identity['username'], title=form_data['title'], description=form_data.get('description'), url=form_data.get('url'), input_sparql=StringIO(form_data['sparql'])) try: job = job_manager.run(job) except RejectJobDueToBlocks as reject: - return flask.render_template('new-job.html', - blocks=reject.blocks, - pending_jobs=job_manager.get_pending_jobs(), - title=form_data.get('title'), - description=form_data.get('description'), - url=form_data.get('url'), - sparql=form_data.get('sparql'), - rejected_due_to_blocks=True) + return render_template('new_job.html', + add_manager_data=True, + blocks=reject.blocks, + form_data=form_data, + rejected_due_to_blocks=True) except RejectJobDueToPendingJobs as reject: - return flask.render_template('new-job.html', - blocks=job_manager.get_blocks(), - pending_jobs=reject.pending_jobs, - title=form_data.get('title'), - description=form_data.get('description'), - url=form_data.get('url'), - sparql=form_data.get('sparql'), - rejected_due_to_pending_jobs=True) + return render_template('new-job.html', + add_manager_data=True, + pending_jobs=reject.pending_jobs, + form_data=form_data, + rejected_due_to_pending_jobs=True) else: return flask.redirect(flask.url_for('view_job', id=job.id)) @app.route('/job/') @require_job def view_job(job): - return flask.render_template('job.html', - job=job, - wdqs_url=sparql_to_wdqs_url(job.input_sparql)) + return render_template('job.html', + job=job, + wdqs_url=sparql_to_wdqs_url(job.input_sparql)) @app.route('/job//sparql') @require_job def view_job_sparql(job): return flask.Response(job.input_sparql, mimetype='application/sparql-query') @app.route('/job//shex') @require_finished_job def view_job_shex(job): if job.output_shex is None: return 'this job did not produce any output', 410 if have_pygments and flask.request.accept_mimetypes.accept_html: shex = job.output_shex.read() job.output_shex.seek(0) formatter = HtmlFormatter() shexHtml = pygments.highlight(shex, ShExCLexer(), formatter) - return flask.render_template('shex.html', - title=job.title, - css=formatter.get_style_defs('#shex'), - shexHtml=shexHtml) + return render_template('shex.html', + title=job.title, + css=formatter.get_style_defs('#shex'), + shexHtml=shexHtml) else: return flask.Response(job.output_shex, mimetype='text/shex') @app.route('/job//stdout') @require_finished_job def view_job_stdout(job): return flask.Response(job.output_stdout, mimetype='text/plain') @app.route('/job//stderr') @require_finished_job def view_job_stderr(job): return flask.Response(job.output_stderr, mimetype='text/plain') def if_needs_oauth_redirect(): if not consumer_token: return None # development setup if 'oauth_access_token' in flask.session: return None # already authenticated redirect, request_token = mwoauth.initiate(wikidata_url, consumer_token, user_agent=user_agent) flask.session['oauth_request_token'] = dict(zip(request_token._fields, request_token)) flask.session['oauth_redirect_target'] = flask.url_for(flask.request.endpoint, **flask.request.view_args) return flask.redirect(redirect) def if_needs_csrf_redirect(form_data): token = flask.session.pop('_csrf_token', None) if not token or token != form_data.get('_csrf_token'): - return flask.render_template('new-job.html', - blocks=job_manager.get_blocks(), - pending_jobs=job_manager.get_pending_jobs(), - title=form_data.get('title'), - description=form_data.get('description'), - url=form_data.get('url'), - sparql=form_data.get('sparql'), - csrf_error=True) + return render_template('new-job.html', + add_manager_data=True, + form_data=form_data, + csrf_error=True) else: return None @app.route('/oauth/callback') def oauth_callback(): access_token = mwoauth.complete(wikidata_url, consumer_token, mwoauth.RequestToken(**flask.session['oauth_request_token']), flask.request.query_string, user_agent=user_agent) flask.session['oauth_access_token'] = dict(zip(access_token._fields, access_token)) return flask.redirect(flask.session['oauth_redirect_target']) def identify(): if not consumer_token: return {'username': '###TEST USER###', 'fake': True} # development setup access_token = mwoauth.AccessToken(**flask.session['oauth_access_token']) return mwoauth.identify(wikidata_url, consumer_token, access_token) def sparql_to_wdqs_url(sparql_io): sparql_str = sparql_io.read(4096) sparql_io.seek(0) if len(sparql_str) < 4096: return 'https://query.wikidata.org/#' + urllib.parse.quote(sparql_str) else: return None