diff --git a/app.py b/app.py
index d2aea5d..6ecb883 100644
--- a/app.py
+++ b/app.py
@@ -1,316 +1,317 @@
# -*- coding: utf-8 -*-
import functools
import os
import random
import secrets
import string
import urllib.parse
from io import StringIO

import flask
import mwoauth
import pymysql
import requests
import requests_oauthlib
import toolforge
import yaml
try:
    from pygments.lexers import ShExCLexer
except ImportError:
    have_pygments = False
else:
    import pygments
    from pygments.formatters import HtmlFormatter
    have_pygments = True

from job import Job, null_job
from job_store import SqlJobStore, LocalFileJobStore
from job_runner import GridEngineJobRunner
from job_manager import JobManager, RejectJobDueToBlocks, RejectJobDueToPendingJobs
# Module-level setup: create the Flask app, load configuration, and build the
# job store / runner / manager singletons that the views below use.
app = flask.Flask(__name__)
app.before_request(toolforge.redirect_to_https)
# Set the tool's user agent, then keep the resulting requests default around
# so it can be passed explicitly to the mwoauth calls further down.
toolforge.set_user_agent('wd-shex-infer', email='mail@lucaswerkmeister.de')
user_agent = requests.utils.default_user_agent()
__dir__ = os.path.dirname(__file__)
# Configuration lives in config.yaml next to this file; a missing file is
# treated as a local development setup rather than an error.
try:
    with open(os.path.join(__dir__, 'config.yaml'), 'r', encoding='utf-8') as config_file:
        app.config.update(yaml.safe_load(config_file))
except FileNotFoundError:
    print('config.yaml file not found, assuming local development setup')
    # NOTE(review): assumed to belong to the except branch (fallback only);
    # at module level it would override a configured secret key - confirm.
    app.secret_key = 'fake secret key so we can still use flask.session'
# Without an 'oauth' section there is no consumer token; the OAuth helpers
# below treat a falsy consumer_token as "development setup".
if 'oauth' in app.config:
    consumer_token = mwoauth.ConsumerToken(app.config['oauth']['consumer_key'], app.config['oauth']['consumer_secret'])
else:
    consumer_token = None
wikidata_url = 'https://www.wikidata.org/w/index.php'
# NOTE(review): 'database', 'files' and 'rdf2graph' are indexed unconditionally,
# so they presumably must be configured even for local development - confirm.
connection = pymysql.connect(charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor, **app.config['database'])
job_store = LocalFileJobStore(SqlJobStore(connection), app.config['files'])  # update require_connection() if implementations here change
job_runner = GridEngineJobRunner(app.config['rdf2graph'])
job_manager = JobManager(job_store, job_runner, app.config.get('blocks_directory'))
connection.close()  # this connection was only to initialize the database, web requests have their own connections
@app.template_global()
def csrf_token():
    """Return the CSRF token of the current session, creating it on first use.

    The token is 64 ASCII letters/digits stored under ``'_csrf_token'`` in
    ``flask.session``. Registered as a template global so forms can embed it.
    """
    if '_csrf_token' not in flask.session:
        alphabet = string.ascii_letters + string.digits
        # secrets draws from the OS CSPRNG; the random module is predictable
        # and must not be used for security tokens.
        flask.session['_csrf_token'] = ''.join(secrets.choice(alphabet) for _ in range(64))
    return flask.session['_csrf_token']
@app.template_filter()
def time_element(datetime, previous_datetime=None):
    """Render *datetime* as an HTML ``<time>`` element.

    The visible text is ``'%d %b %Y, %H:%M UTC'``. When *previous_datetime*
    is given, the ``' UTC'`` suffix is dropped, and if both values fall on
    the same date the date part (and the separator) is dropped as well.

    :param datetime: the datetime object to render.
    :param previous_datetime: optional datetime rendered just before this one.
    :return: ``flask.Markup`` with the machine-readable ISO timestamp in the
        ``datetime`` attribute and the human-readable text as content.
    """
    date_format = '%d %b %Y'
    separator = ', '
    time_format = '%H:%M'
    tz = ' UTC'
    if previous_datetime is not None:
        # no need to mention the time zone more than once
        tz = ''
        if datetime.date() == previous_datetime.date():
            # no need to mention the same date again
            date_format = ''
            separator = ''
    text = (datetime.strftime(date_format) +
            separator +
            datetime.strftime(time_format) +
            tz)
    # NOTE(review): the markup here had been reduced to an empty string, which
    # left `text` unused and the filter output empty; the <time> element is
    # reconstructed from the function's name - confirm against the templates.
    return (flask.Markup(r'<time datetime="') +
            flask.Markup.escape(datetime.isoformat()) +
            flask.Markup(r'">') +
            flask.Markup.escape(text) +
            flask.Markup(r'</time>'))
@app.template_filter()
def user_link(user_name):
    """Return *user_name*, HTML-escaped, wrapped in the surrounding markup fragments.

    NOTE(review): all four markup fragments are empty strings, so the result
    is currently just the escaped name; presumably they once held link
    markup - confirm.
    """
    opening = flask.Markup(r'') + flask.Markup(r'')
    closing = flask.Markup(r'') + flask.Markup(r'')
    escaped_name = flask.Markup.escape(user_name)
    return opening + escaped_name + closing
@app.template_filter()
def job_line(job):
    """Return a one-line summary of *job*: its title, start time and author.

    The title is HTML-escaped; the creation time and the author are rendered
    through ``time_element`` and ``user_link`` respectively.
    """
    title = flask.Markup(r'') + flask.Markup.escape(job.title)
    started = flask.Markup(r', started on ') + time_element(job.datetime_created)
    author = flask.Markup(r' by ') + user_link(job.author_name)
    return title + started + author
def require_connection(function):
    """Decorator: run *function* with a fresh database connection.

    A new pymysql connection is opened for each call, installed on the inner
    SQL job store (``job_store.job_store.connection``) and closed again when
    *function* returns or raises.
    """
    # functools.wraps carries over __name__ (which Flask uses as the endpoint
    # name) along with the docstring and other metadata.
    @functools.wraps(function)
    def ping(*args, **kwargs):
        connection = pymysql.connect(charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor, **app.config['database'])
        job_store.job_store.connection = connection
        try:
            return function(*args, **kwargs)
        finally:
            connection.close()
    return ping
def require_job(function):
    """Decorator: resolve the ``id`` argument into a job before calling *function*.

    The wrapped view receives the job returned by ``job_manager.get_by_id``
    as its first argument; if there is no such job, ``('no such job', 404)``
    is returned instead. The lookup runs inside ``require_connection``.
    """
    # `id` shadows the builtin, but the name is part of the wrapper's keyword
    # interface (see url_for('view_job', id=...)), so it is kept.
    @functools.wraps(function)
    def return_if_not_job(id, *args, **kwargs):
        job = job_manager.get_by_id(id)
        if job is None:
            return 'no such job', 404
        return function(job, *args, **kwargs)
    # The inner wrapper already carries function's __name__, which
    # require_connection propagates to the wrapper it returns.
    return require_connection(return_if_not_job)
def require_finished_job(function):
    """Decorator: like ``require_job``, but only for jobs that have stopped.

    If the job's ``datetime_first_stopped`` is ``None``, the wrapped view is
    not called and ``('not yet finished', 404)`` is returned instead.
    """
    @functools.wraps(function)
    def return_if_not_finished(job, *args, **kwargs):
        if job.datetime_first_stopped is None:
            return 'not yet finished', 404
        return function(job, *args, **kwargs)
    # require_job names its wrapper after the function it is given, which
    # already carries the original view's __name__ thanks to wraps above.
    return require_job(return_if_not_finished)
def render_template(template_name, add_manager_data=False, form_data=None, **kwargs):
    """Render *template_name*, optionally filling in common template variables.

    :param template_name: name of the template, passed to ``flask.render_template``.
    :param add_manager_data: if true, provide ``blocks`` and ``pending_jobs``
        from the job manager unless the caller passed them explicitly.
    :param form_data: optional submitted form; if non-empty, its ``title``,
        ``description``, ``url`` and ``sparql`` values are provided to the
        template unless the caller passed them explicitly.
    :param kwargs: further template variables; these always take precedence.
    :return: the rendered template.
    """
    if add_manager_data:
        # Only ask the job manager for values the caller did not supply:
        # dict.setdefault() would evaluate its default eagerly and run the
        # lookup even when the result is thrown away.
        if 'blocks' not in kwargs:
            kwargs['blocks'] = job_manager.get_blocks()
        if 'pending_jobs' not in kwargs:
            kwargs['pending_jobs'] = job_manager.get_pending_jobs()
    if form_data:
        for field in ('title', 'description', 'url', 'sparql'):
            kwargs.setdefault(field, form_data.get(field))
    return flask.render_template(template_name, **kwargs)
@app.route('/')
@require_connection
def index():
    """Render the index page with the finished jobs plus the job manager data."""
    finished = job_manager.get_finished_jobs()
    return render_template('index.html', add_manager_data=True, finished_jobs=finished)
@app.route('/job/new', methods=['GET', 'POST'])
@require_connection
def new_job():
    """Show the new-job form (GET) or submit a new job (POST).

    The user must be authenticated (an OAuth redirect is returned otherwise),
    have a confirmed email address and not be blocked. A POST must carry a
    valid CSRF token. If the job manager rejects the job, the form is shown
    again with the submitted values and the reason for the rejection;
    otherwise the response redirects to the new job's page.
    """
    response = if_needs_oauth_redirect()
    if response:
        return response
    identity = identify()
    if not identity['confirmed_email']:
        return 'must have confirmed email', 403
    if identity['blocked']:
        return 'must not be blocked', 403
    if flask.request.method == 'GET':
        return render_template('new-job.html',
                               add_manager_data=True)
    form_data = flask.request.form
    response = if_needs_csrf_redirect(form_data)
    if response:
        return response
    job = null_job._replace(author_name=identity['username'],
                            title=form_data['title'],
                            description=form_data.get('description'),
                            url=form_data.get('url'),
                            input_sparql=StringIO(form_data['sparql']))
    try:
        job = job_manager.run(job)
    except RejectJobDueToBlocks as reject:
        # Same template as every other branch: 'new-job.html' with a hyphen
        # (this branch previously named 'new_job.html').
        return render_template('new-job.html',
                               add_manager_data=True,
                               blocks=reject.blocks,
                               form_data=form_data,
                               rejected_due_to_blocks=True)
    except RejectJobDueToPendingJobs as reject:
        return render_template('new-job.html',
                               add_manager_data=True,
                               pending_jobs=reject.pending_jobs,
                               form_data=form_data,
                               rejected_due_to_pending_jobs=True)
    else:
        return flask.redirect(flask.url_for('view_job', id=job.id))
# NOTE(review): the route rule had no URL variable although require_job needs
# an `id` argument and new_job redirects with url_for('view_job', id=...);
# the `int` converter is an assumption - confirm.
@app.route('/job/<int:id>')
@require_job
def view_job(job):
    """Render the page for a single job, with a query service link for its SPARQL input."""
    return render_template('job.html',
                           job=job,
                           wdqs_url=sparql_to_wdqs_url(job.input_sparql))
# NOTE(review): the route rule had no URL variable although require_job needs
# an `id` argument; the `int` converter is an assumption - confirm.
@app.route('/job/<int:id>/sparql')
@require_job
def view_job_sparql(job):
    """Return the job's input SPARQL query as an ``application/sparql-query`` response."""
    return flask.Response(job.input_sparql,
                          mimetype='application/sparql-query')
# NOTE(review): the route rule had no URL variable although require_finished_job
# (via require_job) needs an `id` argument; the `int` converter is an
# assumption - confirm.
@app.route('/job/<int:id>/shex')
@require_finished_job
def view_job_shex(job):
    """Return the ShEx output of a finished job.

    Responds with 410 if the job produced no output. If pygments (with a ShExC
    lexer) is available and the client accepts HTML, the output is rendered
    syntax-highlighted through ``shex.html``; otherwise it is returned as a
    plain ``text/shex`` response.
    """
    if job.output_shex is None:
        return 'this job did not produce any output', 410
    if have_pygments and flask.request.accept_mimetypes.accept_html:
        shex = job.output_shex.read()
        job.output_shex.seek(0)  # leave the stream rewound after reading it
        formatter = HtmlFormatter()
        shex_html = pygments.highlight(shex, ShExCLexer(), formatter)
        return render_template('shex.html',
                               title=job.title,
                               css=formatter.get_style_defs('#shex'),
                               shexHtml=shex_html)
    else:
        return flask.Response(job.output_shex,
                              mimetype='text/shex')
# NOTE(review): the route rule had no URL variable although require_finished_job
# (via require_job) needs an `id` argument; the `int` converter is an
# assumption - confirm.
@app.route('/job/<int:id>/stdout')
@require_finished_job
def view_job_stdout(job):
    """Return the standard output of a finished job as plain text."""
    return flask.Response(job.output_stdout,
                          mimetype='text/plain')
# NOTE(review): the route rule had no URL variable although require_finished_job
# (via require_job) needs an `id` argument; the `int` converter is an
# assumption - confirm.
@app.route('/job/<int:id>/stderr')
@require_finished_job
def view_job_stderr(job):
    """Return the standard error output of a finished job as plain text."""
    return flask.Response(job.output_stderr,
                          mimetype='text/plain')
def if_needs_oauth_redirect():
    """Return a redirect that starts the OAuth handshake, or None if none is needed.

    No redirect is needed when there is no consumer token (development setup)
    or when the session already holds an access token. Otherwise the request
    token and the URL to come back to are stored in the session first.
    """
    if not consumer_token or 'oauth_access_token' in flask.session:
        # development setup, or already authenticated
        return None
    authorize_url, request_token = mwoauth.initiate(wikidata_url,
                                                    consumer_token,
                                                    user_agent=user_agent)
    flask.session['oauth_request_token'] = {
        field: value
        for field, value in zip(request_token._fields, request_token)
    }
    target = flask.url_for(flask.request.endpoint, **flask.request.view_args)
    flask.session['oauth_redirect_target'] = target
    return flask.redirect(authorize_url)
def if_needs_csrf_redirect(form_data):
    """Check the CSRF token of a submitted form.

    The session's token is consumed (popped) in any case. Returns None if it
    matches the form's ``_csrf_token``; otherwise returns the new-job form
    rendered again with the submitted values and ``csrf_error=True``.
    """
    expected = flask.session.pop('_csrf_token', None)
    if expected and expected == form_data.get('_csrf_token'):
        return None
    return render_template('new-job.html',
                           add_manager_data=True,
                           form_data=form_data,
                           csrf_error=True)
@app.route('/oauth/callback')
def oauth_callback():
    """Complete the OAuth handshake and redirect to the originally requested page.

    Uses the request token stored in the session together with the query
    string of this request, then stores the resulting access token in the
    session.
    """
    request_token = mwoauth.RequestToken(**flask.session['oauth_request_token'])
    access_token = mwoauth.complete(wikidata_url,
                                    consumer_token,
                                    request_token,
                                    flask.request.query_string,
                                    user_agent=user_agent)
    flask.session['oauth_access_token'] = {
        field: value
        for field, value in zip(access_token._fields, access_token)
    }
    target = flask.session['oauth_redirect_target']
    return flask.redirect(target)
def identify():
    """Return the identity of the current user.

    Without a consumer token (development setup) a fake identity is returned.
    It includes ``confirmed_email`` and ``blocked`` so that callers which
    index those keys (see ``new_job``) work in development too. Otherwise the
    identity is looked up via ``mwoauth.identify`` using the access token
    stored in the session.
    """
    if not consumer_token:
        # development setup
        return {
            'username': '###TEST USER###',
            'fake': True,
            'confirmed_email': True,
            'blocked': False,
        }
    access_token = mwoauth.AccessToken(**flask.session['oauth_access_token'])
    return mwoauth.identify(wikidata_url,
                            consumer_token,
                            access_token)
def sparql_to_wdqs_url(sparql_io, max_length=4096):
    """Return a query.wikidata.org URL for the query in *sparql_io*, or None if it is too long.

    :param sparql_io: seekable text stream containing the SPARQL query; it is
        rewound to the beginning afterwards.
    :param max_length: queries of this many characters or more get no URL.
    :return: the URL with the percent-encoded query as its fragment, or None.
    """
    # Read at most max_length characters: a full read means the query is at
    # least that long, so nothing beyond that needs to be loaded.
    sparql_str = sparql_io.read(max_length)
    sparql_io.seek(0)
    if len(sparql_str) < max_length:
        return 'https://query.wikidata.org/#' + urllib.parse.quote(sparql_str)
    else:
        return None