diff --git a/README.md b/README.md index 88f5790..185cdf7 100644 --- a/README.md +++ b/README.md @@ -1,79 +1,81 @@ # Wikidata Image Positions [This tool](https://wd-image-positions.toolforge.org/) shows [relative position within image](https://www.wikidata.org/wiki/Property:P2677) qualifiers of [depicts](https://www.wikidata.org/wiki/Property:P180) statements on [Wikidata](https://www.wikidata.org/) items as areas on the item’s [image](https://www.wikidata.org/wiki/Property:P18) (or other property). +It also supports [Wikimedia Commons](https://commons.wikimedia.org/) files, +where the [named place on map](https://www.wikidata.org/wiki/Property:P9664) property is used in a similar way. Examples: * [The Coronation of Napoleon](https://wd-image-positions.toolforge.org/item/Q1231009) * [Situation Room](https://wd-image-positions.toolforge.org/item/Q2915674) For more usage information, please see the tool’s [on-wiki documentation page](https://www.wikidata.org/wiki/User:Lucas_Werkmeister/Wikidata_Image_Positions). ## Toolforge setup On Wikimedia Toolforge, this tool runs under the `wd-image-positions` tool name. Source code resides in `~/www/python/src/`, a virtual environment is set up in `~/www/python/venv/`, logs end up in `~/uwsgi.log`. If the web service is not running for some reason, run the following command: ``` webservice start ``` If it’s acting up, try the same command with `restart` instead of `start`. Both should pull their config from the `service.template` file in the source code directory. To update the service, run the following commands after becoming the tool account: ``` cd ~/www/python/src git fetch git diff @ @{u} # inspect changes git merge --ff-only @{u} webservice restart ``` If there were new changes in the Python environment (e.g. new dependencies), add the following steps before the `webservice restart`: ``` webservice shell source ~/www/python/venv/bin/activate pip-sync ~/www/python/src/requirements.txt ``` ## Local development setup You can also run the tool locally, which is much more convenient for development (for example, Flask will automatically reload the application any time you save a file). ``` git clone https://phabricator.wikimedia.org/source/tool-wd-image-positions.git cd tool-wd-image-positions pip3 install -r requirements.txt FLASK_ENV=development flask run ``` If you want, you can do this inside some virtualenv too. ## Contributing To send a patch, you can use any of the following methods: * [Submit a pull request on GitHub.](https://github.com/lucaswerkmeister/tool-wd-image-positions) * Use `git send-email`. (Send the patch(es) to the email address from the Git commit history.) * Upload the changes to a repository of your own and use `git request-pull` (same email address). * Upload a diff on [GitHub Gist](https://gist.github.com/) and send the link to the tool’s maintainer(s) via email, Twitter, on-wiki message, or whatever. * [Create a Diff on Phabricator.](https://phabricator.wikimedia.org/differential/diff/create/) Make sure to add @LucasWerkmeister as subscriber. They’re listed in the maintainer(s)’ order of preference, from most to least preferred, but feel free to use any of these methods as it best suits you. ## License The code in this repository is released under the AGPL v3, as provided in the `LICENSE` file. diff --git a/app.py b/app.py index 64673ba..0b6d4b8 100644 --- a/app.py +++ b/app.py @@ -1,881 +1,894 @@ # -*- coding: utf-8 -*- import collections import decorator import flask import iiif_prezi.factory import json import mwapi import mwoauth import os import random import requests import requests_oauthlib import stat import string import toolforge import yaml from exceptions import * import messages app = flask.Flask(__name__) app.jinja_env.add_extension('jinja2.ext.do') user_agent = toolforge.set_user_agent('lexeme-forms', email='mail@lucaswerkmeister.de') requests_session = requests.Session() requests_session.headers.update({ 'Accept': 'application/json', 'User-Agent': user_agent, }) default_property = 'P18' +depicted_properties = { + # labels are used in image.html with “… with no region specified” before a list + 'P180': 'Depicted', + 'P9664': 'Named places on map', + # note: currently, these must be item-type properties; + # support for other data types (e.g. P1684 inscription) needs more work +} +app.add_template_global(lambda: depicted_properties, 'depicted_properties') + @decorator.decorator def read_private(func, *args, **kwargs): try: f = args[0] fd = f.fileno() except AttributeError: pass except IndexError: pass else: mode = os.stat(fd).st_mode if (stat.S_IRGRP | stat.S_IROTH) & mode: raise ValueError(f'{getattr(f, "name", "config file")} is readable to others, ' 'must be exclusively user-readable!') return func(*args, **kwargs) has_config = app.config.from_file('config.yaml', load=read_private(yaml.safe_load), silent=True) if has_config: consumer_token = mwoauth.ConsumerToken(app.config['OAUTH']['consumer_key'], app.config['OAUTH']['consumer_secret']) else: print('config.yaml file not found, assuming local development setup') app.secret_key = 'fake' def anonymous_session(domain): host = 'https://' + domain return mwapi.Session(host=host, user_agent=user_agent, formatversion=2) def authenticated_session(domain): if 'oauth_access_token' not in flask.session: return None host = 'https://' + domain access_token = mwoauth.AccessToken(**flask.session['oauth_access_token']) auth = requests_oauthlib.OAuth1(client_key=consumer_token.key, client_secret=consumer_token.secret, resource_owner_key=access_token.key, resource_owner_secret=access_token.secret) return mwapi.Session(host=host, auth=auth, user_agent=user_agent, formatversion=2) @decorator.decorator def enableCORS(func, *args, **kwargs): rv = func(*args, **kwargs) response = flask.make_response(rv) response.headers['Access-Control-Allow-Origin'] = '*' return response @app.route('/', methods=['GET', 'POST']) def index(): if flask.request.method == 'POST': if 'item_id' in flask.request.form: item_id = flask.request.form['item_id'] property_id = flask.request.form.get('property_id') if 'manifest' in flask.request.form or 'preview' in flask.request.form: manifest_url = full_url('iiif_manifest_with_property', item_id=item_id, property_id=property_id or default_property) if 'manifest' in flask.request.form: return flask.redirect(manifest_url) else: mirador_protocol = 'https' if manifest_url.startswith('https') else 'http' mirador_url = mirador_protocol + '://tools.wmflabs.org/mirador/?manifest=' + manifest_url return flask.redirect(mirador_url) else: if property_id: return flask.redirect(flask.url_for('item_and_property', item_id=item_id, property_id=property_id)) else: return flask.redirect(flask.url_for('item', item_id=item_id)) if 'iiif_region' in flask.request.form: iiif_region = flask.request.form['iiif_region'] property_id = flask.request.form.get('property_id') if property_id: return flask.redirect(flask.url_for('iiif_region_and_property', iiif_region=iiif_region, property_id=property_id)) else: return flask.redirect(flask.url_for('iiif_region', iiif_region=iiif_region)) if 'image_title' in flask.request.form: image_title = flask.request.form['image_title'] if image_title.startswith('File:'): image_title = image_title[len('File:'):] image_title = image_title.replace(' ', '_') return flask.redirect(flask.url_for('file', image_title=image_title)) return flask.render_template('index.html') @app.route('/login') def login(): redirect, request_token = mwoauth.initiate('https://www.wikidata.org/w/index.php', consumer_token, user_agent=user_agent) flask.session['oauth_request_token'] = dict(zip(request_token._fields, request_token)) return flask.redirect(redirect) @app.route('/oauth/callback') def oauth_callback(): oauth_request_token = flask.session.pop('oauth_request_token', None) if oauth_request_token is None: return flask.render_template('oauth-callback-without-request-token.html', already_logged_in='oauth_access_token' in flask.session, query_string=flask.request.query_string.decode(flask.request.url_charset)) access_token = mwoauth.complete('https://www.wikidata.org/w/index.php', consumer_token, mwoauth.RequestToken(**oauth_request_token), flask.request.query_string, user_agent=user_agent) flask.session['oauth_access_token'] = dict(zip(access_token._fields, access_token)) flask.session.pop('_csrf_token', None) return flask.redirect(flask.url_for('index')) @app.route('/logout') def logout(): flask.session.pop('oauth_request_token', None) return flask.redirect(flask.url_for('index')) @app.route('/item/') def item(item_id): return item_and_property(item_id, property_id=default_property) @app.route('/item//') def item_and_property(item_id, property_id): item = load_item_and_property(item_id, property_id, include_depicteds=True) if 'image_title' not in item: return flask.render_template('item-without-image.html', **item) return flask.render_template('item.html', **item) @app.route('/iiif//manifest.json') def iiif_manifest(item_id): return flask.redirect(flask.url_for('iiif_manifest_with_property', item_id=item_id, property_id=default_property)) @app.route('/iiif///manifest.json') @enableCORS def iiif_manifest_with_property(item_id, property_id): item = load_item_and_property(item_id, property_id, include_description=True, include_metadata=True) if 'image_title' not in item: return '', 404 manifest = build_manifest(item) return flask.jsonify(manifest.toJSON(top=True)) @app.route('/iiif//list/annotations.json') def iiif_annotations(item_id): return iiif_annotations_with_property(item_id, property_id=default_property) @app.route('/iiif///list/annotations.json') @enableCORS def iiif_annotations_with_property(item_id, property_id): item = load_item_and_property(item_id, property_id, include_depicteds=True) url = flask.url_for('iiif_annotations_with_property', item_id=item_id, property_id=property_id, _external=True, _scheme=flask.request.headers.get('X-Forwarded-Proto', 'http')) annolist = { '@id': url, '@type': 'sc:AnnotationList', 'label': 'Annotations for ' + item['label']['value'], 'resources': [] } if 'image_title' not in item: return flask.jsonify(annolist) canvas_url = url[:-len('list/annotations.json')] + 'canvas/c0.json' # Although the pct canvas is OK for the image API, we need to target # canvas coordinates with the annotations, so we need the w,h image_info = load_image_info(item['image_title']) width, height = int(image_info['thumbwidth']), int(image_info['thumbheight']) for depicted in item['depicteds']: if 'item_id' not in depicted: continue # somevalue/novalue not supported for now link = 'http://www.wikidata.org/entity/' + flask.Markup.escape(depicted['item_id']) label = depicted['label']['value'] # We can put a lot more in here, but minimum for now, and ensure works in Mirador anno = { '@id': '#' + depicted['statement_id'], '@type': 'oa:Annotation', 'motivation': 'identifying', 'on': canvas_url, 'resource': { '@id': link, 'format': 'text/plain', 'chars': label } } iiif_region = depicted.get('iiif_region', None) if iiif_region: parts = iiif_region.replace('pct:', '').split(',') x = int(float(parts[0])*width/100) y = int(float(parts[1])*height/100) w = int(float(parts[2])*width/100) h = int(float(parts[3])*height/100) anno['on'] = anno['on'] + '#xywh=' + ','.join(str(d) for d in [x,y,w,h]) annolist['resources'].append(anno) return flask.jsonify(annolist) @app.route('/iiif_region/') def iiif_region(iiif_region): return iiif_region_and_property(iiif_region, default_property) @app.route('/iiif_region//') def iiif_region_and_property(iiif_region, property_id): + iiif_region_string = '"' + iiif_region.replace('\\', '\\\\').replace('"', '\\"') + '"' + property_claim_predicates = ' '.join(f'p:{property_id}' for property_id in depicted_properties) query = ''' SELECT DISTINCT ?item WHERE { - ?item p:P180/pq:P2677 "%s". + VALUES ?p { %s } + ?item ?p [ pq:P2677 %s ]. } - ''' % iiif_region.replace('\\', '\\\\').replace('"', '\\"') + ''' % (property_claim_predicates, iiif_region_string) query_results = requests_session.get('https://query.wikidata.org/sparql', params={'query': query}).json() items = [] items_without_image = [] for result in query_results['results']['bindings']: item_id = result['item']['value'][len('http://www.wikidata.org/entity/'):] item = load_item_and_property(item_id, property_id, include_depicteds=True) if 'image_title' not in item: items_without_image.append(item_id) else: items.append(item) return flask.render_template('iiif_region.html', items=items, items_without_image=items_without_image) @app.route('/file/') def file(image_title): image_title_ = image_title.replace(' ', '_') if image_title_.startswith('File:'): image_title_ = image_title_[len('File:'):] if image_title_ != image_title: return flask.redirect(flask.url_for('file', image_title=image_title_, **flask.request.args)) file = load_file(image_title.replace('_', ' ')) if not file: return flask.render_template('file-not-found.html', title=image_title), 404 return flask.render_template('file.html', **file) @app.route('/api/v1/depicteds_html/file/') @enableCORS def file_depicteds_html(image_title): file = load_file(image_title.replace('_', ' ')) if not file: return flask.render_template('file-not-found.html', title=image_title), 404 return flask.render_template('depicteds.html', depicteds=file['depicteds']) @app.route('/api/v1/add_statement/', methods=['POST']) def api_add_statement(domain): language_codes = request_language_codes() entity_id = flask.request.form.get('entity_id') snaktype = flask.request.form.get('snaktype') item_id = flask.request.form.get('item_id') csrf_token = flask.request.form.get('_csrf_token') if not entity_id or not snaktype or not csrf_token: return 'Incomplete form data', 400 if (snaktype == 'value') != (item_id is not None): return 'Inconsistent form data', 400 if snaktype not in {'value', 'somevalue', 'novalue'}: return 'Bad snaktype', 400 if csrf_token != flask.session['_csrf_token']: return 'Wrong CSRF token (try reloading the page).', 403 if not flask.request.referrer.startswith(full_url('index')): return 'Wrong Referer header', 403 if domain not in {'www.wikidata.org', 'commons.wikimedia.org'}: return 'Unsupported domain', 403 session = authenticated_session(domain) if session is None: return 'Not logged in', 403 token = session.get(action='query', meta='tokens', type='csrf')['query']['tokens']['csrftoken'] depicted = { 'snaktype': snaktype, 'property_id': 'P180', } if snaktype == 'value': value = json.dumps({'entity-type': 'item', 'id': item_id}) depicted['item_id'] = item_id labels = load_labels([item_id], language_codes) depicted['label'] = labels[item_id] else: value = None if snaktype == 'somevalue': depicted['label'] = messages.somevalue(language_codes[0]) elif snaktype == 'novalue': depicted['label'] = messages.novalue(language_codes[0]) else: raise ValueError('Unknown snaktype') try: response = session.post(action='wbcreateclaim', entity=entity_id, snaktype=snaktype, property='P180', value=value, token=token) except mwapi.errors.APIError as error: return str(error), 500 statement_id = response['claim']['id'] depicted['statement_id'] = statement_id return flask.jsonify(depicted=depicted, depicted_item_link=depicted_item_link(depicted)) @app.route('/api/v2/add_qualifier/', methods=['POST']) def api_add_qualifier(domain): statement_id = flask.request.form.get('statement_id') iiif_region = flask.request.form.get('iiif_region') csrf_token = flask.request.form.get('_csrf_token') qualifier_hash = flask.request.form.get('qualifier_hash') # optional if not statement_id or not iiif_region or not csrf_token: return 'Incomplete form data', 400 if csrf_token != flask.session['_csrf_token']: return 'Wrong CSRF token (try reloading the page).', 403 if not flask.request.referrer.startswith(full_url('index')): return 'Wrong Referer header', 403 if domain not in {'www.wikidata.org', 'commons.wikimedia.org'}: return 'Unsupported domain', 403 session = authenticated_session(domain) if session is None: return 'Not logged in', 403 token = session.get(action='query', meta='tokens', type='csrf')['query']['tokens']['csrftoken'] try: response = session.post(action='wbsetqualifier', claim=statement_id, property='P2677', snaktype='value', value=('"' + iiif_region + '"'), **({'snakhash': qualifier_hash} if qualifier_hash else {}), summary='region drawn manually using [[d:User:Lucas Werkmeister/Wikidata Image Positions|Wikidata Image Positions tool]]', token=token) except mwapi.errors.APIError as error: if error.code == 'no-such-qualifier': return 'This region does not exist (anymore) – it may have been edited in the meantime. Please try reloading the page.', 500 return str(error), 500 # find hash of qualifier for qualifier in response['claim']['qualifiers']['P2677']: if qualifier['snaktype'] == 'value' and qualifier['datavalue']['value'] == iiif_region: return flask.jsonify(qualifier_hash=qualifier['hash']) return flask.jsonify(qualifier_hash=None) # https://iiif.io/api/image/2.0/#region @app.template_filter() def iiif_region_to_style(iiif_region): if iiif_region == 'full': return 'left: 0px; top: 0px; width: 100%; height: 100%;' if iiif_region.startswith('pct:'): left, top, width, height = iiif_region[len('pct:'):].split(',') z_index = int(1_000_000 / (float(width)*float(height))) return 'left: %s%%; top: %s%%; width: %s%%; height: %s%%; z-index: %s;' % (left, top, width, height, z_index) left, top, width, height = iiif_region.split(',') z_index = int(1_000_000_000 / (int(width)*int(height))) return 'left: %spx; top: %spx; width: %spx; height: %spx; z-index: %s;' % (left, top, width, height, z_index) @app.template_filter() def user_link(user_name): return (flask.Markup(r'') + flask.Markup(r'') + flask.Markup.escape(user_name) + flask.Markup(r'') + flask.Markup(r'')) @app.template_global() def item_link(item_id, label): return (flask.Markup(r'') + flask.Markup.escape(label['value']) + flask.Markup(r'')) @app.template_filter() def depicted_item_link(depicted): if 'item_id' in depicted: return item_link(depicted['item_id'], depicted['label']) else: return (flask.Markup(r'') + flask.Markup.escape(depicted['label']['value']) + flask.Markup(r'')) @app.template_global() def authentication_area(): if 'OAUTH' not in app.config: return flask.Markup() session = authenticated_session('www.wikidata.org') if session is None: userinfo = None else: try: userinfo = session.get(action='query', meta='userinfo')['query']['userinfo'] except mwapi.errors.APIError as e: if e.code == 'mwoauth-invalid-authorization': # e. g. consumer version updated, treat as not logged in flask.session.pop('oauth_access_token') userinfo = None else: raise e if userinfo is None: return (flask.Markup(r'Log in')) csrf_token = flask.session.get('_csrf_token') if not csrf_token: csrf_token = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(64)) flask.session['_csrf_token'] = csrf_token return (flask.Markup(r'Logged in as ') + user_link(userinfo['name']) + flask.Markup(r'')) @app.template_global() def user_logged_in(): return 'oauth_access_token' in flask.session @app.errorhandler(WrongDataValueType) def handle_wrong_data_value_type(error): response = flask.render_template('wrong-data-value-type.html', expected_data_value_type=error.expected_data_value_type, actual_data_value_type=error.actual_data_value_type) return response, error.status_code def load_item_and_property(item_id, property_id, include_depicteds=False, include_description=False, include_metadata=False): language_codes = request_language_codes() props = ['claims'] if include_description: props.append('descriptions') session = anonymous_session('www.wikidata.org') api_response = session.get(action='wbgetentities', props=props, ids=item_id, languages=language_codes) item_data = api_response['entities'][item_id] item = { 'entity_id': item_id, } entity_ids = [item_id] if include_description: description = None for language_code in language_codes: if language_code in item_data['descriptions']: description = item_data['descriptions'][language_code] break item['description'] = description image_datavalue = best_value(item_data, property_id) if image_datavalue is not None: if image_datavalue['type'] != 'string': raise WrongDataValueType(expected_data_value_type='string', actual_data_value_type=image_datavalue['type']) image_title = image_datavalue['value'] item['image_title'] = image_title info_params = query_default_params() image_attribution_query_add_params(info_params, image_title, language_codes[0]) image_url_query_add_params(info_params, image_title) info_response = session.get(**info_params) item['image_attribution'] = image_attribution_query_process_response(info_response, image_title, language_codes[0]) item['image_url'] = image_url_query_process_response(info_response, image_title) if include_depicteds: depicteds = depicted_items(item_data) for depicted in depicteds: if 'item_id' in depicted: entity_ids.append(depicted['item_id']) if include_metadata: metadata = entity_metadata(item_data) entity_ids += metadata.keys() labels = load_labels(entity_ids, language_codes) item['label'] = labels[item_id] if include_depicteds: for depicted in depicteds: depicted['label'] = depicted_label(depicted, labels, language_codes) item['depicteds'] = depicteds if include_metadata: item['metadata'] = [] for property_id, values in metadata.items(): for value in values: item['metadata'].append({ 'label': labels[property_id], 'value': value }) return item def load_file(image_title): language_codes = request_language_codes() session = anonymous_session('commons.wikimedia.org') query_params = query_default_params() query_params.setdefault('titles', set()).update(['File:' + image_title]) image_attribution_query_add_params(query_params, image_title, language_codes[0]) image_url_query_add_params(query_params, image_title) query_response = session.get(**query_params) page = query_response_page(query_response, 'File:' + image_title) if page.get('missing', False) or page.get('invalid', False): return None page_id = page['pageid'] entity_id = 'M' + str(page_id) file = { 'entity_id': entity_id, 'image_title': image_title, 'image_attribution': image_attribution_query_process_response(query_response, image_title, language_codes[0]), 'image_url': image_url_query_process_response(query_response, image_title), } entity_ids = [] api_response = session.get(action='wbgetentities', props=['claims'], ids=[entity_id], languages=language_codes) file_data = api_response['entities'][entity_id] depicteds = depicted_items(file_data) for depicted in depicteds: if 'item_id' in depicted: entity_ids.append(depicted['item_id']) labels = load_labels(entity_ids, language_codes) for depicted in depicteds: depicted['label'] = depicted_label(depicted, labels, language_codes) file['depicteds'] = depicteds return file def depicted_label(depicted, labels, language_codes): if 'item_id' in depicted: return labels[depicted['item_id']] elif depicted['snaktype'] == 'somevalue': return messages.somevalue(language_codes[0]) elif depicted['snaktype'] == 'novalue': return messages.novalue(language_codes[0]) else: raise ValueError('depicted has neither item ID nor somevalue/novalue snaktype') def load_image_info(image_title): file_title = 'File:' + image_title.replace(' ', '_') session = anonymous_session('commons.wikimedia.org') response = session.get(action='query', prop='imageinfo', iiprop='url|mime', iiurlwidth=8000, titles=file_title) return response['query']['pages'][0]['imageinfo'][0] def full_url(endpoint, **kwargs): return flask.url_for(endpoint, _external=True, _scheme=flask.request.headers.get('X-Forwarded-Proto', 'http'), **kwargs) def current_url(): return full_url(flask.request.endpoint, **flask.request.view_args) def language_string_wikibase_to_iiif(language_string): if language_string is None: return None return {language_string['language']: language_string['value']} def build_manifest(item): base_url = current_url()[:-len('/manifest.json')] fac = iiif_prezi.factory.ManifestFactory() fac.set_base_prezi_uri(base_url) fac.set_debug('error') iiif_item_label = language_string_wikibase_to_iiif(item['label']) iiif_item_description = language_string_wikibase_to_iiif(item['description']) manifest = fac.manifest(ident='manifest.json') if iiif_item_label is not None: manifest.label = iiif_item_label if iiif_item_description is not None: manifest.description = iiif_item_description attribution = image_attribution(item['image_title'], request_language_codes()[0]) if attribution is not None: manifest.attribution = attribution['attribution_text'] manifest.license = attribution['license_url'] for metadata in item['metadata']: manifest.set_metadata({ 'label': language_string_wikibase_to_iiif(metadata['label']), 'value': metadata['value'], }) sequence = manifest.sequence(ident='normal', label='default order') canvas = sequence.canvas(ident='c0') if iiif_item_label is not None: canvas.label = iiif_item_label annolist = fac.annotationList(ident='annotations', label='Things depicted on this canvas') canvas.add_annotationList(annolist) populate_canvas(canvas, item, fac) return manifest def populate_canvas(canvas, item, fac): image_info = load_image_info(item['image_title']) width, height = image_info['thumbwidth'], image_info['thumbheight'] canvas.set_hw(height, width) anno = canvas.annotation(ident='a0') img = anno.image(ident=image_info['thumburl'], iiif=False) img.set_hw(height, width) img.format = image_info['mime'] # add a thumbnail to the canvas thumbs_path = image_info['thumburl'].replace('/wikipedia/commons/', '/wikipedia/commons/thumb/') thumb_400 = thumbs_path + '/400px-' + item['image_title'] canvas.thumbnail = fac.image(ident=thumb_400) canvas.thumbnail.format = image_info['mime'] thumbwidth, thumbheight = 400, int(height*(400/width)) canvas.thumbnail.set_hw(thumbheight, thumbwidth) def request_language_codes(): """Determine the MediaWiki language codes to use from the request context.""" # this could be made more accurate by using meta=languageinfo to match MediaWiki and BCP 47 language_codes = flask.request.args.getlist('uselang') for accept_language in flask.request.headers.get('Accept-Language', '').split(','): language_code = accept_language.split(';')[0].strip() if language_code == '*' or not language_code: continue language_code = language_code.lower() if '-' in language_code: # these almost never match between MediaWiki and BCP 47: # https://gist.github.com/lucaswerkmeister/3469d5e7edbc59a8d03f347d35eed585 language_codes.append(language_code.split('-')[0]) else: # these often match between MediaWiki and BCP 47, just assume they do language_codes.append(language_code) language_codes.append('en') return language_codes def best_value(entity_data, property_id): if property_id not in entity_data['claims']: return None statements = entity_data['claims'][property_id] normal_value = None deprecated_value = None for statement in statements: if statement['mainsnak']['snaktype'] != 'value': continue datavalue = statement['mainsnak']['datavalue'] if statement['rank'] == 'preferred': return datavalue if statement['rank'] == 'normal': normal_value = datavalue else: deprecated_value = datavalue return normal_value or deprecated_value def best_values(entity_data, property_id): if property_id not in entity_data['claims']: return [] statements = entity_data['claims'][property_id] preferred_values = [] normal_values = [] deprecated_values = [] for statement in statements: if statement['mainsnak']['snaktype'] != 'value': continue datavalue = statement['mainsnak']['datavalue'] if statement['rank'] == 'preferred': preferred_values.append(datavalue) elif statement['rank'] == 'normal': normal_values.append(datavalue) else: deprecated_values.append(datavalue) return preferred_values or normal_values or deprecated_values def depicted_items(entity_data): depicteds = [] statements = entity_data.get('claims', entity_data.get('statements', {})) if statements == []: statements = {} # T222159 - for statement in statements.get('P180', []): - snaktype = statement['mainsnak']['snaktype'] - depicted = { - 'snaktype': snaktype, - 'statement_id': statement['id'], - 'property_id': 'P180', - } - if snaktype == 'value': - depicted['item_id'] = statement['mainsnak']['datavalue']['value']['id'] - - for qualifier in statement.get('qualifiers', {}).get('P2677', []): - if qualifier['snaktype'] != 'value': - continue - depicted['iiif_region'] = qualifier['datavalue']['value'] - depicted['qualifier_hash'] = qualifier['hash'] - break + for property_id in depicted_properties: + for statement in statements.get(property_id, []): + snaktype = statement['mainsnak']['snaktype'] + depicted = { + 'snaktype': snaktype, + 'statement_id': statement['id'], + 'property_id': property_id, + } + if snaktype == 'value': + depicted['item_id'] = statement['mainsnak']['datavalue']['value']['id'] + + for qualifier in statement.get('qualifiers', {}).get('P2677', []): + if qualifier['snaktype'] != 'value': + continue + depicted['iiif_region'] = qualifier['datavalue']['value'] + depicted['qualifier_hash'] = qualifier['hash'] + break - depicteds.append(depicted) + depicteds.append(depicted) return depicteds def entity_metadata(entity_data): # property IDs based on https://www.wikidata.org/wiki/Wikidata:WikiProject_Visual_arts/Item_structure#Describing_individual_objects property_ids = [ 'P170', # creator 'P1476', # title 'P571', # inception 'P186', # material used 'P2079', # fabrication method 'P2048', # height 'P2049', # width 'P2610', # thickness 'P88', # commissioned by 'P1071', # location of final assembly 'P127', # owned by 'P1259', # coordinates of the point of view 'P195', # collection 'P276', # location 'P635', # coordinate location 'P1684', # inscription 'P136', # genre 'P135', # movement 'P921', # main subject 'P144', # based on 'P941', # inspired by ] metadata = collections.defaultdict(list) session = anonymous_session('www.wikidata.org') for property_id in property_ids: for value in best_values(entity_data, property_id): response = session.get(action='wbformatvalue', generate='text/html', datavalue=json.dumps(value), property=property_id) metadata[property_id].append(response['result']) return metadata def load_labels(entity_ids, language_codes): entity_ids = list(set(entity_ids)) labels = {} session = anonymous_session('www.wikidata.org') for chunk in [entity_ids[i:i+50] for i in range(0, len(entity_ids), 50)]: items_data = session.get(action='wbgetentities', props='labels', languages=language_codes, ids=chunk)['entities'] for entity_id, item_data in items_data.items(): labels[entity_id] = {'language': 'zxx', 'value': entity_id} for language_code in language_codes: if language_code in item_data['labels']: labels[entity_id] = item_data['labels'][language_code] break return labels def image_attribution(image_title, language_code): params = query_default_params() image_attribution_query_add_params(params, image_title, language_code) session = anonymous_session('commons.wikimedia.org') response = session.get(**params) return image_attribution_query_process_response(response, image_title, language_code) def image_attribution_query_add_params(params, image_title, language_code): params.setdefault('prop', set()).update(['imageinfo']) params.setdefault('iiprop', set()).update(['extmetadata']) params['iiextmetadatalanguage'] = language_code params.setdefault('titles', set()).update(['File:' + image_title]) def image_attribution_query_process_response(response, image_title, language_code): page = query_response_page(response, 'File:' + image_title) imageinfo = page['imageinfo'][0] metadata = imageinfo['extmetadata'] no_value = {'value': None} attribution_required = metadata.get('AttributionRequired', no_value)['value'] if attribution_required != 'true': return None attribution = flask.Markup() artist = metadata.get('Artist', no_value)['value'] if artist: attribution += flask.Markup(r', ') + flask.Markup(artist) license_short_name = metadata.get('LicenseShortName', no_value)['value'] license_url = metadata.get('LicenseUrl', no_value)['value'] if license_short_name and license_url: attribution += (flask.Markup(r', ') + flask.Markup.escape(license_short_name) + flask.Markup(r'')) credit = metadata.get('Credit', no_value)['value'] if credit: attribution += flask.Markup(r' (') + flask.Markup(credit) + flask.Markup(r')') attribution = attribution[len(', '):] return { 'license_url': license_url, 'attribution_text': attribution.striptags(), 'attribution_html': attribution, } def image_url(image_title): params = query_default_params() image_url_query_add_params(params, image_title) session = anonymous_session('commons.wikimedia.org') response = session.get(**params) return image_url_query_process_response(response, image_title) def image_url_query_add_params(params, image_title): params.setdefault('prop', set()).update(['imageinfo']) params.setdefault('iiprop', set()).update(['url']) params.setdefault('titles', set()).update(['File:' + image_title]) def image_url_query_process_response(response, image_title): page = query_response_page(response, 'File:' + image_title) imageinfo = page['imageinfo'][0] url = imageinfo['url'] return url def query_default_params(): return {'action': 'query', 'formatversion': 2} def query_response_page(response, title): """Get the page corresponding to a title from a query response.""" for normalized in response['query'].get('normalized', []): if normalized['from'] == title: title = normalized['to'] break pages = response['query']['pages'] return next(page for page in pages if page['title'] == title) @app.after_request def denyFrame(response): """Disallow embedding the tool’s pages in other websites. If other websites can embed this tool’s pages, e. g. in