diff --git a/parse_wikitext.py b/parse_wikitext.py index b1d84a9..4cfec02 100644 --- a/parse_wikitext.py +++ b/parse_wikitext.py @@ -1,32 +1,37 @@ import bs4 # type: ignore import cachetools import flask import mwapi # type: ignore import threading from typing import Tuple summary_cache = cachetools.LRUCache(maxsize=1024) # type: cachetools.LRUCache[Tuple[str, str], flask.Markup] summary_cache_lock = threading.RLock() @cachetools.cached(cache=summary_cache, key=lambda session, summary: (session.host, summary), lock=summary_cache_lock) def parse_summary(session: mwapi.Session, summary: str) -> flask.Markup: """Parses a summary text or fragment into HTML.""" - response = session.get(action='parse', - summary=summary, - prop=[], - formatversion=2) - summary_html = response['parse']['parsedsummary'] - return fix_markup(summary_html, session.host) + try: + response = session.get(action='parse', + summary=summary, + prop=[], + formatversion=2) + except mwapi.errors.APIError as e: + print("Error formatting summary {!r}: {}".format(summary, e)) + return flask.Markup.escape(summary) + else: + summary_html = response['parse']['parsedsummary'] + return fix_markup(summary_html, session.host) def fix_markup(html: str, host: str) -> flask.Markup: soup = bs4.BeautifulSoup(html, 'html.parser') for link in soup.select('a[href]'): href = link['href'] if href.startswith('/') and not href.startswith('//'): link['href'] = host + href return flask.Markup(str(soup)) diff --git a/test_parse_wikitext.py b/test_parse_wikitext.py index 8bd287a..86d1de7 100644 --- a/test_parse_wikitext.py +++ b/test_parse_wikitext.py @@ -1,25 +1,33 @@ import flask +import mwapi # type: ignore import parse_wikitext from test_utils import FakeSession def test_parse_summary_two_wikis() -> None: title = '[[Kategorie:Wikimedia]]' summary1 = 'Kategorie:Wikimedia' session1 = FakeSession({ 'parse': { 'parsedsummary': summary1, }, }) session1.host = 'https://en.wikipedia.org' assert 
parse_wikitext.parse_summary(session1, title) == flask.Markup(summary1) summary2 = 'Kategorie:Wikimedia' session2 = FakeSession({ 'parse': { 'parsedsummary': summary2, }, }) session2.host = 'https://de.wikipedia.org' assert parse_wikitext.parse_summary(session2, title) == flask.Markup(summary2) + + +def test_parse_summary_error() -> None: + summary = '<script>alert("xss")</script>' + session = FakeSession(mwapi.errors.APIError('fake', 'XSS detected!', 'for more information see the mailing list blah blah')) + session.host = 'https://en.wikipedia.org' + assert parse_wikitext.parse_summary(session, summary) == flask.Markup('&lt;script&gt;alert(&#34;xss&#34;)&lt;/script&gt;') diff --git a/test_utils.py b/test_utils.py index 6da4821..dcbc9fc 100644 --- a/test_utils.py +++ b/test_utils.py @@ -1,28 +1,31 @@ import requests import requests_oauthlib # type: ignore from typing import Any, Optional, Union class FakeSession: host: Optional[str] - def __init__(self, get_response: dict, post_response: Optional[Union[dict, BaseException]] = None) -> None: + def __init__(self, get_response: Union[dict, BaseException], post_response: Optional[Union[dict, BaseException]] = None) -> None: self.get_response = get_response self.post_response = post_response self.host = None self.session = requests.Session() self.session.auth = requests_oauthlib.OAuth1(client_key='fake client key', client_secret='fake client secret', resource_owner_key='fake resource owner key', resource_owner_secret='fake resource owner secret') def get(self, *args: Any, **kwargs: Any) -> dict: - return self.get_response + if isinstance(self.get_response, BaseException): + raise self.get_response + else: + return self.get_response def post(self, *args: Any, **kwargs: Any) -> dict: if self.post_response: if isinstance(self.post_response, BaseException): raise self.post_response else: return self.post_response else: raise NotImplementedError