diff --git a/parse_wikitext.py b/parse_wikitext.py
index b1d84a9..4cfec02 100644
--- a/parse_wikitext.py
+++ b/parse_wikitext.py
@@ -1,32 +1,37 @@
import bs4 # type: ignore
import cachetools
import flask
import mwapi # type: ignore
import threading
from typing import Tuple
summary_cache = cachetools.LRUCache(maxsize=1024) # type: cachetools.LRUCache[Tuple[str, str], flask.Markup]
summary_cache_lock = threading.RLock()
@cachetools.cached(cache=summary_cache,
                    key=lambda session, summary: (session.host, summary),
                    lock=summary_cache_lock)
 def parse_summary(session: mwapi.Session, summary: str) -> flask.Markup:
     """Parses a summary text or fragment into HTML."""
-    response = session.get(action='parse',
-                           summary=summary,
-                           prop=[],
-                           formatversion=2)
-    summary_html = response['parse']['parsedsummary']
-    return fix_markup(summary_html, session.host)
+    # If the API refuses to parse the summary, report the error and
+    # fall back to the summary itself, escaped for use as HTML.
+    try:
+        parsed = session.get(action='parse',
+                             summary=summary,
+                             prop=[],
+                             formatversion=2)
+    except mwapi.errors.APIError as error:
+        print("Error formatting summary {!r}: {}".format(summary, error))
+        return flask.Markup.escape(summary)
+    return fix_markup(parsed['parse']['parsedsummary'], session.host)
def fix_markup(html: str, host: str) -> flask.Markup:
soup = bs4.BeautifulSoup(html, 'html.parser')
for link in soup.select('a[href]'):
href = link['href']
if href.startswith('/') and not href.startswith('//'):
link['href'] = host + href
return flask.Markup(str(soup))
diff --git a/test_parse_wikitext.py b/test_parse_wikitext.py
index 8bd287a..86d1de7 100644
--- a/test_parse_wikitext.py
+++ b/test_parse_wikitext.py
@@ -1,25 +1,33 @@
import flask
+import mwapi # type: ignore
import parse_wikitext
from test_utils import FakeSession
def test_parse_summary_two_wikis() -> None:
title = '[[Kategorie:Wikimedia]]'
summary1 = 'Kategorie:Wikimedia'
session1 = FakeSession({
'parse': {
'parsedsummary': summary1,
},
})
session1.host = 'https://en.wikipedia.org'
assert parse_wikitext.parse_summary(session1, title) == flask.Markup(summary1)
summary2 = 'Kategorie:Wikimedia'
session2 = FakeSession({
'parse': {
'parsedsummary': summary2,
},
})
session2.host = 'https://de.wikipedia.org'
assert parse_wikitext.parse_summary(session2, title) == flask.Markup(summary2)
+
+
+def test_parse_summary_error() -> None:
+    summary = '<script>alert("xss")</script>'  # on an API error this must come back HTML-escaped
+    session = FakeSession(mwapi.errors.APIError('fake', 'XSS detected!', 'for more information see the mailing list blah blah'))
+    session.host = 'https://en.wikipedia.org'
+    assert parse_wikitext.parse_summary(session, summary) == flask.Markup('&lt;script&gt;alert(&#34;xss&#34;)&lt;/script&gt;')
diff --git a/test_utils.py b/test_utils.py
index 6da4821..dcbc9fc 100644
--- a/test_utils.py
+++ b/test_utils.py
@@ -1,28 +1,31 @@
import requests
import requests_oauthlib # type: ignore
from typing import Any, Optional, Union
class FakeSession:
     host: Optional[str]
-    def __init__(self, get_response: dict, post_response: Optional[Union[dict, BaseException]] = None) -> None:
+    def __init__(self, get_response: Union[dict, BaseException], post_response: Optional[Union[dict, BaseException]] = None) -> None:
         self.get_response = get_response
         self.post_response = post_response
         self.host = None
         self.session = requests.Session()
         self.session.auth = requests_oauthlib.OAuth1(client_key='fake client key', client_secret='fake client secret',
                                                      resource_owner_key='fake resource owner key', resource_owner_secret='fake resource owner secret')
     def get(self, *args: Any, **kwargs: Any) -> dict:
-        return self.get_response
+        # a configured exception is raised instead of being returned
+        if isinstance(self.get_response, BaseException):
+            raise self.get_response
+        return self.get_response
     def post(self, *args: Any, **kwargs: Any) -> dict:
-        if self.post_response:
+        if self.post_response is not None:  # an empty dict is still a configured response
             if isinstance(self.post_response, BaseException):
                 raise self.post_response
             else:
                 return self.post_response
         else:
             raise NotImplementedError