Page MenuHomePhabricator

0001-SECURITY-XSS-and-cache-pollution-issues-in-DoubleWik.patch

Authored By
Bawolff
Jun 1 2016, 10:09 PM
Size
13 KB
Referenced Files
None
Subscribers
None

0001-SECURITY-XSS-and-cache-pollution-issues-in-DoubleWik.patch

From 0c978485b389f649164a046c115801dc236e516b Mon Sep 17 00:00:00 2001
From: Brian Wolff <bawolff+wn@gmail.com>
Date: Tue, 31 May 2016 23:36:42 -0400
Subject: [PATCH] SECURITY: XSS and cache pollution issues in DoubleWiki
This fixes the following issues:
* In alignment <pre> tags, if the search text had a " in it, you
could break out of an attribute and add an event handler (XSS)
* If any HTML attributes can contain </p> or </dl> tags, you can
cause the table row to be broken in the middle of the html
attribute, leading to XSS. (Most of modern MW does not allow this)
* If the page contains a parser function or similar tag with private
information on it, which calls $parser->disableCache() in order to
keep the info per user, the way this extension does caching could
allow a malicious user to find this private information. This is
exasperated by the fact that the extension can perform DoubleWiki
matches from edit pages, allowing a different origin site to load
a page in the background which puts the secret info in the cache
with no user interaction beyond browsing to the malicious site
(In particular, this can leak rollback tokens on some versions
of MediaWiki, and it can leak suppressed usernames via
{{special:Listusers}}.
I also modified some code to be more paranoid about escaping
(for example, escaping language names, even though they should not
be under user control).
Bug: T131199
---
DoubleWiki_body.php | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++--
i18n/en.json | 7 +-
i18n/qqq.json | 5 +-
3 files changed, 182 insertions(+), 10 deletions(-)
diff --git a/DoubleWiki_body.php b/DoubleWiki_body.php
index 77ab2fc..1f98dac 100644
--- a/DoubleWiki_body.php
+++ b/DoubleWiki_body.php
@@ -15,16 +15,39 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html
-class DoubleWiki {
+use MediaWiki\Logger\LoggerFactory;
+use Psr\Log\LoggerAwareInterface;
+use Psr\Log\LoggerInterface;
+
+class DoubleWiki implements LoggerAwareInterface {
+
/**
* Tags that must be closed. (list copied from Sanitizer.php)
*/
public $tags = '/<\/?(b|del|i|ins|u|font|big|small|sub|sup|h1|h2|h3|h4|h5|h6|cite|code|em|s|strike|strong|tt|tr|td|var|div|center|blockquote|ol|ul|dl|table|caption|pre|ruby|rt|rb|rp|p|span)([\s](.*?)>|>)/i';
/**
+ * Determine if the page has html type things which make it
+ * potentially unsafe to do these types of regex replacements on.
+ * Most of MW does not output html that will match this.
+ */
+ const UNSAFE_ATTRIBS = '/<[^><]*</';
+
+ protected $logger;
+
+ public function setLogger( LoggerInterface $logger ) {
+ $this->logger = $logger;
+ }
+
+ public function __construct() {
+ $this->setLogger( LoggerFactory::getInstance( 'doublewiki' ) );
+ }
+
+ /**
* Read the list of matched phrases and add tags to the html output.
*/
function addMatchingTags ( &$text, $lang ) {
+ $lang = htmlspecialchars( $lang );
$pattern = "/<div id=\"align-" . preg_quote( $lang, '/' ) . "\" style=\"display:none;\">\n*<pre>(.*?)<\/pre>\n*<\/div>/is";
$m = array();
if ( ! preg_match( $pattern, $text, $m ) ) {
@@ -35,7 +58,26 @@ class DoubleWiki {
$items = array();
preg_match_all( $line_pattern, $m[1], $items, PREG_SET_ORDER );
foreach ( $items as $n => $i ) {
- $text = str_replace( $i[1], "<span id=\"dw-" . preg_quote( $n, '/' ) . "\" title=\"{$i[2]}\"/>" . $i[1], $text );
+ if ( preg_match( '/^[^<>]*(?:<[^<>]*>[^<>]*)*$/', $i[1] ) !== 1 ) {
+ // unclosed html, skip.
+ $this->logger->warning( "Potentially unsafe replacement text '{$text}' skipped",
+ [
+ "text" => $i[1],
+ "replacement-title" => $i[2],
+ "method" => __METHOD__,
+ "target-lang" => $lang
+ ]
+ );
+ continue;
+ }
+ // If this replaces inside an html tag, the check in
+ // addMatchedText() for self::UNSAFE_ATTRIBS should catch it.
+ $id = "dw-" . htmlspecialchars( $n );
+ // It is particularly important that " is escaped, otherwise this
+ // won't match the regex later on, and won't be removed from the page
+ // source.
+ $title = Sanitizer::escapeHtmlAllowEntities( $i[2] );
+ $text = str_replace( $i[1], "<span id=\"$id\" title=\"$title\"/>" . $i[1], $text );
}
}
@@ -46,6 +88,62 @@ class DoubleWiki {
}
/**
+ * Verify that html meets certain assumptions
+ *
+ * Includes that translation only contains a text body
+ * and that both of them do not contain < or > in attributes.
+ *
+ * @param $text String HTML contents of text in this language
+ * @param $translation String HTML contents of translation
+ * @param $curTitle String Prefixed db key for current page
+ * @param $translationTitle Prefixed db key including interwiki for translation
+ * @throws FatalError If unsafe input detected.
+ */
+ private function verifyTextIsSane( $text, $translation, $curTitle, $translationTitle ) {
+ if ( preg_match( '/<(?:html|!doctype|head)/i', $translation ) !== 0 ) {
+ // This can happen for example, if you have an interwiki to a special
+ // page. This is probably not actually unsafe, but would give garbled
+ // output, and is almost certainly not intentional. Abort as precaution.
+ $this->logger->info( "DoubleWiki aborted due to full html in {page}",
+ [
+ "method" => __METHOD__,
+ "page" => $translationTitle,
+ "frompage" => $curTitle
+ ]
+ );
+ $msg = wfMessage( 'doublewiki-translationfullhtml' )
+ ->params( $translationTitle )
+ ->parse();
+ throw new FatalError( $msg );
+ }
+
+ $unsafePage = false;
+ if ( preg_match( self::UNSAFE_ATTRIBS, $text ) !== 0 ) {
+ $unsafePage = $curTitle;
+ } elseif ( preg_match( self::UNSAFE_ATTRIBS, $translation ) !== 0 ) {
+ $unsafePage = $translationTitle;
+ }
+
+ if ( $unsafePage !== false ) {
+ // It is assumed that this will basically
+ // never happen in a non-malicious situation
+ $this->logger->warning(
+ "Aborting DoubleWiki due to unsafe HTML on {unsafe} during match between {left} and {right}",
+ [
+ "unsafe" => $unsafePage,
+ "left" => $curTitle,
+ "right" => $translationTitle,
+ "method" => __METHOD__
+ ]
+ );
+ $msg = wfMessage( 'doublewiki-unsafeattribute' )
+ ->params( $unsafePage )
+ ->parse();
+ throw new FatalError( $msg );
+ }
+ }
+
+ /**
* Hook function called with &match=lang
* Transform $text into a bilingual version
* @param $out OutputPage
@@ -55,11 +153,27 @@ class DoubleWiki {
function addMatchedText( &$out, &$text ) {
global $wgContLang, $wgContLanguageCode, $wgMemc, $wgDoubleWikiCacheTime;
- $match_request = $out->getRequest()->getText( 'match' );
- if ( $match_request === '' ) {
+ $req = $out->getRequest();
+ $action = $req->getVal( 'action', 'view' );
+ if ( $action !== 'view' && $action !== 'render' ) {
+ // Sounds fishy. Bail
+ return true;
+ }
+
+ if ( $out->getTitle()->inNamespace( NS_SPECIAL ) ) {
+ // Sounds fishy, bail.
+ // Also probably wouldn't work anyhow
+ return true;
+ }
+
+ $match_request = $req->getText( 'match' );
+ if ( $match_request === ''
+ // This is paranoia, since checking it matches an interlanguage
+ // link should eliminate evil stuff, but paranoia never hurt.
+ || htmlspecialchars( $match_request ) !== $match_request
+ ) {
return true;
}
- $this->addMatchingTags ( $text, $match_request );
$langLinks = $out->getLanguageLinks();
foreach( $langLinks as $l ) {
@@ -78,16 +192,64 @@ class DoubleWiki {
$languageName = Language::fetchLanguageName( $iw );
$myLanguage = Language::fetchLanguageName( $wgContLang->getCode() );
$translation = Http::get( wfAppendQuery( $url, array( 'action' => 'render' ) ) );
+ // This is hacky. Giving null as the argument means return the
+ // current value without setting (See docs wfSetVar()).
+ // enableClientCache() will be set to false if the page contains
+ // something that is very per-user, like {{Special:RecentChanges}}.
+ // In which case we don't want to use the current user's version. (Could contain tokens)
+ // Maybe we should do this in all cases? Maybe we should use
+ // ParserCache::singleton()->get() w/ canonical parser options instead?
+ $po = null;
+ if ( $out->canUseWikiPage() ) {
+ $po = $out->getWikiPage()->makeParserOptions( $out );
+ $canonicalPo = $out->getWikiPage()->makeParserOptions( 'canonical' );
+ // caching already varries by user lang.
+ $canonicalPo->setUserLang( $out->getLanguage() );
+ }
+ if ( $out->enableClientCache( null ) === false
+ || !$po
+ || !$po->matches( $canonicalPo )
+ ) {
+ $curPageUrl = $out->getTitle()->getFullUrl(
+ [
+ 'action' => 'render',
+ 'uselang' => $out->getLanguage()->getCode()
+ ],
+ false,
+ PROTO_CANONICAL
+ );
+ $res = Http::get( $curPageUrl );
+ if ( $res === null ) {
+ // Wiki can't reach itself??
+ $this->logger->error( "Could not fetch {curPageUrl}",
+ [ 'method' => __METHOD__, 'curPageUrl' => $curPageUrl ]
+ );
+ $msg = wfMessage( 'doublewiki-cannotfetchself' )->parse();
+ throw new FatalError( $msg );
+ }
+ $text = $res;
+ }
if ( $translation !== null ) {
+ $this->addMatchingTags( $text, $match_request );
+
+ // Important that this be done after
+ // addMatchingTags()
+ $this->verifyTextIsSane(
+ $text,
+ $translation,
+ $out->getTitle()->getPrefixedDbKey(),
+ $nt->getPrefixedDbKey()
+ );
/**
* first find all links that have no 'class' parameter.
* these links are local so we add '?match=xx' to their url,
* unless it already contains a '?'
*/
+ $langCode = htmlspecialchars( $wgContLanguageCode );
$translation = preg_replace(
"/<a href=\"http:\/\/([^\"\?]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",
- "<a href=\"http://\\1?match={$wgContLanguageCode}\"\\2>", $translation );
+ "<a href=\"http://\\1?match={$langCode}\"\\2>", $translation );
// now add class='extiw' to these links
$translation = preg_replace(
"/<a href=\"http:\/\/([^\"]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",
@@ -113,7 +275,6 @@ class DoubleWiki {
$match_request_lang = wfGetLangObj( $match_request );
$text = $this->matchColumns( $text, $myLanguage, $myURL, $wgContLang,
$translation, $languageName, $url, $match_request_lang );
-
$wgMemc->set( $key, $text, $wgDoubleWikiCacheTime );
}
}
@@ -164,6 +325,9 @@ class DoubleWiki {
$found = true;
} else {
// look for requested tag in the text
+ // This is safe, since we only select up to next
+ // </p> or </dl> and the check in addMatchedText
+ // fatals if there is attributes with html tags inside.
$a = strpos ( $right_text, $tag );
if ( $a ) {
$found = true;
@@ -212,6 +376,8 @@ class DoubleWiki {
// format table head and return results
$left_url = htmlspecialchars( $left_url );
$right_url = htmlspecialchars( $right_url );
+ $left_title = htmlspecialchars( $left_title );
+ $right_title = htmlspecialchars( $right_title );
$head =
"<table id=\"doubleWikiTable\" width=\"100%\" border=\"0\" bgcolor=\"white\" rules=\"cols\" cellpadding=\"0\">
<colgroup><col width=\"50%\"/><col width=\"50%\"/></colgroup><thead>
diff --git a/i18n/en.json b/i18n/en.json
index 0acf273..64d6d3a 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -2,5 +2,8 @@
"@metadata": {
"authors": []
},
- "doublewiki-desc": "Displays a page and its translation from another wiki on two columns of the same page"
-}
\ No newline at end of file
+ "doublewiki-desc": "Displays a page and its translation from another wiki on two columns of the same page",
+ "doublewiki-unsafeattribute": "Cannot show DoubleWiki match for this page due to an HTML attribute containing an unescaped &lt; on [[:$1]]",
+ "doublewiki-translationfullhtml": "Cannot show DoubleWiki, since the page [[:$1]] appears to be a full HTML document and not a wiki page",
+ "doublewiki-cannotfetchself": "Cannot show DoubleWiki since the current page cannot be fetched. Check that the wiki is network accessible to itself."
+}
diff --git a/i18n/qqq.json b/i18n/qqq.json
index dcb8abf..b466d6b 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -4,5 +4,8 @@
"Raymond"
]
},
- "doublewiki-desc": "Extension description displayed on [[Special:Version]]"
+ "doublewiki-desc": "Extension description displayed on [[Special:Version]]",
+ "doublewiki-unsafeattribute": "Shown if DoubleWiki is disabled due to suspicious html in either the current page or the page being compared against. $1 - Page name with the unsafe html, possibly including an interlanguage prefix.",
+ "doublewiki-translationfullhtml": "Shown if the page being compared does not appear to be a wikipage. For example if the page being compared against is a special page, this error will be shown. $1 Name of page being compared, including interwiki prefix (e.g. 'de:Special:BlankPage')",
+ "doublewiki-cannotfetchself": "{{optional}} Shown if the current page is inaccessible over HTTP. Since it is very unlikely for this error to happen, it is probably optional to translate it."
}
--
2.0.1

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3770204
Default Alt Text
0001-SECURITY-XSS-and-cache-pollution-issues-in-DoubleWik.patch (13 KB)

Event Timeline