Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F40500
T88310e.patch
csteipp (Chris Steipp)
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
•
csteipp
Feb 12 2015, 10:23 PM
2015-02-12 22:23:14 (UTC+0)
Size
17 KB
Referenced Files
None
Subscribers
None
T88310e.patch
View Options
From e9acb0351fc0d4b4906f31eee95f751f5cba5f79 Mon Sep 17 00:00:00 2001
From: csteipp <csteipp@wikimedia.org>
Date: Tue, 3 Feb 2015 17:45:05 -0800
Subject: [PATCH] SECURITY: Always expand xml entities when checking SVG's
Add an XmlReaderTypeCheck that is based on php's XMLReader instead of
xml_parse, and use it where XmlTypeCheck was being used for filtering
SVG's.
XmlTypeCheck's use of xml_parse for filtering SVG's sometimes leaves
xml entities unexpanded, which can lead to false-negatives.
Bug: T88310
Change-Id: I77c77a2d6d22f549e7ef969811f7edd77a45dbba
---
autoload.php | 1 +
includes/libs/XmlReaderTypeCheck.php | 337 +++++++++++++++++++++
includes/upload/UploadBase.php | 2 +-
.../includes/libs/XmlReaderTypeCheckTest.php | 49 +++
tests/phpunit/includes/libs/XmlTypeCheckTest.php | 19 ++
tests/phpunit/includes/upload/UploadBaseTest.php | 15 +-
6 files changed, 420 insertions(+), 3 deletions(-)
create mode 100644 includes/libs/XmlReaderTypeCheck.php
create mode 100644 tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php
diff --git a/autoload.php b/autoload.php
index 11b5266..5facd2c 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1334,6 +1334,7 @@ $wgAutoloadLocalClasses = array(
'XmlJsCode' => __DIR__ . '/includes/Xml.php',
'XmlSelect' => __DIR__ . '/includes/Xml.php',
'XmlTypeCheck' => __DIR__ . '/includes/libs/XmlTypeCheck.php',
+ 'XmlReaderTypeCheck' => __DIR__ . '/includes/libs/XmlReaderTypeCheck.php',
'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php',
'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
diff --git a/includes/libs/XmlReaderTypeCheck.php b/includes/libs/XmlReaderTypeCheck.php
new file mode 100644
index 0000000..c66aff5
--- /dev/null
+++ b/includes/libs/XmlReaderTypeCheck.php
@@ -0,0 +1,337 @@
+<?php
+/**
+ * XML syntax and type checker using XMLReader. This should function
+ * identically to XmlTypeCheck, except it uses XMLReader instead of
+ * xml_parse, which gives us more control over the expansion of XML
+ * entities.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+class XmlReaderTypeCheck {
+ /**
+ * Will be set to true or false to indicate whether the file is
+ * well-formed XML. Note that this doesn't check schema validity.
+ */
+ public $wellFormed = null;
+
+ /**
+ * Will be set to true if the optional element filter returned
+ * a match at some point.
+ */
+ public $filterMatch = false;
+
+ /**
+ * Name of the document's root element, including any namespace
+ * as an expanded URL.
+ */
+ public $rootElement = '';
+
+ /**
+ * A stack of strings containing the data of each xml element as it's processed. Append
+ * data to the top string of the stack, then pop off the string and process it when the
+ * element is closed.
+ */
+ protected $elementData = array();
+
+ /**
+ * A stack of element names and attributes, as we process them.
+ */
+ protected $elementDataContext = array();
+
+ /**
+ * Current depth of the data stack.
+ */
+ protected $stackDepth = 0;
+
+ /**
+ * Additional parsing options
+ */
+ private $parserOptions = array(
+ 'processing_instruction_handler' => '',
+ );
+
+ /**
+ * @param string $input a filename or string containing the XML element
+ * @param callable $filterCallback (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, attributes, and text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
+ * @param bool $isFile (optional) indicates if the first parameter is a
+ * filename (default, true) or if it is a string (false)
+ * @param array $options list of additional parsing options:
+ * processing_instruction_handler: Callback for xml_set_processing_instruction_handler
+ */
+ function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
+ $this->filterCallback = $filterCallback;
+ $this->parserOptions = array_merge( $this->parserOptions, $options );
+ $this->validateFromInput( $input, $isFile );
+ }
+
+ /**
+ * Alternative constructor: from filename
+ *
+ * @param string $fname the filename of an XML document
+ * @param callable $filterCallback (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, and attributes, but not to text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
+ * @return XmlReaderTypeCheck
+ */
+ public static function newFromFilename( $fname, $filterCallback = null ) {
+ return new self( $fname, $filterCallback, true );
+ }
+
+ /**
+ * Alternative constructor: from string
+ *
+ * @param string $string a string containing an XML element
+ * @param callable $filterCallback (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, and attributes, but not to text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
+ * @return XmlReaderTypeCheck
+ */
+ public static function newFromString( $string, $filterCallback = null ) {
+ return new self( $string, $filterCallback, false );
+ }
+
+ /**
+ * Get the root element. Simple accessor to $rootElement
+ *
+ * @return string
+ */
+ public function getRootElement() {
+ return $this->rootElement;
+ }
+
+
+ /**
+ * @param string $fname the filename
+ */
+ private function validateFromInput( $xml, $isFile ) {
+ $reader = new XMLReader();
+ if ( $isFile ) {
+ $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+ } else {
+ $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+ }
+ if ( $s !== true ) {
+ // Couldn't open the XML
+ wfDebugLog( "XRTC", __METHOD__ . ": malformed, source won't open" );
+ $this->wellFormed = false;
+ } else {
+ $oldDisable = libxml_disable_entity_loader( true );
+ $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
+ try {
+ $this->validate( $reader );
+ } catch ( Exception $e ) {
+ // Calling this malformed, because we didn't parse the whole
+ // thing. Maybe just an external entity refernce.
+ wfDebugLog( "XRTC", __METHOD__ . ": malformed, exception during parse" );
+ $this->wellFormed = false;
+ $reader->close();
+ libxml_disable_entity_loader( $oldDisable );
+ throw $e;
+ }
+ $reader->close();
+ libxml_disable_entity_loader( $oldDisable );
+ }
+ }
+
+ private function readNext( XMLReader $reader ) {
+ set_error_handler ( array( $this, 'XmlErrorHandler' ) );
+ $ret = $reader->read();
+ restore_error_handler();
+ return $ret;
+ }
+
+ public function XmlErrorHandler( $errno, $errstr ) {
+ $this->wellFormed = false;
+ }
+
+ private function validate( $reader ) {
+
+ // First, move through anything that isn't an element, and
+ // handle any processing instructions with the callback
+ do {
+ if( !$this->readNext( $reader ) ) {
+ // Hit the end of the document before any elements
+ wfDebugLog( "XRTC", __METHOD__ . ": malformed, no elements" );
+ $this->wellFormed = false;
+ return;
+ }
+ if ( $reader->nodeType === XMLReader::PI ) {
+ $this->processingInstructionHandler( $reader->name, $reader->value );
+ }
+ } while ( $reader->nodeType != XMLReader::ELEMENT );
+
+ // Process the rest of the document
+ do {
+ switch ( $reader->nodeType ) {
+ case XMLReader::ELEMENT:
+ $name = $this->expandNS(
+ $reader->name,
+ $reader->namespaceURI
+ );
+ if ( $this->rootElement === '' ) {
+ $this->rootElement = $name;
+ }
+ $empty = $reader->isEmptyElement;
+ $attrs = $this->getAttributesArray( $reader );
+ $this->elementOpen( $name, $attrs );
+ if ( $empty ) {
+ $this->elementClose();
+ }
+ break;
+
+ case XMLReader::END_ELEMENT:
+ $this->elementClose();
+ break;
+
+ case XMLReader::WHITESPACE:
+ case XMLReader::SIGNIFICANT_WHITESPACE:
+ case XMLReader::CDATA:
+ case XMLReader::TEXT:
+ $this->elementData( $reader->value );
+ break;
+
+ case XMLReader::ENTITY_REF:
+ // Unexpanded entity (maybe external?),
+ // don't send to the filter (xml_parse didn't)
+ break;
+
+ case XMLReader::COMMENT:
+ // Don't send to the filter (xml_parse didn't)
+ break;
+
+ case XMLReader::PI:
+ // Processing instructions can happen after the header too
+ $this->processingInstructionHandler(
+ $reader->name,
+ $reader->value
+ );
+ break;
+ default:
+ // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
+ // NOTATION, or XML_DECLARATION
+ // xml_parse didn't send these to the filter, so we won't.
+ }
+
+ } while ( $this->readNext( $reader ) );
+
+ if ( $this->stackDepth !== 0 ) {
+ wfDebugLog( "XRTC", __METHOD__ . ": malformed, non-zero stack at end" );
+ $this->wellFormed = false;
+ } elseif ( $this->wellFormed === null ) {
+ $this->wellFormed = true;
+ }
+
+ }
+
+ /**
+ * Get all of the attributes for an XMLReader's current node
+ * @param $r XMLReader
+ * @return array of attributes
+ */
+ private function getAttributesArray( XMLReader $r ) {
+ $attrs = array();
+ while ( $r->moveToNextAttribute() ) {
+ if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
+ // XMLReader treats xmlns attributes as normal
+ // attributes, while xml_parse doesn't
+ continue;
+ }
+ $name = $this->expandNS( $r->name, $r->namespaceURI );
+ $attrs[$name] = $r->value;
+ }
+ return $attrs;
+ }
+
+ /**
+ * @param $name element or attribute name, maybe with a full or short prefix
+ * @param $namespaceURI the namespaceURI
+ * @return string the name prefixed with namespaceURI
+ */
+ private function expandNS( $name, $namespaceURI ) {
+ if ( $namespaceURI ) {
+ $parts = explode( ':', $name );
+ $localname = array_pop( $parts );
+ return "$namespaceURI:$localname";
+ }
+ return $name;
+ }
+
+ /**
+ * @param $name
+ * @param $attribs
+ */
+ private function elementOpen( $name, $attribs ) {
+ $this->elementDataContext[] = array( $name, $attribs );
+ $this->elementData[] = '';
+ $this->stackDepth++;
+ }
+
+ /**
+ */
+ private function elementClose() {
+ list( $name, $attribs ) = array_pop( $this->elementDataContext );
+ $data = array_pop( $this->elementData );
+ $this->stackDepth--;
+
+ if ( is_callable( $this->filterCallback )
+ && call_user_func(
+ $this->filterCallback,
+ $name,
+ $attribs,
+ $data
+ )
+ ) {
+ // Filter hit
+ $this->filterMatch = true;
+ }
+ }
+
+ /**
+ * @param $data
+ */
+ private function elementData( $data ) {
+ // Collect any data here, and we'll run the callback in elementClose
+ $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
+ }
+
+ /**
+ * @param $target
+ * @param $data
+ */
+ private function processingInstructionHandler( $target, $data ) {
+ if ( $this->parserOptions['processing_instruction_handler'] ) {
+ if ( call_user_func(
+ $this->parserOptions['processing_instruction_handler'],
+ $target,
+ $data
+ ) ) {
+ // Filter hit!
+ $this->filterMatch = true;
+ }
+ }
+ }
+}
diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php
index 63aa057..6e97de2 100644
--- a/includes/upload/UploadBase.php
+++ b/includes/upload/UploadBase.php
@@ -1249,7 +1249,7 @@ abstract class UploadBase {
*/
protected function detectScriptInSvg( $filename, $partial ) {
$this->mSVGNSError = false;
- $check = new XmlTypeCheck(
+ $check = new XmlReaderTypeCheck(
$filename,
array( $this, 'checkSvgScriptCallback' ),
true,
diff --git a/tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php b/tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php
new file mode 100644
index 0000000..ed1f58e
--- /dev/null
+++ b/tests/phpunit/includes/libs/XmlReaderTypeCheckTest.php
@@ -0,0 +1,49 @@
+<?php
+/**
+ * PHPUnit tests for XmlReaderTypeCheck.
+ * @author csteipp
+ * @group Xml
+ * @covers XmlReaderTypeCheck
+ */
+class XmlReaderTypeCheckTest extends PHPUnit_Framework_TestCase {
+ const WELL_FORMED_XML = "<root><child /></root>";
+ const MAL_FORMED_XML = "<root><child /></error>";
+ const XML_WITH_PIH = '<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="/w/index.php"?><svg><child /></svg>';
+
+ /**
+ * @covers XMLTypeCheck::newFromString
+ * @covers XMLTypeCheck::getRootElement
+ */
+ public function testWellFormedXML() {
+ $testXML = XmlReaderTypeCheck::newFromString( self::WELL_FORMED_XML );
+ $this->assertTrue( $testXML->wellFormed );
+ $this->assertEquals( 'root', $testXML->getRootElement() );
+ }
+
+ /**
+ * @covers XMLTypeCheck::newFromString
+ */
+ public function testMalFormedXML() {
+ $testXML = XmlReaderTypeCheck::newFromString( self::MAL_FORMED_XML );
+ $this->assertFalse( $testXML->wellFormed );
+ }
+
+ /**
+ * @covers XMLTypeCheck::processingInstructionHandler
+ */
+ public function testProcessingInstructionHandler() {
+ $called = false;
+ $testXML = new XmlReaderTypeCheck(
+ self::XML_WITH_PIH,
+ null,
+ false,
+ array(
+ 'processing_instruction_handler' => function() use ( &$called ) {
+ $called = true;
+ }
+ )
+ );
+ $this->assertTrue( $called );
+ }
+
+}
diff --git a/tests/phpunit/includes/libs/XmlTypeCheckTest.php b/tests/phpunit/includes/libs/XmlTypeCheckTest.php
index e7b3e77..f0ba934 100644
--- a/tests/phpunit/includes/libs/XmlTypeCheckTest.php
+++ b/tests/phpunit/includes/libs/XmlTypeCheckTest.php
@@ -8,6 +8,7 @@
class XmlTypeCheckTest extends PHPUnit_Framework_TestCase {
const WELL_FORMED_XML = "<root><child /></root>";
const MAL_FORMED_XML = "<root><child /></error>";
+ const XML_WITH_PIH = '<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="/w/index.php"?><svg><child /></svg>';
/**
* @covers XMLTypeCheck::newFromString
@@ -27,4 +28,22 @@ class XmlTypeCheckTest extends PHPUnit_Framework_TestCase {
$this->assertFalse( $testXML->wellFormed );
}
+ /**
+ * @covers XMLTypeCheck::processingInstructionHandler
+ */
+ public function testProcessingInstructionHandler() {
+ $called = false;
+ $testXML = new XmlTypeCheck(
+ self::XML_WITH_PIH,
+ null,
+ false,
+ array(
+ 'processing_instruction_handler' => function() use ( &$called ) {
+ $called = true;
+ }
+ )
+ );
+ $this->assertTrue( $called );
+ }
+
}
diff --git a/tests/phpunit/includes/upload/UploadBaseTest.php b/tests/phpunit/includes/upload/UploadBaseTest.php
index 4ea300c..a580b12 100644
--- a/tests/phpunit/includes/upload/UploadBaseTest.php
+++ b/tests/phpunit/includes/upload/UploadBaseTest.php
@@ -363,6 +363,18 @@ class UploadBaseTest extends MediaWikiTestCase {
true,
'SVG with data:text/html link target (firefox only)'
),
+ array(
+ '<?xml version="1.0" encoding="UTF-8" standalone="no"?> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" [ <!ENTITY lol "lol"> <!ENTITY lol2 "<script>alert('XSSED => '+document.domain);</script>"> ]> <svg xmlns="http://www.w3.org/2000/svg" width="68" height="68" viewBox="-34 -34 68 68" version="1.1"> <circle cx="0" cy="0" r="24" fill="#c8c8c8"/> <text x="0" y="0" fill="black">&lol2;</text> </svg>',
+ true,
+ true,
+ 'SVG with encoded script tag in internal entity (reported by Beyond Security)'
+ ),
+ array(
+ '<?xml version="1.0"?> <!DOCTYPE svg [ <!ENTITY foo SYSTEM "file:///etc/passwd"> ]> <svg xmlns="http://www.w3.org/2000/svg" version="1.1"> <desc>&foo;</desc> <rect width="300" height="100" style="fill:rgb(0,0,255);stroke-width:1;stroke:rgb(0,0,2)" /> </svg>',
+ false,
+ false,
+ 'SVG with external entity'
+ ),
// Test good, but strange files that we want to allow
array(
@@ -377,7 +389,6 @@ class UploadBaseTest extends MediaWikiTestCase {
false,
'SVG with local urls, including filter: in style'
),
-
);
}
}
@@ -401,7 +412,7 @@ class UploadTestHandler extends UploadBase {
* the result instead of interpreting them.
*/
public function checkSvgString( $svg ) {
- $check = new XmlTypeCheck(
+ $check = new XmlReaderTypeCheck(
$svg,
array( $this, 'checkSvgScriptCallback' ),
false,
--
1.8.4.5
File Metadata
Details
Attached
Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
38643
Default Alt Text
T88310e.patch (17 KB)
Attached To
Mode
T88310: xml_parse doesn't expand internal entities sometimes
Attached
Detach File
Event Timeline
Log In to Comment