Page MenuHomePhabricator

whitelistEncodings2.patch

Authored By
bzimport
Nov 22 2014, 1:42 AM
Size
4 KB
Referenced Files
None
Subscribers
None

whitelistEncodings2.patch

From 9e3674787f96cb94511f64bbb39e6b38215defb5 Mon Sep 17 00:00:00 2001
From: csteipp <csteipp@wikimedia.org>
Date: Tue, 23 Apr 2013 14:15:01 -0700
Subject: [PATCH] SECURITY: Check SVG xml encoding against whitelist
Some browsers will interpret obscure xml encodings as UTF-8, while
PHP/expat will interpret the given encoding in the xml declaration.
bug: 47304
Change-Id: I3b311a7078d977ae89c51e95e625d79fba183cfc
---
includes/upload/UploadBase.php | 67 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php
index c98c445..2d09612 100644
--- a/includes/upload/UploadBase.php
+++ b/includes/upload/UploadBase.php
@@ -46,6 +46,8 @@ abstract class UploadBase {
protected $mBlackListedExtensions;
protected $mJavaDetected;
+ protected static $safeXmlEncodings = array( 'UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'UTF-16', 'UTF-32' );
+
const SUCCESS = 0;
const OK = 0;
const EMPTY_FILE = 3;
@@ -979,6 +981,15 @@ abstract class UploadBase {
return true;
}
+ // Some browsers will interpret obscure xml encodings as UTF-8, while
+ // PHP/expat will interpret the given encoding in the xml declaration (bug 47304)
+ if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 ) {
+ if ( self::checkXMLEncodingMissmatch( $file ) ) {
+ wfProfileOut( __METHOD__ );
+ return true;
+ }
+ }
+
/**
* Internet Explorer for Windows performs some really stupid file type
* autodetection which can cause it to interpret valid image files as HTML
@@ -1050,6 +1061,62 @@ abstract class UploadBase {
return false;
}
+
+ /**
+ * Check a whitelist of xml encodings that are known not to be interpreted differently
+ * by the server's xml parser (expat) and some common browsers.
+ *
+ * @param string $file pathname to the temporary upload file
+ * @return Boolean: true if the file contains an encoding that could be misinterpreted
+ */
+ public static function checkXMLEncodingMissmatch( $file ) {
+ global $wgSVGMetadataCutoff;
+ $contents = file_get_contents( $file, false, null, -1, $wgSVGMetadataCutoff );
+ $encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
+
+ if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) ) {
+ if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+ && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
+ ) {
+ wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+ return true;
+ }
+ } elseif ( preg_match( "!<\?xml\b!si", $contents ) ) {
+ // Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+ // bytes. There shouldn't be a legitimate reason for this to happen.
+ wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+ return true;
+ } elseif ( substr( $contents, 0, 4) == "\x4C\x6F\xA7\x94" ) {
+ // EBCDIC encoded XML
+ wfDebug( __METHOD__ . ": EBCDIC Encoded XML\n" );
+ return true;
+ }
+
+ // It's possible the file is encoded with multi-byte encoding, so re-encode attempt to
+ // detect the encoding in case is specifies an encoding not whitelisted in self::$safeXmlEncodings
+ $attemptEncodings = array( 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' );
+ foreach ( $attemptEncodings as $encoding ) {
+ wfSuppressWarnings();
+ $str = iconv( $encoding, 'UTF-8', $contents );
+ wfRestoreWarnings();
+ if ( $str != '' && preg_match( "!<\?xml\b(.*?)\?>!si", $str, $matches ) ) {
+ if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+ && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
+ ) {
+ wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+ return true;
+ }
+ } elseif ( $str != '' && preg_match( "!<\?xml\b!si", $str ) ) {
+ // Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+ // bytes. There shouldn't be a legitimate reason for this to happen.
+ wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+ return true;
+ }
+ }
+
+ return false;
+ }
+
/**
* @param $filename string
* @return bool
--
1.8.1.4

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
10493
Default Alt Text
whitelistEncodings2.patch (4 KB)

Event Timeline