Page MenuHomePhabricator

unicode-normalize.patch

Authored By
bzimport
Nov 21 2014, 11:32 PM
Size
5 KB
Referenced Files
None
Subscribers
None

unicode-normalize.patch

Index: includes/api/ApiPageSet.php
===================================================================
--- includes/api/ApiPageSet.php (revision 87498)
+++ includes/api/ApiPageSet.php (working copy)
@@ -655,15 +655,26 @@
* @return LinkBatch
*/
private function processTitlesArray( $titles ) {
+ global $wgContLang;
$linkBatch = new LinkBatch();
foreach ( $titles as $title ) {
- $titleObj = is_string( $title ) ? Title::newFromText( $title ) : $title;
+ if ( is_string( $title ) ) {
+ // Normalize title for Unicode and create a Title object
+ $normTitle = $wgContLang->normalize( $title );
+ $titleObj = Title::newFromText( $normTitle );
+ } else {
+ $titleObj = $title;
+ $normTitle = $titleObj->getPrefixedText();
+ }
if ( !$titleObj ) {
// Handle invalid titles gracefully
- $this->mAllpages[0][$title] = $this->mFakePageId;
- $this->mInvalidTitles[$this->mFakePageId] = $title;
+ $this->mAllpages[0][$normTitle] = $this->mFakePageId;
+ $this->mInvalidTitles[$this->mFakePageId] = $normTitle;
$this->mFakePageId--;
+ if ( is_string( $title ) && $title !== $normTitle ) {
+ $this->mNormalizedTitles[$title] = $normTitle;
+ }
continue; // There's nothing else we can do
}
$unconvertedTitle = $titleObj->getPrefixedText();
@@ -674,7 +685,6 @@
$this->mInterwikiTitles[$titleObj->getPrefixedText()] = $iw;
} else {
// Variants checking
- global $wgContLang;
if ( $this->mConvertTitles &&
count( $wgContLang->getVariants() ) > 1 &&
!$titleObj->exists() ) {
@@ -713,7 +723,8 @@
protected function getAllowedParams() {
return array(
'titles' => array(
- ApiBase::PARAM_ISMULTI => true
+ ApiBase::PARAM_ISMULTI => true,
+ ApiBase::PARAM_NORMALIZE_UNICODE => false,
),
'pageids' => array(
ApiBase::PARAM_TYPE => 'integer',
Index: includes/api/ApiBase.php
===================================================================
--- includes/api/ApiBase.php (revision 87498)
+++ includes/api/ApiBase.php (working copy)
@@ -53,6 +53,7 @@
const PARAM_DEPRECATED = 7; // Boolean, is the parameter deprecated (will show a warning)
const PARAM_REQUIRED = 8; // Boolean, is the parameter required?
const PARAM_RANGE_ENFORCE = 9; // Boolean, if MIN/MAX are set, enforce (die) these? Only applies if TYPE='integer' Use with extreme caution
+ const PARAM_NORMALIZE_UNICODE = 10; // Boolean, should we normalize Unicode for a parameter? Defaults to true
const LIMIT_BIG1 = 500; // Fast query, std user limit
const LIMIT_BIG2 = 5000; // Fast query, bot/sysop limit
@@ -672,6 +673,7 @@
$dupes = false;
$deprecated = false;
$required = false;
+ $normalizeUnicode = true;
} else {
$default = isset( $paramSettings[self::PARAM_DFLT] ) ? $paramSettings[self::PARAM_DFLT] : null;
$multi = isset( $paramSettings[self::PARAM_ISMULTI] ) ? $paramSettings[self::PARAM_ISMULTI] : false;
@@ -679,6 +681,7 @@
$dupes = isset( $paramSettings[self::PARAM_ALLOW_DUPLICATES] ) ? $paramSettings[self::PARAM_ALLOW_DUPLICATES] : false;
$deprecated = isset( $paramSettings[self::PARAM_DEPRECATED] ) ? $paramSettings[self::PARAM_DEPRECATED] : false;
$required = isset( $paramSettings[self::PARAM_REQUIRED] ) ? $paramSettings[self::PARAM_REQUIRED] : false;
+ $normalizeUnicode = isset( $paramSettings[self::PARAM_NORMALIZE_UNICODE] ) ? $paramSettings[self::PARAM_NORMALIZE_UNICODE] : true;
// When type is not given, and no choices, the type is the same as $default
if ( !isset( $type ) ) {
@@ -698,7 +701,7 @@
$value = $this->getMain()->getRequest()->getCheck( $encParamName );
} else {
- $value = $this->getMain()->getRequest()->getVal( $encParamName, $default );
+ $value = $this->getMain()->getRequest()->getVal( $encParamName, $default, $normalizeUnicode );
if ( isset( $value ) && $type == 'namespace' ) {
$type = MWNamespace::getValidNamespaces();
Index: includes/WebRequest.php
===================================================================
--- includes/WebRequest.php (revision 87498)
+++ includes/WebRequest.php (working copy)
@@ -238,9 +238,10 @@
* @param $arr Array
* @param $name String
* @param $default Mixed
+ * @param $normalizeUnicode Boolean: whether to normalize Unicode in the returned value (defaults to true)
* @return mixed
*/
- private function getGPCVal( $arr, $name, $default ) {
+ private function getGPCVal( $arr, $name, $default, $normalizeUnicode = true ) {
# PHP is so nice to not touch input data, except sometimes:
# http://us2.php.net/variables.external#language.variables.external.dot-in-names
# Work around PHP *feature* to avoid *bugs* elsewhere.
@@ -254,7 +255,9 @@
$data = $wgContLang->checkTitleEncoding( $data );
}
}
- $data = $this->normalizeUnicode( $data );
+ if ( $normalizeUnicode ) {
+ $data = $this->normalizeUnicode( $data );
+ }
return $data;
} else {
taint( $default );
@@ -270,10 +273,11 @@
*
* @param $name String
* @param $default String: optional default (or NULL)
+ * @param $normalizeUnicode Boolean: whether to normalize Unicode in the returned value (defaults to true)
* @return String
*/
- public function getVal( $name, $default = null ) {
- $val = $this->getGPCVal( $this->data, $name, $default );
+ public function getVal( $name, $default = null, $normalizeUnicode = true ) {
+ $val = $this->getGPCVal( $this->data, $name, $default, $normalizeUnicode );
if( is_array( $val ) ) {
$val = $default;
}

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
7281
Default Alt Text
unicode-normalize.patch (5 KB)

Event Timeline