Page MenuHomePhabricator
Authored By
Lucas_Werkmeister_WMDE
Jan 17 2022, 7:12 PM
Size
11 KB
Referenced Files
None
Subscribers
None

T297754.patch

From 779e148a2ba378cb98fca1ccbe15803319cacbd4 Mon Sep 17 00:00:00 2001
From: Lucas Werkmeister <lucas.werkmeister@wikimedia.de>
Date: Mon, 17 Jan 2022 20:06:42 +0100
Subject: [PATCH] SECURITY: Sort Special:WhatLinksHere by namespace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
ApiQueryBacklinks already sorts by the from_namespace column before the
from column, but SpecialWhatLinksHere only sorted by the from column so
far (i.e. the page ID of the linking page). This means that MySQL cannot
always use the indexes on the backlinks and related tables efficiently,
turning the special page into a potential DoS vector for certain
parameter combinations (where it reads millions of rows and then sorts
them in memory). Fix this by sorting by namespace first (except for the
redirect table, which doesn’t have an rd_from column).
The URL changes format again, and now looks like &offset=0|123, where 0
is the namespace of page 123, and the results will be the pages in the
same namespace with an ID above 123, or the pages in namespaces above 0
regardless of page ID (though still sorted). Old URLs are of course
supported, and we look up the relevant namespace of the given page ID
on-the-fly in that case.
Bug: T297754
Change-Id: I8fa0ce5ea8798124807537b9f02a0832185411d4
---
includes/specials/SpecialWhatLinksHere.php | 139 +++++++++++++++------
1 file changed, 100 insertions(+), 39 deletions(-)
diff --git a/includes/specials/SpecialWhatLinksHere.php b/includes/specials/SpecialWhatLinksHere.php
index 4271ad76d0..04a60fa71a 100644
--- a/includes/specials/SpecialWhatLinksHere.php
+++ b/includes/specials/SpecialWhatLinksHere.php
@@ -23,6 +23,7 @@
use MediaWiki\Cache\LinkBatchFactory;
use MediaWiki\Content\IContentHandlerFactory;
+use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\Rdbms\SelectQueryBuilder;
@@ -94,7 +95,7 @@ public function execute( $par ) {
$opts->add( 'target', '' );
$opts->add( 'namespace', '', FormOptions::INTNULL );
$opts->add( 'limit', $this->getConfig()->get( 'QueryPageDefaultLimit' ) );
- $opts->add( 'offset', 0 );
+ $opts->add( 'offset', '' );
$opts->add( 'from', 0 );
$opts->add( 'dir', 'next' );
$opts->add( 'hideredirs', false );
@@ -130,31 +131,74 @@ public function execute( $par ) {
$out->setPageTitle( $this->msg( 'whatlinkshere-title', $this->target->getPrefixedText() ) );
$out->addBacklinkSubtitle( $this->target );
- // Workaround for legacy 'from' and 'back' system.
- // We need only 'from' param to get offset.
- $from = $opts->getValue( 'from' );
- $opts->reset( 'from' );
- $dir = $from ? 'next' : $opts->getValue( 'dir' );
- // 'from' was included in result set, offset is excluded. We need to align them.
- $offset = $from ? $from - 1 : $opts->getValue( 'offset' );
+ [ $offsetNamespace, $offsetPageID, $dir ] = $this->parseOffsetAndDir( $opts );
$this->showIndirectLinks(
0,
$this->target,
$opts->getValue( 'limit' ),
- $offset,
+ $offsetNamespace,
+ $offsetPageID,
$dir
);
}
+ /**
+ * Parse the offset and direction parameters.
+ *
+ * Three parameter kinds are supported:
+ * * from=123 (legacy), where page ID 123 is the first included one
+ * * offset=123&dir=next/prev (legacy), where page ID 123 is the last excluded one
+ * * offset=0|123&dir=next/prev (current), where namespace 0 page ID 123 is the last excluded one
+ *
+ * @param FormOptions $opts
+ * @return array( int $offsetNamespace, int $offsetPageID, string $dir )
+ */
+ private function parseOffsetAndDir( FormOptions $opts ): array {
+ $from = $opts->getValue( 'from' );
+ $opts->reset( 'from' );
+
+ if ( $from ) {
+ $dir = 'next';
+ $offsetNamespace = null;
+ $offsetPageID = $from - 1;
+ } else {
+ $dir = $opts->getValue( 'dir' );
+ [ $offsetNamespaceString, $offsetPageIDString ] = explode(
+ '|',
+ $opts->getValue( 'offset' ) . '|'
+ );
+ if ( !$offsetPageIDString ) {
+ $offsetPageIDString = $offsetNamespaceString;
+ $offsetNamespaceString = '';
+ }
+ if ( is_numeric( $offsetNamespaceString ) ) {
+ $offsetNamespace = (int)$offsetNamespaceString;
+ } else {
+ $offsetNamespace = null;
+ }
+ $offsetPageID = (int)$offsetPageIDString;
+ }
+
+ if ( $offsetNamespace === null ) {
+ $offsetTitle = MediaWikiServices::getInstance()
+ ->getTitleFactory()
+ ->newFromID( $offsetPageID );
+ $offsetNamespace = $offsetTitle ? $offsetTitle->getNamespace() : 0;
+ }
+
+ return [ $offsetNamespace, $offsetPageID, $dir ];
+ }
+
/**
* @param int $level Recursion level
* @param Title $target Target title
* @param int $limit Number of entries to display
- * @param int $offset Display from this article ID (excluded)
+ * @param int $offsetNamespace Display from this namespace number (included)
+ * @param int $offsetPageID Display from this article ID (excluded)
* @param string $dir 'next' or 'prev'
*/
- private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = 'next' ) {
+ private function showIndirectLinks( $level, $target, $limit, $offsetNamespace = 0, $offsetPageID = 0, $dir = 'next' ) {
$out = $this->getOutput();
$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
@@ -202,12 +246,15 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
$conds['imagelinks']['il_from_namespace'] = $namespaces;
}
- if ( $offset ) {
+ if ( $offsetPageID ) {
$rel = $dir === 'prev' ? '<' : '>';
- $conds['redirect'][] = "rd_from $rel $offset";
- $conds['templatelinks'][] = "tl_from $rel $offset";
- $conds['pagelinks'][] = "pl_from $rel $offset";
- $conds['imagelinks'][] = "il_from $rel $offset";
+ $conds['redirect'][] = "rd_from $rel $offsetPageID";
+ $conds['templatelinks'][] = "(tl_from_namespace = $offsetNamespace AND tl_from $rel $offsetPageID " .
+ "OR tl_from_namespace $rel $offsetNamespace)";
+ $conds['pagelinks'][] = "(pl_from_namespace = $offsetNamespace AND pl_from $rel $offsetPageID " .
+ "OR pl_from_namespace $rel $offsetNamespace)";
+ $conds['imagelinks'][] = "(il_from_namespace = $offsetNamespace AND il_from $rel $offsetPageID " .
+ "OR il_from_namespace $rel $offsetNamespace)";
}
if ( $hideredirs ) {
@@ -236,7 +283,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
->table( $table )
->fields( [ $fromCol, 'rd_from', 'rd_fragment' ] )
->conds( $conds[$table] )
- ->orderBy( $fromCol, $sortDirection )
+ ->orderBy( [ $fromCol . '_namespace', $fromCol ], $sortDirection )
->limit( 2 * $queryLimit )
->leftJoin( 'redirect', 'redirect', $on );
@@ -245,7 +292,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
->join( 'page', 'page', "$fromCol = page_id" )
->fields( [ 'page_id', 'page_namespace', 'page_title',
'rd_from', 'rd_fragment', 'page_is_redirect' ] )
- ->orderBy( 'page_id', $sortDirection )
+ ->orderBy( [ 'page_namespace', 'page_id' ], $sortDirection )
->limit( $queryLimit )
->caller( $fname )
->fetchResultSet();
@@ -339,40 +386,54 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
}
}
- // Sort by key and then change the keys to 0-based indices
- ksort( $rows );
- $rows = array_values( $rows );
+ // Sort by namespace + page ID, changing the keys to 0-based indices
+ usort( $rows, static function ( $rowA, $rowB ) {
+ if ( $rowA->page_namespace !== $rowB->page_namespace ) {
+ return $rowA->page_namespace < $rowB->page_namespace ? -1 : 1;
+ }
+ if ( $rowA->page_id !== $rowB->page_id ) {
+ return $rowA->page_id < $rowB->page_id ? - 1 : 1;
+ }
+ return 0;
+ } );
$numRows = count( $rows );
// Work out the start and end IDs, for prev/next links
if ( !$limit ) { // T289351
- $nextId = $prevId = false;
+ $nextNamespace = $nextPageId = $prevNamespace = $prevPageId = false;
$rows = [];
} elseif ( $dir === 'prev' ) {
if ( $numRows > $limit ) {
// More rows available after these ones
- // Get the nextId from the last row in the result set
- $nextId = $rows[$limit]->page_id;
+ // Get the next row from the last row in the result set
+ $nextNamespace = $rows[$limit]->page_namespace;
+ $nextPageId = $rows[$limit]->page_id;
// Remove undisplayed rows, for dir='prev' we need to discard first record after sorting
$rows = array_slice( $rows, 1, $limit );
- // Get the prevId from the first displayed row
- $prevId = $rows[0]->page_id;
+ // Get the prev row from the first displayed row
+ $prevNamespace = $rows[0]->page_namespace;
+ $prevPageId = $rows[0]->page_id;
} else {
- // Get the nextId from the last displayed row
- $nextId = $rows[$numRows - 1]->page_id;
- $prevId = false;
+ // Get the next row from the last displayed row
+ $nextNamespace = $rows[$numRows - 1]->page_namespace;
+ $nextPageId = $rows[$numRows - 1]->page_id;
+ $prevNamespace = false;
+ $prevPageId = false;
}
} else {
// If offset is not set disable prev link
- $prevId = $offset ? $rows[0]->page_id : false;
+ $prevNamespace = $offsetPageID ? $rows[0]->page_namespace : false;
+ $prevPageId = $offsetPageID ? $rows[0]->page_id : false;
if ( $numRows > $limit ) {
- // Get the nextId from the last displayed row
- $nextId = $rows[$limit - 1]->page_id;
+ // Get the next row from the last displayed row
+ $nextNamespace = $rows[$limit - 1]->page_namespace;
+ $nextPageId = $rows[$limit - 1]->page_id;
// Remove undisplayed rows
$rows = array_slice( $rows, 0, $limit );
} else {
- $nextId = false;
+ $nextNamespace = false;
+ $nextPageId = false;
}
}
@@ -403,7 +464,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
$out->addWikiMsg( 'whatlinkshere-count', Message::numParam( count( $rows ) ) );
- $prevnext = $this->getPrevNext( $prevId, $nextId );
+ $prevnext = $this->getPrevNext( $prevNamespace, $prevPageId, $nextNamespace, $nextPageId );
$out->addHTML( $prevnext );
}
$out->addHTML( $this->listStart( $level ) );
@@ -543,7 +604,7 @@ private function makeSelfLink( $text, $query ) {
);
}
- private function getPrevNext( $prevId, $nextId ) {
+ private function getPrevNext( $prevNamespace, $prevPageId, $nextNamespace, $nextPageId ) {
$currentLimit = $this->opts->getValue( 'limit' );
$prev = $this->msg( 'whatlinkshere-prev' )->numParams( $currentLimit )->text();
$next = $this->msg( 'whatlinkshere-next' )->numParams( $currentLimit )->text();
@@ -551,12 +612,12 @@ private function getPrevNext( $prevId, $nextId ) {
$changed = $this->opts->getChangedValues();
unset( $changed['target'] ); // Already in the request title
- if ( $prevId != 0 ) {
- $overrides = [ 'dir' => 'prev', 'offset' => $prevId, ];
+ if ( $prevPageId != 0 ) {
+ $overrides = [ 'dir' => 'prev', 'offset' => "$prevNamespace|$prevPageId", ];
$prev = Message::rawParam( $this->makeSelfLink( $prev, array_merge( $changed, $overrides ) ) );
}
- if ( $nextId != 0 ) {
- $overrides = [ 'dir' => 'next', 'offset' => $nextId, ];
+ if ( $nextPageId != 0 ) {
+ $overrides = [ 'dir' => 'next', 'offset' => "$nextNamespace|$nextPageId", ];
$next = Message::rawParam( $this->makeSelfLink( $next, array_merge( $changed, $overrides ) ) );
}
--
2.30.2

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9327217
Default Alt Text
T297754.patch (11 KB)

Event Timeline