Page MenuHomePhabricator
Authored By
Lucas_Werkmeister_WMDE
Feb 7 2022, 10:55 AM
Size
11 KB
Referenced Files
None
Subscribers
None

T297754-squash.patch

From 8d9e60302eb9cdcfba681fb423b245ced68aff1f Mon Sep 17 00:00:00 2001
From: Lucas Werkmeister <lucas.werkmeister@wikimedia.de>
Date: Mon, 17 Jan 2022 20:06:42 +0100
Subject: [PATCH] SECURITY: Sort Special:WhatLinksHere by namespace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
ApiQueryBacklinks already sorts by the from_namespace column before the
from column, but SpecialWhatLinksHere only sorted by the from column so
far (i.e. the page ID of the linking page). This means that MySQL cannot
always use the indexes on the backlinks and related tables efficiently,
turning the special page into a potential DoS vector for certain
parameter combinations (where it reads millions of rows and then sorts
them in memory). Fix this by sorting by namespace first (except for the
redirect table, which doesn’t have an rd_from column); this also means
that we always want to add a filter for the namespaces, even when it’s
just filtering for all the known namespaces.
The URL changes format again, and now looks like &offset=0|123, where 0
is the namespace of page 123, and the results will be the pages in the
same namespace with an ID above 123, or the pages in namespaces above 0
regardless of page ID (though still sorted). Old URLs are of course
supported, and we look up the relevant namespace of the given page ID
on-the-fly in that case.
Bug: T297754
---
includes/specials/SpecialWhatLinksHere.php | 151 +++++++++++++++------
1 file changed, 108 insertions(+), 43 deletions(-)
diff --git a/includes/specials/SpecialWhatLinksHere.php b/includes/specials/SpecialWhatLinksHere.php
index 4271ad76d0..0c6c19f26b 100644
--- a/includes/specials/SpecialWhatLinksHere.php
+++ b/includes/specials/SpecialWhatLinksHere.php
@@ -23,6 +23,7 @@
use MediaWiki\Cache\LinkBatchFactory;
use MediaWiki\Content\IContentHandlerFactory;
+use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\Rdbms\SelectQueryBuilder;
@@ -94,7 +95,7 @@ public function execute( $par ) {
$opts->add( 'target', '' );
$opts->add( 'namespace', '', FormOptions::INTNULL );
$opts->add( 'limit', $this->getConfig()->get( 'QueryPageDefaultLimit' ) );
- $opts->add( 'offset', 0 );
+ $opts->add( 'offset', '' );
$opts->add( 'from', 0 );
$opts->add( 'dir', 'next' );
$opts->add( 'hideredirs', false );
@@ -130,31 +131,74 @@ public function execute( $par ) {
$out->setPageTitle( $this->msg( 'whatlinkshere-title', $this->target->getPrefixedText() ) );
$out->addBacklinkSubtitle( $this->target );
- // Workaround for legacy 'from' and 'back' system.
- // We need only 'from' param to get offset.
- $from = $opts->getValue( 'from' );
- $opts->reset( 'from' );
- $dir = $from ? 'next' : $opts->getValue( 'dir' );
- // 'from' was included in result set, offset is excluded. We need to align them.
- $offset = $from ? $from - 1 : $opts->getValue( 'offset' );
+ [ $offsetNamespace, $offsetPageID, $dir ] = $this->parseOffsetAndDir( $opts );
$this->showIndirectLinks(
0,
$this->target,
$opts->getValue( 'limit' ),
- $offset,
+ $offsetNamespace,
+ $offsetPageID,
$dir
);
}
+ /**
+ * Parse the offset and direction parameters.
+ *
+ * Three parameter kinds are supported:
+ * * from=123 (legacy), where page ID 123 is the first included one
+ * * offset=123&dir=next/prev (legacy), where page ID 123 is the last excluded one
+ * * offset=0|123&dir=next/prev (current), where namespace 0 page ID 123 is the last excluded one
+ *
+ * @param FormOptions $opts
+ * @return array( int $offsetNamespace, int $offsetPageID, string $dir )
+ */
+ private function parseOffsetAndDir( FormOptions $opts ): array {
+ $from = $opts->getValue( 'from' );
+ $opts->reset( 'from' );
+
+ if ( $from ) {
+ $dir = 'next';
+ $offsetNamespace = null;
+ $offsetPageID = $from - 1;
+ } else {
+ $dir = $opts->getValue( 'dir' );
+ [ $offsetNamespaceString, $offsetPageIDString ] = explode(
+ '|',
+ $opts->getValue( 'offset' ) . '|'
+ );
+ if ( !$offsetPageIDString ) {
+ $offsetPageIDString = $offsetNamespaceString;
+ $offsetNamespaceString = '';
+ }
+ if ( is_numeric( $offsetNamespaceString ) ) {
+ $offsetNamespace = (int)$offsetNamespaceString;
+ } else {
+ $offsetNamespace = null;
+ }
+ $offsetPageID = (int)$offsetPageIDString;
+ }
+
+ if ( $offsetNamespace === null ) {
+ $offsetTitle = MediaWikiServices::getInstance()
+ ->getTitleFactory()
+ ->newFromID( $offsetPageID );
+ $offsetNamespace = $offsetTitle ? $offsetTitle->getNamespace() : 0;
+ }
+
+ return [ $offsetNamespace, $offsetPageID, $dir ];
+ }
+
/**
* @param int $level Recursion level
* @param Title $target Target title
* @param int $limit Number of entries to display
- * @param int $offset Display from this article ID (excluded)
+ * @param int $offsetNamespace Display from this namespace number (included)
+ * @param int $offsetPageID Display from this article ID (excluded)
* @param string $dir 'next' or 'prev'
*/
- private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = 'next' ) {
+ private function showIndirectLinks( $level, $target, $limit, $offsetNamespace = 0, $offsetPageID = 0, $dir = 'next' ) {
$out = $this->getOutput();
$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
@@ -196,18 +240,25 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
} else {
$namespaces = $namespace;
}
- $conds['redirect']['page_namespace'] = $namespaces;
- $conds['pagelinks']['pl_from_namespace'] = $namespaces;
- $conds['templatelinks']['tl_from_namespace'] = $namespaces;
- $conds['imagelinks']['il_from_namespace'] = $namespaces;
+ } else {
+ // Select all namespaces.
+ // This allows the database to use the *_from_namespace index. (T297754)
+ $namespaces = $this->namespaceInfo->getValidNamespaces();
}
+ $conds['redirect']['page_namespace'] = $namespaces;
+ $conds['pagelinks']['pl_from_namespace'] = $namespaces;
+ $conds['templatelinks']['tl_from_namespace'] = $namespaces;
+ $conds['imagelinks']['il_from_namespace'] = $namespaces;
- if ( $offset ) {
+ if ( $offsetPageID ) {
$rel = $dir === 'prev' ? '<' : '>';
- $conds['redirect'][] = "rd_from $rel $offset";
- $conds['templatelinks'][] = "tl_from $rel $offset";
- $conds['pagelinks'][] = "pl_from $rel $offset";
- $conds['imagelinks'][] = "il_from $rel $offset";
+ $conds['redirect'][] = "rd_from $rel $offsetPageID";
+ $conds['templatelinks'][] = "(tl_from_namespace = $offsetNamespace AND tl_from $rel $offsetPageID " .
+ "OR tl_from_namespace $rel $offsetNamespace)";
+ $conds['pagelinks'][] = "(pl_from_namespace = $offsetNamespace AND pl_from $rel $offsetPageID " .
+ "OR pl_from_namespace $rel $offsetNamespace)";
+ $conds['imagelinks'][] = "(il_from_namespace = $offsetNamespace AND il_from $rel $offsetPageID " .
+ "OR il_from_namespace $rel $offsetNamespace)";
}
if ( $hideredirs ) {
@@ -236,7 +287,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
->table( $table )
->fields( [ $fromCol, 'rd_from', 'rd_fragment' ] )
->conds( $conds[$table] )
- ->orderBy( $fromCol, $sortDirection )
+ ->orderBy( [ $fromCol . '_namespace', $fromCol ], $sortDirection )
->limit( 2 * $queryLimit )
->leftJoin( 'redirect', 'redirect', $on );
@@ -245,7 +296,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
->join( 'page', 'page', "$fromCol = page_id" )
->fields( [ 'page_id', 'page_namespace', 'page_title',
'rd_from', 'rd_fragment', 'page_is_redirect' ] )
- ->orderBy( 'page_id', $sortDirection )
+ ->orderBy( [ 'page_namespace', 'page_id' ], $sortDirection )
->limit( $queryLimit )
->caller( $fname )
->fetchResultSet();
@@ -339,40 +390,54 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
}
}
- // Sort by key and then change the keys to 0-based indices
- ksort( $rows );
- $rows = array_values( $rows );
+ // Sort by namespace + page ID, changing the keys to 0-based indices
+ usort( $rows, static function ( $rowA, $rowB ) {
+ if ( $rowA->page_namespace !== $rowB->page_namespace ) {
+ return $rowA->page_namespace < $rowB->page_namespace ? -1 : 1;
+ }
+ if ( $rowA->page_id !== $rowB->page_id ) {
+ return $rowA->page_id < $rowB->page_id ? - 1 : 1;
+ }
+ return 0;
+ } );
$numRows = count( $rows );
// Work out the start and end IDs, for prev/next links
if ( !$limit ) { // T289351
- $nextId = $prevId = false;
+ $nextNamespace = $nextPageId = $prevNamespace = $prevPageId = false;
$rows = [];
} elseif ( $dir === 'prev' ) {
if ( $numRows > $limit ) {
// More rows available after these ones
- // Get the nextId from the last row in the result set
- $nextId = $rows[$limit]->page_id;
+ // Get the next row from the last row in the result set
+ $nextNamespace = $rows[$limit]->page_namespace;
+ $nextPageId = $rows[$limit]->page_id;
// Remove undisplayed rows, for dir='prev' we need to discard first record after sorting
$rows = array_slice( $rows, 1, $limit );
- // Get the prevId from the first displayed row
- $prevId = $rows[0]->page_id;
+ // Get the prev row from the first displayed row
+ $prevNamespace = $rows[0]->page_namespace;
+ $prevPageId = $rows[0]->page_id;
} else {
- // Get the nextId from the last displayed row
- $nextId = $rows[$numRows - 1]->page_id;
- $prevId = false;
+ // Get the next row from the last displayed row
+ $nextNamespace = $rows[$numRows - 1]->page_namespace;
+ $nextPageId = $rows[$numRows - 1]->page_id;
+ $prevNamespace = false;
+ $prevPageId = false;
}
} else {
// If offset is not set disable prev link
- $prevId = $offset ? $rows[0]->page_id : false;
+ $prevNamespace = $offsetPageID ? $rows[0]->page_namespace : false;
+ $prevPageId = $offsetPageID ? $rows[0]->page_id : false;
if ( $numRows > $limit ) {
- // Get the nextId from the last displayed row
- $nextId = $rows[$limit - 1]->page_id;
+ // Get the next row from the last displayed row
+ $nextNamespace = $rows[$limit - 1]->page_namespace;
+ $nextPageId = $rows[$limit - 1]->page_id;
// Remove undisplayed rows
$rows = array_slice( $rows, 0, $limit );
} else {
- $nextId = false;
+ $nextNamespace = false;
+ $nextPageId = false;
}
}
@@ -403,7 +468,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir =
$out->addWikiMsg( 'whatlinkshere-count', Message::numParam( count( $rows ) ) );
- $prevnext = $this->getPrevNext( $prevId, $nextId );
+ $prevnext = $this->getPrevNext( $prevNamespace, $prevPageId, $nextNamespace, $nextPageId );
$out->addHTML( $prevnext );
}
$out->addHTML( $this->listStart( $level ) );
@@ -543,7 +608,7 @@ private function makeSelfLink( $text, $query ) {
);
}
- private function getPrevNext( $prevId, $nextId ) {
+ private function getPrevNext( $prevNamespace, $prevPageId, $nextNamespace, $nextPageId ) {
$currentLimit = $this->opts->getValue( 'limit' );
$prev = $this->msg( 'whatlinkshere-prev' )->numParams( $currentLimit )->text();
$next = $this->msg( 'whatlinkshere-next' )->numParams( $currentLimit )->text();
@@ -551,12 +616,12 @@ private function getPrevNext( $prevId, $nextId ) {
$changed = $this->opts->getChangedValues();
unset( $changed['target'] ); // Already in the request title
- if ( $prevId != 0 ) {
- $overrides = [ 'dir' => 'prev', 'offset' => $prevId, ];
+ if ( $prevPageId != 0 ) {
+ $overrides = [ 'dir' => 'prev', 'offset' => "$prevNamespace|$prevPageId", ];
$prev = Message::rawParam( $this->makeSelfLink( $prev, array_merge( $changed, $overrides ) ) );
}
- if ( $nextId != 0 ) {
- $overrides = [ 'dir' => 'next', 'offset' => $nextId, ];
+ if ( $nextPageId != 0 ) {
+ $overrides = [ 'dir' => 'next', 'offset' => "$nextNamespace|$nextPageId", ];
$next = Message::rawParam( $this->makeSelfLink( $next, array_merge( $changed, $overrides ) ) );
}
--
2.32.0

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9341658
Default Alt Text
T297754-squash.patch (11 KB)

Event Timeline