Page MenuHomePhabricator
Authored By
Lucas_Werkmeister_WMDE
Nov 24 2020, 7:23 PM
Size
17 KB
Referenced Files
None
Subscribers
None

T260349-2-alt.patch

From 2b0359cc805778af52cd415b02e6db6f7f7e2c58 Mon Sep 17 00:00:00 2001
From: Lucas Werkmeister <lucas.werkmeister@wikimedia.de>
Date: Tue, 24 Nov 2020 20:16:18 +0100
Subject: [PATCH] SECURITY: Add job to purge entity data on page deletion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Make EntityDataPurger handle a second hook, for when an entire page is
deleted; however, we don’t handle the deletion directly in the hook
handler. Instead, define a new job class to purge the entity data URLs
for all archived revisions of a deleted page, and make the hook handler
push that job. Deferring the purging to a job ensures that the main
transaction (archiving the rows) has committed and we can therefore read
the archived rows from a replica (though we still need to wait for
replication, in case the job runs immediately), and it’s probably also a
good idea in general to not read an unbounded number of archive rows in
the web request that is deleting the page.
The dependency injection pattern of PurgeEntityDataJob is mainly based
on ChangeVisibilityNotificationJob; the practice of using the standard
'namespace' and 'title' params even for a deleted page follows
DeletePageJob (MediaWiki core).
An additional feature not implemented here would be to pass the hook
handler’s $archivedRevisionCount into the job parameters, and have the
job log a warning if it processed a different number of archive rows.
However, I’ll leave that for later, once this patch becomes public.
Bug: T260349
Change-Id: I8f88178721c6c941530a03ca431725820a24d468
---
extension-repo.json | 2 +
repo/includes/Hooks/EntityDataPurger.php | 50 +++++-
repo/includes/PurgeEntityDataJob.php | 111 ++++++++++++++
.../includes/Hooks/EntityDataPurgerTest.php | 91 ++++++++++-
.../includes/PurgeEntityDataJobTest.php | 145 ++++++++++++++++++
5 files changed, 393 insertions(+), 6 deletions(-)
create mode 100644 repo/includes/PurgeEntityDataJob.php
create mode 100644 repo/tests/phpunit/includes/PurgeEntityDataJobTest.php
diff --git a/extension-repo.json b/extension-repo.json
index 32edde2886..b97735d4c5 100644
--- a/extension-repo.json
+++ b/extension-repo.json
@@ -269,6 +269,7 @@
},
"JobClasses": {
"CleanTermsIfUnused": "Wikibase\\Lib\\Store\\Sql\\Terms\\CleanTermsIfUnusedJob::getJobSpecification",
+ "PurgeEntityData": "Wikibase\\Repo\\PurgeEntityDataJob::newFromGlobalState",
"UpdateRepoOnMove": "Wikibase\\Repo\\UpdateRepo\\UpdateRepoOnMoveJob",
"UpdateRepoOnDelete": "Wikibase\\Repo\\UpdateRepo\\UpdateRepoOnDeleteJob",
"DispatchChangeDeletionNotification": "Wikibase\\Repo\\ChangeModification\\DispatchChangeDeletionNotificationJob"
@@ -530,6 +531,7 @@
"ApiQuery::moduleManager": "\\Wikibase\\Repo\\RepoHooks::onApiQueryModuleManager",
"ArticleDeleteComplete": [
"DeleteDispatcher",
+ "EntityDataPurger",
"\\Wikibase\\Repo\\RepoHooks::onArticleDeleteComplete"
],
"ArticleRevisionVisibilitySet": [
diff --git a/repo/includes/Hooks/EntityDataPurger.php b/repo/includes/Hooks/EntityDataPurger.php
index aaa63ed94a..b82ad894ed 100644
--- a/repo/includes/Hooks/EntityDataPurger.php
+++ b/repo/includes/Hooks/EntityDataPurger.php
@@ -5,7 +5,10 @@
namespace Wikibase\Repo\Hooks;
use HtmlCacheUpdater;
+use JobQueueGroup;
+use JobSpecification;
use MediaWiki\Hook\ArticleRevisionVisibilitySetHook;
+use MediaWiki\Page\Hook\ArticleDeleteCompleteHook;
use Title;
use Wikibase\Lib\Store\EntityIdLookup;
use Wikibase\Repo\LinkedData\EntityDataUriManager;
@@ -14,7 +17,7 @@
/**
* @license GPL-2.0-or-later
*/
-class EntityDataPurger implements ArticleRevisionVisibilitySetHook {
+class EntityDataPurger implements ArticleRevisionVisibilitySetHook, ArticleDeleteCompleteHook {
/** @var EntityIdLookup */
private $entityIdLookup;
@@ -25,14 +28,19 @@ class EntityDataPurger implements ArticleRevisionVisibilitySetHook {
/** @var HtmlCacheUpdater */
private $htmlCacheUpdater;
+ /** @var callable */
+ private $jobQueueGroupFactory;
+
public function __construct(
EntityIdLookup $entityIdLookup,
EntityDataUriManager $entityDataUriManager,
- HtmlCacheUpdater $htmlCacheUpdater
+ HtmlCacheUpdater $htmlCacheUpdater,
+ callable $jobQueueGroupFactory
) {
$this->entityIdLookup = $entityIdLookup;
$this->entityDataUriManager = $entityDataUriManager;
$this->htmlCacheUpdater = $htmlCacheUpdater;
+ $this->jobQueueGroupFactory = $jobQueueGroupFactory;
}
public static function factory( HtmlCacheUpdater $htmlCacheUpdater ): self {
@@ -40,7 +48,8 @@ public static function factory( HtmlCacheUpdater $htmlCacheUpdater ): self {
return new self(
$wikibaseRepo->getEntityIdLookup(),
$wikibaseRepo->getEntityDataUriManager(),
- $htmlCacheUpdater
+ $htmlCacheUpdater,
+ 'JobQueueGroup::singleton'
);
}
@@ -68,4 +77,39 @@ public function onArticleRevisionVisibilitySet( $title, $ids, $visibilityChangeM
}
}
+ /**
+ * @param \WikiPage $wikiPage
+ * @param \User $user
+ * @param string $reason
+ * @param int $id
+ * @param \Content|null $content
+ * @param \ManualLogEntry $logEntry
+ * @param int $archivedRevisionCount
+ * @return bool|void
+ */
+ public function onArticleDeleteComplete(
+ $wikiPage,
+ $user,
+ $reason,
+ $id,
+ $content,
+ $logEntry,
+ $archivedRevisionCount
+ ) {
+ $title = $wikiPage->getTitle();
+ $entityId = $this->entityIdLookup->getEntityIdForTitle( $title );
+ if ( !$entityId ) {
+ return;
+ }
+
+ /** @var JobQueueGroup $jobQueueGroup */
+ '@phan-var JobQueueGroup $jobQueueGroup';
+ $jobQueueGroup = ( $this->jobQueueGroupFactory )();
+ $jobQueueGroup->push( new JobSpecification( 'PurgeEntityData', [
+ 'namespace' => $title->getNamespace(),
+ 'title' => $title->getDBkey(),
+ 'pageId' => $id,
+ 'entityId' => $entityId->getSerialization(),
+ ] ) );
+ }
}
diff --git a/repo/includes/PurgeEntityDataJob.php b/repo/includes/PurgeEntityDataJob.php
new file mode 100644
index 0000000000..7c135e6b97
--- /dev/null
+++ b/repo/includes/PurgeEntityDataJob.php
@@ -0,0 +1,111 @@
+<?php
+
+declare( strict_types = 1 );
+
+namespace Wikibase\Repo;
+
+use BatchRowIterator;
+use HtmlCacheUpdater;
+use Job;
+use MediaWiki\MediaWikiServices;
+use Title;
+use Wikibase\DataModel\Entity\EntityIdParser;
+use Wikibase\Repo\LinkedData\EntityDataUriManager;
+use Wikimedia\Rdbms\ILBFactory;
+use Wikimedia\Rdbms\ILoadBalancer;
+
+/**
+ * Job to purge Special:EntityData URLs from the HTTP cache after a page was deleted.
+ *
+ * @license GPL-2.0-or-later
+ */
+class PurgeEntityDataJob extends Job {
+
+ /** @var EntityIdParser */
+ private $entityIdParser;
+
+ /** @var EntityDataUriManager */
+ private $entityDataUriManager;
+
+ /** @var ILBFactory */
+ private $lbFactory;
+
+ /** @var HtmlCacheUpdater */
+ private $htmlCacheUpdater;
+
+ /** @var int */
+ private $batchSize;
+
+ public function __construct(
+ EntityIdParser $entityIdParser,
+ EntityDataUriManager $entityDataUriManager,
+ ILBFactory $lbFactory,
+ HtmlCacheUpdater $htmlCacheUpdater,
+ int $batchSize,
+ array $params
+ ) {
+ parent::__construct( 'PurgeEntityData', $params );
+ $this->entityIdParser = $entityIdParser;
+ $this->entityDataUriManager = $entityDataUriManager;
+ $this->lbFactory = $lbFactory;
+ $this->htmlCacheUpdater = $htmlCacheUpdater;
+ $this->batchSize = $batchSize;
+ }
+
+ public static function newFromGlobalState( Title $unused, array $params ): self {
+ $repo = WikibaseRepo::getDefaultInstance();
+ $services = MediaWikiServices::getInstance();
+ return new self(
+ $repo->getEntityIdParser(),
+ $repo->getEntityDataUriManager(),
+ $services->getDBLoadBalancerFactory(),
+ $services->getHtmlCacheUpdater(),
+ $services->getMainConfig()->get( 'UpdateRowsPerQuery' ),
+ $params
+ );
+ }
+
+ public function run(): void {
+ $entityId = $this->entityIdParser->parse( $this->params['entityId'] );
+
+ // URLs for latest data...
+ $urls = $this->entityDataUriManager->getPotentiallyCachedUrls( $entityId );
+ foreach ( $this->getArchivedRevisionIds() as $revisionId ) {
+ // ...and URLs for each specific revision
+ $urls = array_merge( $urls, $this->entityDataUriManager->getPotentiallyCachedUrls(
+ $entityId,
+ $revisionId
+ ) );
+ }
+
+ if ( $urls !== [] ) {
+ $this->htmlCacheUpdater->purgeUrls( $urls );
+ }
+ }
+
+ private function getArchivedRevisionIds(): iterable {
+ // read archive rows from a replica, but only after it has caught up with the master
+ // (in theory we only need to wait for the master pos as of when the job was pushed,
+ // but DBMasterPos is an opaque object, so we can’t put it in the job params)
+ $this->lbFactory->waitForReplication( [
+ 'domain' => $this->lbFactory->getLocalDomainID(),
+ ] );
+ $dbr = $this->lbFactory->getMainLB()->getConnection( ILoadBalancer::DB_REPLICA );
+
+ $iterator = new BatchRowIterator( $dbr, 'archive', [ 'ar_id' ], $this->batchSize );
+ $iterator->setFetchColumns( [ 'ar_rev_id' ] );
+ $iterator->addConditions( [
+ 'ar_page_id' => $this->params['pageId'],
+ 'ar_title' => $this->params['title'],
+ 'ar_namespace' => $this->params['namespace'],
+ ] );
+ $iterator->setCaller( __METHOD__ );
+
+ foreach ( $iterator as $batch ) {
+ foreach ( $batch as $row ) {
+ yield (int)$row->ar_rev_id;
+ }
+ }
+ }
+
+}
diff --git a/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php b/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php
index a58766c619..e4b9f3ac6d 100644
--- a/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php
+++ b/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php
@@ -5,13 +5,17 @@
namespace Wikibase\Repo\Tests\Hooks;
use HtmlCacheUpdater;
+use IJobSpecification;
+use JobQueueGroup;
use PHPUnit\Framework\TestCase;
+use stdClass;
use Title;
use Wikibase\DataModel\Entity\ItemId;
use Wikibase\Lib\Store\EntityIdLookup;
use Wikibase\Repo\Hooks\EntityDataPurger;
use Wikibase\Repo\LinkedData\EntityDataRequestHandler;
use Wikibase\Repo\LinkedData\EntityDataUriManager;
+use WikiPage;
/**
* @covers \Wikibase\Repo\Hooks\EntityDataPurger
@@ -22,6 +26,20 @@
*/
class EntityDataPurgerTest extends TestCase {
+ private function mockJobQueueGroupFactory( JobQueueGroup $jobQueueGroup = null ): callable {
+ $factory = $this->getMockBuilder( stdClass::class )
+ ->addMethods( [ 'factory' ] )
+ ->getMock();
+ if ( $jobQueueGroup !== null ) {
+ $factory->method( 'factory' )
+ ->willReturn( $jobQueueGroup );
+ } else {
+ $factory->expects( $this->never() )
+ ->method( 'factory' );
+ }
+ return [ $factory, 'factory' ];
+ }
+
public function testGivenEntityIdLookupReturnsNull_handlerDoesNothing() {
$title = Title::newFromText( 'Project:About' );
$entityIdLookup = $this->createMock( EntityIdLookup::class );
@@ -35,7 +53,12 @@ public function testGivenEntityIdLookupReturnsNull_handlerDoesNothing() {
$htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class );
$htmlCacheUpdater->expects( $this->never() )
->method( 'purgeUrls' );
- $purger = new EntityDataPurger( $entityIdLookup, $entityDataUriManager, $htmlCacheUpdater );
+ $purger = new EntityDataPurger(
+ $entityIdLookup,
+ $entityDataUriManager,
+ $htmlCacheUpdater,
+ $this->mockJobQueueGroupFactory()
+ );
$purger->onArticleRevisionVisibilitySet( $title, [ 1, 2, 3 ], [] );
}
@@ -57,7 +80,12 @@ public function testGivenEntityIdLookupReturnsId_handlerPurgesCache() {
$htmlCacheUpdater->expects( $this->once() )
->method( 'purgeUrls' )
->with( [ 'urlA/Q1/1', 'urlB/Q1/1' ] );
- $purger = new EntityDataPurger( $entityIdLookup, $entityDataUriManager, $htmlCacheUpdater );
+ $purger = new EntityDataPurger(
+ $entityIdLookup,
+ $entityDataUriManager,
+ $htmlCacheUpdater,
+ $this->mockJobQueueGroupFactory()
+ );
$purger->onArticleRevisionVisibilitySet( $title, [ 1 ], [] );
}
@@ -91,8 +119,65 @@ public function testGivenMultipleRevisions_handlerPurgesCacheOnce() {
'urlA/Q1/2', 'urlB/Q1/2',
'urlA/Q1/3', 'urlB/Q1/3',
] );
- $purger = new EntityDataPurger( $entityIdLookup, $entityDataUriManager, $htmlCacheUpdater );
+ $purger = new EntityDataPurger(
+ $entityIdLookup,
+ $entityDataUriManager,
+ $htmlCacheUpdater,
+ $this->mockJobQueueGroupFactory()
+ );
$purger->onArticleRevisionVisibilitySet( $title, [ 1, 2, 3 ], [] );
}
+
+ public function testDeletionHandlerPushesJob() {
+ $title = Title::makeTitle( 0, 'Q123' );
+ $wikiPage = $this->createMock( WikiPage::class );
+ $wikiPage->method( 'getTitle' )
+ ->willReturn( $title );
+
+ $entityIdLookup = $this->createMock( EntityIdLookup::class );
+ $entityIdLookup->method( 'getEntityIdForTitle' )
+ ->with( $title )
+ ->willReturn( new ItemId( 'Q123' ) );
+ $entityDataUriManager = $this->createMock( EntityDataUriManager::class );
+ $entityDataUriManager->expects( $this->never() )
+ ->method( 'getPotentiallyCachedUrls' );
+ $htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class );
+ $htmlCacheUpdater->expects( $this->never() )
+ ->method( 'purgeUrls' );
+
+ $jobQueueGroup = $this->createMock( JobQueueGroup::class );
+ $jobQueueGroup->expects( $this->once() )
+ ->method( 'push' )
+ ->with( $this->callback( function ( IJobSpecification $specification ) {
+ $this->assertSame( 'PurgeEntityData', $specification->getType() );
+ $expectedParams = [
+ 'namespace' => 0,
+ 'title' => 'Q123',
+ 'pageId' => 123,
+ 'entityId' => 'Q123',
+ ];
+ $actualParams = $specification->getParams();
+ ksort( $expectedParams );
+ ksort( $actualParams );
+ $this->assertSame( $expectedParams, $actualParams );
+ return true;
+ } ) );
+
+ $purger = new EntityDataPurger(
+ $entityIdLookup,
+ $entityDataUriManager,
+ $htmlCacheUpdater,
+ $this->mockJobQueueGroupFactory( $jobQueueGroup )
+ );
+
+ $purger->onArticleDeleteComplete(
+ $wikiPage,
+ // unused
+ null, null,
+ 123,
+ // unused
+ null, null, null
+ );
+ }
}
diff --git a/repo/tests/phpunit/includes/PurgeEntityDataJobTest.php b/repo/tests/phpunit/includes/PurgeEntityDataJobTest.php
new file mode 100644
index 0000000000..9404618528
--- /dev/null
+++ b/repo/tests/phpunit/includes/PurgeEntityDataJobTest.php
@@ -0,0 +1,145 @@
+<?php
+
+declare( strict_types = 1 );
+
+namespace Wikibase\Repo\Tests;
+
+use HtmlCacheUpdater;
+use MediaWikiIntegrationTestCase;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\DataModel\Entity\ItemIdParser;
+use Wikibase\Repo\LinkedData\EntityDataUriManager;
+use Wikibase\Repo\PurgeEntityDataJob;
+use Wikimedia\Rdbms\LBFactorySingle;
+
+/**
+ * @covers \Wikibase\Repo\PurgeEntityDataJob
+ *
+ * @group Database
+ * @group Wikibase
+ *
+ * @license GPL-2.0-or-later
+ */
+class PurgeEntityDataJobTest extends MediaWikiIntegrationTestCase {
+
+ protected function setUp(): void {
+ parent::setUp();
+ $this->tablesUsed[] = 'archive';
+ }
+
+ public function addDBData() {
+ $this->db->truncate( 'archive', __METHOD__ ); // T265033
+ $this->db->insert( 'archive', [
+ // relevant rows: the corresponding URLs should be purged
+ [
+ 'ar_id' => 1,
+ 'ar_namespace' => 0,
+ 'ar_title' => 'Q123',
+ 'ar_page_id' => 123,
+ 'ar_rev_id' => 1234,
+ ],
+ [
+ 'ar_id' => 2,
+ 'ar_namespace' => 0,
+ 'ar_title' => 'Q123',
+ 'ar_page_id' => 123,
+ 'ar_rev_id' => 1235,
+ ],
+ // irrelevant row: wrong namespace
+ [
+ 'ar_id' => 3,
+ 'ar_namespace' => 1,
+ 'ar_title' => 'Q123',
+ 'ar_page_id' => 123,
+ 'ar_rev_id' => 1236,
+ ],
+ // irrelevant row: wrong title
+ [
+ 'ar_id' => 4,
+ 'ar_namespace' => 0,
+ 'ar_title' => 'Q124',
+ 'ar_page_id' => 123,
+ 'ar_rev_id' => 1237,
+ ],
+ // irrelevant row: wrong page ID
+ // (possible if the same namespace+title was deleted several times)
+ [
+ 'ar_id' => 5,
+ 'ar_namespace' => 1,
+ 'ar_title' => 'Q123',
+ 'ar_page_id' => 124,
+ 'ar_rev_id' => 1238,
+ ],
+ ], __METHOD__ );
+ }
+
+ public function testRun_purgesPotentiallyCachedUrls() {
+ $itemId = new ItemId( 'Q123' );
+
+ $entityDataUriManager = $this->createMock( EntityDataUriManager::class );
+ $entityDataUriManager->expects( $this->exactly( 3 ) )
+ ->method( 'getPotentiallyCachedUrls' )
+ ->withConsecutive(
+ [ $itemId ],
+ [ $itemId, 1234 ],
+ [ $itemId, 1235 ]
+ )
+ ->willReturnOnConsecutiveCalls(
+ [ '/Special:EntityData/Q123' ],
+ [ '/Special:EntityData/Q123?revision=1234' ],
+ [ '/Special:EntityData/Q123?revision=1235' ]
+ );
+
+ $htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class );
+ $htmlCacheUpdater->expects( $this->once() )
+ ->method( 'purgeUrls' )
+ ->with( [
+ '/Special:EntityData/Q123',
+ '/Special:EntityData/Q123?revision=1234',
+ '/Special:EntityData/Q123?revision=1235',
+ ] );
+
+ $job = new PurgeEntityDataJob(
+ new ItemIdParser(),
+ $entityDataUriManager,
+ new LBFactorySingle( [ 'connection' => $this->db ] ),
+ $htmlCacheUpdater,
+ 1,
+ [
+ 'namespace' => 0,
+ 'title' => 'Q123',
+ 'pageId' => 123,
+ 'entityId' => 'Q123',
+ ]
+ );
+
+ $job->run();
+ }
+
+ public function testRun_doesNotCallPurgeWithEmptyUrls() {
+ $entityDataUriManager = $this->createMock( EntityDataUriManager::class );
+ $entityDataUriManager->method( 'getPotentiallyCachedUrls' )
+ ->willReturn( [] );
+
+ $htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class );
+ $htmlCacheUpdater->expects( $this->never() )
+ ->method( 'purgeUrls' );
+
+ $job = new PurgeEntityDataJob(
+ new ItemIdParser(),
+ $entityDataUriManager,
+ new LBFactorySingle( [ 'connection' => $this->db ] ),
+ $htmlCacheUpdater,
+ 100,
+ [
+ 'namespace' => 0,
+ 'title' => 'Q456',
+ 'pageId' => 456,
+ 'entityId' => 'Q456',
+ ]
+ );
+
+ $job->run();
+ }
+
+}
--
2.27.0

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
8791470
Default Alt Text
T260349-2-alt.patch (17 KB)

Event Timeline