Page MenuHomePhabricator
Paste P18740

(An Untitled Masterwork)
ActivePublic

Authored by Legoktm on Jan 15 2022, 1:40 AM.
Tags
None
Referenced Files
F34918765: raw-paste-data.txt
Jan 15 2022, 2:42 AM
F34918758: raw-paste-data.txt
Jan 15 2022, 2:24 AM
F34918755: raw-paste-data.txt
Jan 15 2022, 2:15 AM
F34918754: raw-paste-data.txt
Jan 15 2022, 2:14 AM
F34918744: raw-paste-data.txt
Jan 15 2022, 1:40 AM
Subscribers
<?php
/**
* Delete links table entries which refer to a nonexisting page.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
* @author Rob Church <robchur@gmail.com>
* @author Kunal Mehta <legoktm@debian.org>
*/
require_once getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php';
use MediaWiki\MediaWikiServices;
/**
 * Maintenance script that deletes categorylinks rows whose cl_from refers to a
 * nonexisting page.
 *
 * Orphaned rows are located by LEFT JOINing categorylinks against page and
 * keeping the rows for which no page row matched. Work is done in batches of
 * distinct cl_from values, walking the table in ascending cl_from order.
 *
 * With --dry-run nothing is deleted; the script only reports what it found.
 *
 * @ingroup Maintenance
 */
class PurgeOrphanedLinks extends Maintenance {
	/**
	 * Register the script description, the --dry-run flag and the default
	 * batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Maintenance script to delete links entries which refer to a nonexisting page' );
		$this->addOption( 'dry-run', 'Dry-run' );
		$this->setBatchSize( 500 );
	}

	/**
	 * Find and delete orphaned categorylinks rows, one batch at a time.
	 *
	 * The loop ends when a batch query returns no orphaned cl_from values.
	 * It deliberately does not rely on affectedRows(): in dry-run mode no
	 * write is issued, and in normal mode other calls may run between the
	 * DELETE and the loop condition.
	 */
	public function execute() {
		$dbw = $this->getDB( DB_PRIMARY );
		$dryRun = $this->hasOption( 'dry-run' );
		$batchSize = (int)$this->getBatchSize();
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

		// Highest cl_from handled so far; null until the first batch is read so
		// that the first query has no lower bound at all.
		$last = null;

		while ( true ) {
			// 'page_id' => null is rendered as "page_id IS NULL", i.e. the LEFT JOIN
			// found no page row for this cl_from.
			$conds = [ 'page_id' => null ];
			if ( $last !== null ) {
				// Strictly greater: every row for the previous batch's IDs has
				// either been deleted or (dry-run) already been reported.
				$conds[] = 'cl_from > ' . $last;
			}

			# Find the next batch of orphaned links
			$ids = $dbw->selectFieldValues(
				[ 'categorylinks', 'page' ],
				'cl_from',
				$conds,
				__METHOD__,
				[
					// One page usually has several categorylinks rows; DISTINCT makes
					// the batch size count page IDs and keeps the cursor unambiguous.
					'DISTINCT',
					// A stable order is required for the cl_from cursor to be valid.
					'ORDER BY' => 'cl_from',
					'LIMIT' => $batchSize,
				],
				[ 'page' => [ 'LEFT JOIN', 'cl_from = page_id' ] ]
			);

			if ( !$ids ) {
				break;
			}

			$ids = array_map( 'intval', $ids );
			// Results are sorted ascending, so the final element is the cursor.
			$last = $ids[count( $ids ) - 1];
			$count = count( $ids );

			if ( $dryRun ) {
				$this->output( "dry-run: would delete categorylinks rows for {$count} page ID(s)\n" );
				continue;
			}

			$this->output( "deleting {$count}..." );
			$dbw->delete( 'categorylinks', [ 'cl_from' => $ids ], __METHOD__ );
			$this->output( "done. Sleeping\n" );
			// Let replicas catch up before issuing the next batch of deletes.
			$lbFactory->waitForReplication();
		}

		$this->output( "Done done!\n" );
	}
}
// Name of the maintenance class defined in this file.
// NOTE(review): presumably read by the maintenance runner to decide which class to
// instantiate — confirm against Maintenance.php.
$maintClass = PurgeOrphanedLinks::class;
// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it is a path
// constant provided by the Maintenance.php required above — confirm.
require_once RUN_MAINTENANCE_IF_MAIN;

Event Timeline

Should the delete be in the same batch or its own loop? Should we add a --dry-run option to just print out what'll get deleted?

Otherwise LGTM if you get $id set from the loop.

Legoktm edited the content of this paste. (Show Details)