Page MenuHomePhabricator

Create Maintenance Script to Remove Terms in a Language from Current Revision
Closed, ResolvedPublic5 Estimated Story Points

Description

In order to efficiently clean up removed languages we should have a maintenance script that can remove the Terms from the current revision of the Entity.

Acceptance Criteria

  • It should take the language and the entity ids from which it is to be removed as parameters
  • After running, the given entities have no terms in that language in their current revision
  • It should not change history (i.e. it must not remove that language from old revisions)
  • It should leave a sensible revision comment for the change

Hints
For a maintenance script that makes changes to entities you could look at extensions/Wikibase/repo/maintenance/changePropertyDataType.php

Event Timeline

I wrote a basic maintenance script and ran it in production; it worked fine:

compactTermsRemover.php
<?php

namespace Wikibase;

use InvalidArgumentException;
use Maintenance;
use User;
use Wikibase\DataModel\Entity\EntityIdParsingException;
use Wikibase\DataModel\Term\AliasesProvider;
use Wikibase\DataModel\Term\DescriptionsProvider;
use Wikibase\DataModel\Term\LabelsProvider;
use Wikibase\Lib\Store\EntityRevisionLookup;
use Wikibase\Lib\Store\StorageException;
use Wikibase\Repo\WikibaseRepo;

// Locate the MediaWiki installation: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise fall back to a path relative to this file.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}

require_once $basePath . '/maintenance/Maintenance.php';

/**
 * Maintenance script that removes the labels, descriptions and aliases in one
 * language from the latest revision of a single entity and saves the result
 * through the entity store as an update.
 *
 * Options:
 *  --entity-id  Serialized id of the entity to edit.
 *  --language   Language code whose terms are removed.
 *
 * @license GPL-2.0-or-later
 */
class RemoveTermsInLanguages extends Maintenance {

	/**
	 * Registers the script description and its two required options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Removes terms in the given language in the given entities.' );

		$this->addOption( 'entity-id', 'Id of the entity', true, true );
		$this->addOption( 'language', 'Language to remove', true, true );
	}

	/**
	 * Loads the entity from the master database, strips the terms in the
	 * requested language from it and saves the modified entity.
	 *
	 * @throws StorageException if the entity revision could not be loaded
	 */
	public function execute() {
		// fatalError() always terminates the script; error() with a "die"
		// argument is deprecated and would let execution continue where the
		// argument is ignored.
		if ( !WikibaseSettings::isRepoEnabled() ) {
			$this->fatalError( "You need to have Wikibase enabled in order to use this maintenance script!\n" );
		}

		$repo = WikibaseRepo::getDefaultInstance();

		$idSerialization = $this->getOption( 'entity-id' );
		$language = $this->getOption( 'language' );

		try {
			$entityId = $repo->getEntityIdParser()->parse( $idSerialization );
		} catch ( EntityIdParsingException $e ) {
			// The parser signals unparsable input with EntityIdParsingException,
			// which is not an InvalidArgumentException.
			$this->fatalError( "Invalid entity id: " . $idSerialization );
		} catch ( InvalidArgumentException $e ) {
			// Kept for backward compatibility with the previous handling.
			$this->fatalError( "Invalid entity id: " . $idSerialization );
		}

		$entityRevisionLookup = $repo->getEntityRevisionLookup();
		$entityStore = $repo->getEntityStore();

		// Read from master so the edit is based on the most recent revision.
		$entityRevision = $entityRevisionLookup->getEntityRevision(
			$entityId,
			0,
			EntityRevisionLookup::LATEST_FROM_MASTER
		);

		if ( $entityRevision === null ) {
			throw new StorageException( "Could not load entity: " . $entityId->getSerialization() );
		}

		// Not every entity type carries all three kinds of terms, so each
		// kind is only touched when the entity provides it.
		$entity = $entityRevision->getEntity();
		if ( $entity instanceof LabelsProvider ) {
			$entity->getLabels()->removeByLanguage( $language );
		}

		if ( $entity instanceof DescriptionsProvider ) {
			$entity->getDescriptions()->removeByLanguage( $language );
		}

		if ( $entity instanceof AliasesProvider ) {
			$entity->getAliasGroups()->removeByLanguage( $language );
		}

		// newSystemUser() creates the account when it does not exist yet, so
		// the edit is always attributed to a registered system user.
		$user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
		if ( $user === null ) {
			$this->fatalError( "Could not obtain the 'Maintenance script' system user." );
		}

		// Passing the loaded revision id as base revision lets the store
		// detect edits made in the meantime.
		$entityStore->saveEntity(
			$entity,
			'Removed terms in language ' . $language,
			$user,
			EDIT_UPDATE,
			$entityRevision->getRevisionId()
		);

		$this->output( "Successfully removed terms in language $language from $idSerialization.\n" );
	}

}

// Name the maintenance class defined in this file, then include the runner
// referenced by RUN_MAINTENANCE_IF_MAIN (a constant not defined in this file;
// presumably provided by Maintenance.php — confirm), which is expected to
// pick up $maintClass and execute it.
$maintClass = RemoveTermsInLanguages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Addshore moved this task from Incoming to Ready to estimate on the Wikidata-Campsite board.

Can we get this added to wikibase.git? :)

Change 527111 had a related patch set uploaded (by Rosalie Perside (WMDE); owner: Rosalie Perside (WMDE)):
[mediawiki/extensions/Wikibase@master] Add maintenance script to remove Terms in a language from current revision

https://gerrit.wikimedia.org/r/527111

Change 529062 had a related patch set uploaded (by Ladsgroup; owner: Ladsgroup):
[mediawiki/extensions/Wikibase@master] Move RemoveTermsInLanguage.php to removeTermsInLanguage.php

https://gerrit.wikimedia.org/r/529062

Change 527111 merged by jenkins-bot:
[mediawiki/extensions/Wikibase@master] Add maintenance script to remove Terms in a language from current revision

https://gerrit.wikimedia.org/r/527111

Change 529062 merged by jenkins-bot:
[mediawiki/extensions/Wikibase@master] Move RemoveTermsInLanguage.php to removeTermsInLanguage.php

https://gerrit.wikimedia.org/r/529062