The Wikibase-Quality extensions define a couple of database tables that need review before deployment. The table definitions can be found here (linking the v1 branch designated for deployment):
* https://git.wikimedia.org/tree/mediawiki%2Fextensions%2FWikibaseQuality/v1/sql%2Fcreate_wbq_evaluation.sql
* https://git.wikimedia.org/tree/mediawiki%2Fextensions%2FWikibaseQuality/v1/sql%2Fcreate_wbq_violations.sql
* https://git.wikimedia.org/tree/mediawiki%2Fextensions%2FWikibaseQualityExternalValidation/v1/sql%2Fcreate_wbqev_dump_information.sql
* https://git.wikimedia.org/tree/mediawiki%2Fextensions%2FWikibaseQualityExternalValidation/v1/sql%2Fcreate_wbqev_external_data.sql
* https://git.wikimedia.org/tree/mediawiki%2Fextensions%2FWikibaseQualityExternalValidation/v1/sql%2Fcreate_wbqev_identifier_properties.sql
* https://git.wikimedia.org/blob/mediawiki%2Fextensions%2FWikibaseQualityConstraints/v1/sql%2Fcreate_wbqc_constraints.sql
Code running queries against these can be found here:
* WikidataQuality: includes/Violations/SqlViolationRepo.php
* WikidataQualityConstraints: includes/ConstraintRepository.php
* WikidataQualityExternalValidation: includes/ExternalDataRepo.php
* WikidataQualityExternalValidation: includes/DumpMetaInformation/DumpMetaInformationRepo.php
Quick overview of definitions and queries in V1:
ConstraintRepository / wbqc_constraints
=======================
CREATE TABLE IF NOT EXISTS /*_*/wbqc_constraints (
constraint_guid VARCHAR(255) PRIMARY KEY,
pid INT(11) NOT NULL,
constraint_type_qid VARCHAR(255) NOT NULL,
constraint_parameters TEXT DEFAULT NULL
) /*$wgDBTableOptions*/;
CREATE INDEX /*i*/wbqc_constraints_pid_index
ON /*_*/wbqc_constraints (pid);
CREATE INDEX /*i*/wbqc_constraints_constraint_type_qid_index
ON /*_*/wbqc_constraints (constraint_type_qid);
Queries:
SELECT * FROM wbqc_constraints WHERE pid = "xxxxxxxxx";
INSERT INTO wbqc_constraints VALUES ....; -- batch size in UpdateTable defaults to 1000
DELETE FROM wbqc_constraints LIMIT 1000; -- looped until the table is empty; MySQL only.
insertBatch() does a `$db->commit( __METHOD__, "flush" )`. That's probably not a good idea. UpdateTables could do that.
ExternalDataRepo / wbqev_external_data
==================
CREATE TABLE IF NOT EXISTS /*_*/wbqev_external_data (
row_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT,
dump_id VARBINARY(25) NOT NULL,
external_id VARBINARY(100) NOT NULL,
pid VARBINARY(15) NOT NULL,
external_value VARBINARY(255) NOT NULL
) /*$wgDBTableOptions*/;
CREATE INDEX /*i*/dump_id_external_id_pid ON /*_*/wbqev_external_data (dump_id, external_id, pid);
Queries:
SELECT ( dump_id, external_id, pid, external_value )
FROM wbqev_external_data
WHERE dump_id IN ( ... )
AND external_id IN ( ... )
AND pid IN ( ... );
INSERT INTO wbqev_external_data VALUES ....; -- batch size in UpdateTable defaults to 1000
DELETE FROM wbqev_external_data WHERE dumpId IN ( ... ) LIMIT 1000; -- looped until the table is empty; MySQL only.
insertBatch() does a `$db->commit( __METHOD__, "flush" )`. That's probably not a good idea. UpdateTables could do that.
DumpMetaInformationRepo / wbqev_dump_information / wbqev_dump_information
==============
CREATE TABLE IF NOT EXISTS /*_*/wbqev_dump_information (
dump_id VARBINARY(25) PRIMARY KEY NOT NULL,
source_qid VARBINARY(15) NOT NULL,
import_date VARBINARY(25) NOT NULL,
language VARBINARY(10) NOT NULL,
source_url VARBINARY(300) UNIQUE NOT NULL,
size INT UNSIGNED NOT NULL,
license_qid VARBINARY(15) NOT NULL
) /*$wgDBTableOptions*/;
CREATE TABLE IF NOT EXISTS /*_*/wbqev_identifier_properties (
identifier_pid VARBINARY(15) NOT NULL,
dump_id VARBINARY(25) NOT NULL,
PRIMARY KEY (identifier_pid, dump_id)
) /*$wgDBTableOptions*/;
Queries:
SELECT * FROM wbqev_dump_information
LEFT JOIN wbqev_identifier_properties
ON wbqev_dump_information.dumpId = wbqev_identifier_properties.dump_id;
SELECT * FROM wbqev_dump_information
LEFT JOIN wbqev_identifier_properties
ON wbqev_dump_information.dumpId = wbqev_identifier_properties.dump_id
WHERE wbqev_dump_information.dumpId IN ( ... );
SELECT dump_id FROM wbqev_identifier_properties
WHERE identifier_pid IN ( ... );
SELECT DISTINCT source_qid FROM wbqev_dump_information;
INSERT INTO wbqev_dump_information ( ... );
UPDATE wbqev_dump_information SET ...;
DELETE FROM wbqev_identifier_properties WHERE dumpId = '...';
INSERT INTO wbqev_identifier_properties VALUES ...; -- no batch limit in the code, but maybe 10 in practice, max 2000 (number of properites on wikidata)
wbq_violations
=================
CREATE TABLE IF NOT EXISTS /*_*/wbq_violations (
entity_id VARBINARY(15) NOT NULL,
pid VARBINARY(15) NOT NULL,
claim_guid VARBINARY(63) NOT NULL,
constraint_id VARBINARY(63) NOT NULL,
constraint_type_entity_id VARBINARY(15) NOT NULL,
additional_info TEXT DEFAULT NULL,
updated_at VARBINARY(31) NOT NULL,
revision_id INT(10) UNSIGNED NOT NULL,
status VARBINARY(31) NOT NULL,
PRIMARY KEY (claim_guid, constraint_id)
) /*$wgDBTableOptions*/;
CREATE INDEX /*i*/claim_guid ON /*_*/wbq_violations (claim_guid);
CREATE INDEX /*i*/constraint_id ON /*_*/wbq_violations (constraint_id);
Unused in v1.
wbq_evaluation
=================
CREATE TABLE IF NOT EXISTS /*_*/wbq_evaluation (
special_page_id int NOT NULL,
entity_id VARCHAR(15) NOT NULL,
insertion_timestamp int NOT NULL,
reference_timestamp int DEFAULT NULL,
result_string TEXT DEFAULT NULL
) /*$wgDBTableOptions*/;
No index defined at all! Unused??