diff --git a/src/MappingValidator/MappingValidator.php b/src/MappingValidator/MappingValidator.php
index b381f5c..2ff02f0 100644
--- a/src/MappingValidator/MappingValidator.php
+++ b/src/MappingValidator/MappingValidator.php
@@ -1,423 +1,492 @@
sparqlClient = new SparqlClient();
$this->cache = $cache;
}
/**
 * Validates the Wikidata mappings that point into one external vocabulary.
 *
 * The method loads the RDF definition of the vocabulary, then queries Wikidata
 * for the statements whose value starts with the vocabulary prefix and checks:
 *  - equivalent class (P1709) mappings: target typed as a class, subject is an
 *    item, no conflicting mappings, and subclass (P279) links mirrored by
 *    rdfs:subClassOf in the vocabulary;
 *  - equivalent property (P1628) mappings: same checks on the property side,
 *    with subproperty (P1647) links mirrored by rdfs:subPropertyOf;
 *  - P2235 / P2236 mappings: both ends must be properties;
 *  - exact match (P2888) mappings: target should be neither a class nor a
 *    property, subject is an item, and no conflict with the other mappings.
 *
 * When the prefix is not a URL, or when no triple could be loaded, the method
 * returns early with a single message that has not been through
 * formatMessages().
 *
 * @param string $vocabularyPrefix URI prefix shared by the terms of the vocabulary
 * @param string $vocabularyDefinitionUrl location of the RDF definition of the vocabulary
 * @return ValidationMessage[]
 */
public function validate( string $vocabularyPrefix, string $vocabularyDefinitionUrl ): array {
	$messages = [];

	// The prefix is the value being validated, so it is the one reported.
	// The definition URL is not validated here: it is optional, see below.
	if ( !filter_var( $vocabularyPrefix, FILTER_VALIDATE_URL ) ) {
		$messages[] = new ValidationMessage(
			ValidationMessage::LEVEL_ERROR,
			"{$vocabularyPrefix} is not a valid URL"
		);
		return $messages;
	}

	$triples = $this->loadTriples( $vocabularyDefinitionUrl );
	$count = $triples->count();
	if ( $count === 0 ) {
		$messages[] = new ValidationMessage(
			ValidationMessage::LEVEL_INFO,
			"No machine readable description of the {$vocabularyPrefix} vocabulary found. " .
			"Please set an URL to a RDF definition of the vocabulary " .
			"in the 'Vocabulary RDF definition' parameter."
		);
		return $messages;
	}
	$messages[] = new ValidationMessage(
		ValidationMessage::LEVEL_INFO,
		"{$count} triple extracted from the definition file {$vocabularyDefinitionUrl}"
	);

	// We do checks for equivalent class
	// We first build a WD => external mapping
	// with error if there is a conflict and do some preliminary checks
	$entityToClassMapping = [];
	$classToEntityMapping = [];
	foreach ( $this->pairsForWikidataProperties( [ 'P1709' ], $vocabularyPrefix ) as $mapping ) {
		$uri = $mapping['uri'];
		$entity = $mapping['entity'];

		// We check that all equivalent classes have the rdfs:Class or owl:Class type
		if ( !$this->isClass( $uri, $triples ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_ERROR,
				"{$this->formatLink($uri)} is mapped " .
				"as equivalent class to {$this->formatLink($entity)} " .
				"but is not an instance of rdfs:Class\nor owl:Class\n."
			);
		}

		// We check that all equivalent classes are used on item entities
		if ( strpos( $entity, 'http://www.wikidata.org/entity/Q' ) !== 0 ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_ERROR,
				"{$this->formatLink($uri)} not is mapped " .
				"to a Wikidata item but to {$this->formatLink($entity)}."
			);
		}

		// The same WD entity maps to multiple things
		if ( array_key_exists( $entity, $entityToClassMapping ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($entity)} is mapped as equivalent class to both " .
				"{$this->formatLink($entityToClassMapping[$entity])} and {$this->formatLink($uri)}."
			);
		}

		// Multiple WD entities map to the same thing
		if ( array_key_exists( $uri, $classToEntityMapping ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is mapped as equivalent class from both " .
				"{$this->formatLink($classToEntityMapping[$uri])} and {$this->formatLink($entity)}."
			);
		}

		$entityToClassMapping[$entity] = $uri;
		$classToEntityMapping[$uri] = $entity;
	}

	// We check that, if an element is subclass of an other in Wikidata,
	// it is sub class of in the ontology
	// TODO: make it do more than 1 hop
	$wdSubClassOfHierarchy = $this->wikidataDirectChildOf(
		array_keys( $entityToClassMapping ),
		'P279'
	);
	foreach ( $wdSubClassOfHierarchy as $map ) {
		$sub = $map['sub'];
		$super = $map['super'];
		if ( !$this->isInSuperHierarchy(
			$entityToClassMapping[$sub], $entityToClassMapping[$super],
			self::RDFS_SUB_CLASS_OF, $triples
		) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($sub)} is a sub class of {$this->formatLink($super)} but " .
				"{$this->formatLink($entityToClassMapping[$sub])} is not a sub class of " .
				"{$this->formatLink($entityToClassMapping[$super])}."
			);
		}
	}

	// We do checks for equivalent properties
	// We first build a WD => external mapping
	// with error if there is a conflict and do some preliminary checks
	$entityToPropertyMapping = [];
	$propertyToEntityMapping = [];
	foreach ( $this->pairsForWikidataProperties( [ 'P1628' ], $vocabularyPrefix ) as $mapping ) {
		$uri = $mapping['uri'];
		$entity = $mapping['entity'];

		$this->validateIsBetweenProperty( $entity, $uri, $messages, $triples );

		// The same WD entity maps to multiple things
		if ( array_key_exists( $entity, $entityToPropertyMapping ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($entity)} is mapped as equivalent property to both " .
				"{$this->formatLink($entityToPropertyMapping[$entity])} and {$this->formatLink($uri)}."
			);
		}

		// Multiple WD entities map to the same thing
		if ( array_key_exists( $uri, $propertyToEntityMapping ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is mapped as equivalent property from both " .
				"{$this->formatLink($propertyToEntityMapping[$uri])} and {$this->formatLink($entity)}."
			);
		}

		$entityToPropertyMapping[$entity] = $uri;
		$propertyToEntityMapping[$uri] = $entity;
	}

	// We check that, if an element is subproperty of an other in Wikidata,
	// it is sub property of it in the ontology
	// TODO: make it do more than 1 hop
	$wdSubPropertyOfHierarchy = $this->wikidataDirectChildOf(
		array_keys( $entityToPropertyMapping ),
		'P1647'
	);
	foreach ( $wdSubPropertyOfHierarchy as $map ) {
		$sub = $map['sub'];
		$super = $map['super'];
		if ( !$this->isInSuperHierarchy(
			$entityToPropertyMapping[$sub], $entityToPropertyMapping[$super],
			self::RDFS_SUB_PROPERTY_OF, $triples
		) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($sub)} is a sub property of {$this->formatLink($super)} but " .
				"{$this->formatLink($entityToPropertyMapping[$sub])} is not a sub property of " .
				"{$this->formatLink($entityToPropertyMapping[$super])}."
			);
		}
	}

	// We do checks for sub/super properties
	foreach ( $this->pairsForWikidataProperties(
		[ 'P2235', 'P2236' ], $vocabularyPrefix
	) as $mapping ) {
		$this->validateIsBetweenProperty( $mapping['entity'], $mapping['uri'], $messages, $triples );
	}

	// We do checks for exact match properties
	$entityToUriMatchMapping = [];
	$uriToEntityMatchMapping = [];
	foreach ( $this->pairsForWikidataProperties( [ 'P2888' ], $vocabularyPrefix ) as $mapping ) {
		$uri = $mapping['uri'];
		$entity = $mapping['entity'];

		// We check that targets of exact match mappings are not classes or properties,
		// and point to the dedicated mapping property when they are
		if ( $this->isProperty( $uri, $triples ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is an exact match of " .
				"{$this->formatLink($entity)} " .
				"and is a property, you may want to use " .
				"equivalent property (P1628) " .
				"instead."
			);
		}
		if ( $this->isClass( $uri, $triples ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is an exact match of " .
				"{$this->formatLink($entity)} " .
				"and is a class, you may want to use " .
				"equivalent class (P1709) " .
				"instead."
			);
		}

		// We check that all exact match mappings are used on item entities
		if ( strpos( $entity, 'http://www.wikidata.org/entity/Q' ) !== 0 ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_ERROR,
				"{$this->formatLink($uri)} not is mapped " .
				"to a Wikidata item but to {$this->formatLink($entity)}."
			);
		}

		// The same WD entity maps to multiple things
		if ( array_key_exists( $entity, $entityToUriMatchMapping ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($entity)} is mapped as exact match to both " .
				"{$this->formatLink($entityToUriMatchMapping[$entity])} and {$this->formatLink($uri)}."
			);
		}
		if (
			array_key_exists( $entity, $entityToPropertyMapping ) &&
			$entityToPropertyMapping[$entity] !== $uri
		) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($entity)} is mapped as exact match to " .
				"{$this->formatLink($uri)} " .
				"and as equivalent property to {$this->formatLink($entityToPropertyMapping[$entity])}."
			);
		}
		if (
			array_key_exists( $entity, $entityToClassMapping ) &&
			$entityToClassMapping[$entity] !== $uri
		) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($entity)} is mapped as exact match to " .
				"{$this->formatLink($uri)} " .
				"and as equivalent class to {$this->formatLink($entityToClassMapping[$entity])}."
			);
		}

		// Multiple WD entities map to the same thing
		if ( array_key_exists( $uri, $uriToEntityMatchMapping ) ) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is mapped as exact match from both " .
				"{$this->formatLink($uriToEntityMatchMapping[$uri])} and {$this->formatLink($entity)}."
			);
		}
		if (
			array_key_exists( $uri, $propertyToEntityMapping ) &&
			$propertyToEntityMapping[$uri] !== $entity
		) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is mapped as exact match from " .
				"{$this->formatLink($entity)} " .
				"and as equivalent property from {$this->formatLink($propertyToEntityMapping[$uri])}."
			);
		}
		if (
			array_key_exists( $uri, $classToEntityMapping ) &&
			$classToEntityMapping[$uri] !== $entity
		) {
			$messages[] = new ValidationMessage(
				ValidationMessage::LEVEL_WARNING,
				"{$this->formatLink($uri)} is mapped as exact match from " .
				"{$this->formatLink($entity)} " .
				"and as equivalent class from {$this->formatLink($classToEntityMapping[$uri])}."
			);
		}

		$entityToUriMatchMapping[$entity] = $uri;
		$uriToEntityMatchMapping[$uri] = $entity;
	}

	if ( !$this->withErrorOrWarning( $messages ) ) {
		$messages[] = new ValidationMessage(
			ValidationMessage::LEVEL_INFO,
			"Mapping have been validated without any error or warning found"
		);
	}

	// formatLink() only emitted $Pxx$ / $Qxx$ placeholders so far: fetch the
	// labels of every entity it recorded, then expand the placeholders.
	$this->loadLabels();
	return $this->formatMessages( $messages );
}
/**
 * Returns the triples of an RDF document wrapped in a SimpleTripleStore.
 *
 * The raw triples are looked up in the cache under a key derived from the MD5
 * of the document URI; on a miss (the cache returning null) they are loaded
 * with doLoadTriples() and stored with 60 * 60 * 24 as third argument to
 * set() (presumably a lifetime in seconds, i.e. one day - confirm against the
 * cache implementation).
 *
 * @param string $documentUri location of the RDF document
 * @return SimpleTripleStore
 */
private function loadTriples( $documentUri ) {
	$cacheKey = 'tptools-mapping-' . md5( $documentUri );

	$cached = $this->cache->get( $cacheKey );
	if ( $cached !== null ) {
		return new SimpleTripleStore( $cached );
	}

	$fresh = $this->doLoadTriples( $documentUri );
	$this->cache->set( $cacheKey, $fresh, 24 * 60 * 60 );

	return new SimpleTripleStore( $fresh );
}
/**
 * Parses an RDF document with ARC2 and returns its triples.
 *
 * For an http:// URI the https:// variant is tried first and the original URI
 * is used as a fallback. The first attempt that yields a non-empty list of
 * triples wins.
 *
 * @param string $documentUri location of the RDF document
 * @return array the parsed triples, or an empty array when every attempt came back empty
 */
private function doLoadTriples( $documentUri ) {
	$candidateUris = [ $documentUri ];
	if ( strpos( $documentUri, 'http://' ) === 0 ) {
		// Hack to avoid compatibility problems between ARC2 and HSTS.
		// Only the leading scheme is rewritten: a blanket str_replace would also
		// alter any "http://" embedded further in the path or the query string.
		array_unshift( $candidateUris, 'https://' . substr( $documentUri, strlen( 'http://' ) ) );
	}

	foreach ( $candidateUris as $candidateUri ) {
		$parser = ARC2::getRDFParser();
		$parser->parse( $candidateUri );
		$triples = $parser->getTriples();
		if ( !empty( $triples ) ) {
			return $triples;
		}
	}

	return [];
}
/**
 * Checks that a mapping links a Wikidata property to a vocabulary property.
 *
 * Appends an error when $uri is not typed as a property in $triples, and
 * another one when $entity is not a URI in the Wikidata property namespace.
 *
 * @param string $entity URI of the Wikidata entity holding the mapping
 * @param string $uri URI of the mapped vocabulary term
 * @param ValidationMessage[] &$messages list the errors are appended to
 * @param SimpleTripleStore $triples definition of the vocabulary
 */
private function validateIsBetweenProperty(
	$entity, $uri, &$messages, SimpleTripleStore $triples
) {
	// The target of a property mapping must have one of the property types.
	$targetIsProperty = $this->isProperty( $uri, $triples );
	if ( !$targetIsProperty ) {
		$text = $this->formatLink( $uri ) . ' is mapped to ' .
			$this->formatLink( $entity ) . ' ' .
			"but is not an instance of rdf:Property\n, owl:DatatypeProperty\nor owl:ObjectProperty\n.";
		$messages[] = new ValidationMessage( ValidationMessage::LEVEL_ERROR, $text );
	}

	// The subject of a property mapping must be a property entity.
	$propertyNamespace = 'http://www.wikidata.org/entity/P';
	if ( strpos( $entity, $propertyNamespace ) !== 0 ) {
		$text = $this->formatLink( $uri ) . ' not is mapped to a Wikidata property but to ' .
			$this->formatLink( $entity ) . '.';
		$messages[] = new ValidationMessage( ValidationMessage::LEVEL_ERROR, $text );
	}
}
/**
 * Queries the (entity, uri) pairs linked by any of the given direct
 * properties whose value starts with the vocabulary prefix.
 *
 * @param string[] $propertyIds property ids such as 'P1709'; they are joined
 *  into a SPARQL alternative path (wdt:Pa|wdt:Pb)
 * @param string $vocabularyPrefix prefix the string form of ?uri must start with
 * @return mixed whatever SparqlClient::getTuples() returns; callers iterate it
 *  and read the 'entity' and 'uri' keys of each element
 */
private function pairsForWikidataProperties( $propertyIds, $vocabularyPrefix ) {
	$propertyPath = implode( '|', array_map( function ( $propertyId ) {
		return 'wdt:' . $propertyId;
	}, $propertyIds ) );

	// The prefix ends up inside a double-quoted SPARQL string literal: escape the
	// characters that may not appear raw in one (backslash, double quote, line
	// breaks), so that a crafted prefix cannot close the literal and alter the query.
	$prefixLiteral = addcslashes( $vocabularyPrefix, "\\\"\n\r" );

	return $this->sparqlClient->getTuples(
		'SELECT DISTINCT ?entity ?uri WHERE {' .
		' ?entity (' . $propertyPath . ') ?uri .' .
		' FILTER(STRSTARTS(STR(?uri), "' . $prefixLiteral . '"))' .
		'}'
	);
}
/**
 * Queries which of the given entities are directly linked to one another
 * through the given direct property.
 *
 * Both ?sub and ?super are restricted to the given URIs, so only links
 * between two members of the list are returned.
 *
 * @param string[] $itemUris full entity URIs
 * @param string $propertyId property id such as 'P279'
 * @return mixed whatever SparqlClient::getTuples() returns; callers iterate it
 *  and read the 'sub' and 'super' keys of each element
 */
private function wikidataDirectChildOf( $itemUris, $propertyId ) {
	$iriRefs = [];
	foreach ( $itemUris as $itemUri ) {
		$iriRefs[] = '<' . $itemUri . '>';
	}
	$values = implode( ' ', $iriRefs );

	$query = sprintf(
		'SELECT DISTINCT ?sub ?super WHERE { VALUES ?sub { %1$s} VALUES ?super { %1$s} ?sub wdt:%2$s ?super .}',
		$values,
		$propertyId
	);

	return $this->sparqlClient->getTuples( $query );
}
/**
 * Tells whether $end can be reached from $start by following $relation
 * edges in the triple store (depth-first search). A node is considered to
 * reach itself.
 *
 * @param string $start URI the search starts from
 * @param string $end URI to reach
 * @param string $relation predicate to follow, e.g. rdfs:subClassOf
 * @param SimpleTripleStore $triples graph to walk
 * @param string[] &$seen URIs already expanded, shared across the recursion
 *  so that cycles in the graph terminate
 * @return bool
 */
private function isInSuperHierarchy(
	$start, $end, $relation,
	SimpleTripleStore $triples, &$seen = []
) {
	if ( $start === $end ) {
		return true;
	}

	// Strict comparison: URIs must match exactly, with no type juggling
	// (loose comparison treats numeric-looking strings as numbers).
	if ( in_array( $start, $seen, true ) ) {
		return false;
	}
	$seen[] = $start;

	foreach ( $triples->objects( $start, $relation ) as $parentStart ) {
		if ( $this->isInSuperHierarchy( $parentStart, $end, $relation, $triples, $seen ) ) {
			return true;
		}
	}

	return false;
}
/**
 * Renders a URI for use inside a validation message.
 *
 * A Wikidata entity URI is turned into a "$<id>$" placeholder and its id is
 * recorded in $this->entitiesWithoutLabels; formatMessages() later replaces
 * placeholders of the form $P123$ / $Q123$. Any other URI is returned as a
 * string, unchanged.
 *
 * @param string $uri
 * @return string
 */
private function formatLink( $uri ) {
	$entityBase = 'http://www.wikidata.org/entity/';

	if ( strpos( $uri, $entityBase ) !== 0 ) {
		return "{$uri}";
	}

	$id = str_replace( $entityBase, '', $uri );
	$this->entitiesWithoutLabels[$id] = $id;

	return '$' . $id . '$';
}
/**
 * Tells whether at least one message has the error or the warning level.
 *
 * @param ValidationMessage[] $messages
 * @return bool
 */
private function withErrorOrWarning( $messages ) {
	$reportedLevels = [
		ValidationMessage::LEVEL_ERROR,
		ValidationMessage::LEVEL_WARNING,
	];

	foreach ( $messages as $message ) {
		// Non-strict lookup on purpose: it compares levels the same loose way
		// a switch statement does.
		if ( in_array( $message->getLevel(), $reportedLevels ) ) {
			return true;
		}
	}

	return false;
}
/**
 * Tells whether the triple store types $uri as rdfs:Class or owl:Class.
 *
 * @param string $uri
 * @param SimpleTripleStore $triples
 * @return bool
 */
private function isClass( $uri, SimpleTripleStore $triples ) {
	if ( $triples->contains( $uri, self::RDF_TYPE, self::RDFS_CLASS ) ) {
		return true;
	}
	if ( $triples->contains( $uri, self::RDF_TYPE, self::OWL_CLASS ) ) {
		return true;
	}
	return false;
}
/**
 * Tells whether the triple store types $uri as rdf:Property,
 * owl:DatatypeProperty or owl:ObjectProperty.
 *
 * @param string $uri
 * @param SimpleTripleStore $triples
 * @return bool
 */
private function isProperty( $uri, SimpleTripleStore $triples ) {
	if ( $triples->contains( $uri, self::RDF_TYPE, self::RDF_PROPERTY ) ) {
		return true;
	}
	if ( $triples->contains( $uri, self::RDF_TYPE, self::OWL_DATATYPE_PROPERTY ) ) {
		return true;
	}
	if ( $triples->contains( $uri, self::RDF_TYPE, self::OWL_OBJECT_PROPERTY ) ) {
		return true;
	}
	return false;
}
/**
 * Expands the $P123$ / $Q123$ placeholders left by formatLink().
 *
 * Each message is rebuilt with the same level. A placeholder whose id has an
 * entry in $this->entityLabels becomes "<label> (<id>)" surrounded by line
 * breaks; otherwise it becomes the bare id.
 *
 * @param ValidationMessage[] $messages
 * @return ValidationMessage[]
 */
private function formatMessages( array $messages ) {
	$expandPlaceholder = function ( $match ) {
		$entityId = $match[1];
		if ( !array_key_exists( $entityId, $this->entityLabels ) ) {
			return $entityId;
		}
		return "\n{$this->entityLabels[$entityId]} ({$entityId})\n";
	};

	$rebuild = function ( ValidationMessage $message ) use ( $expandPlaceholder ) {
		$text = preg_replace_callback(
			'/\$([PQ]\d+)\$/',
			$expandPlaceholder,
			$message->getMessage()
		);
		return new ValidationMessage( $message->getLevel(), $text );
	};

	return array_map( $rebuild, $messages );
}
/**
 * Fills $this->entityLabels for every id recorded in
 * $this->entitiesWithoutLabels.
 *
 * Labels found in the cache are used directly. The remaining ids are sent in
 * a single SPARQL query asking the label service for English labels, and
 * every label returned is cached with 60 * 60 * 24 * 30 as third argument to
 * set() (presumably a lifetime in seconds, i.e. thirty days - confirm against
 * the cache implementation).
 */
private function loadLabels() {
	$missingIds = [];
	foreach ( $this->entitiesWithoutLabels as $entityId ) {
		$cachedLabel = $this->cache->get( 'tptools-entity-label-en-' . $entityId );
		if ( $cachedLabel === null ) {
			$missingIds[] = $entityId;
			continue;
		}
		$this->entityLabels[$entityId] = $cachedLabel;
	}

	// Everything was cached: no query needed.
	if ( empty( $missingIds ) ) {
		return;
	}

	// The list is not empty here, so this yields "wd:A wd:B wd:C".
	$values = 'wd:' . implode( ' wd:', $missingIds );
	$tuples = $this->sparqlClient->getTuples(
		'SELECT ?entity ?entityLabel WHERE {' .
		' VALUES ?entity { ' . $values . ' }' .
		' SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }' .
		'}'
	);

	foreach ( $tuples as $tuple ) {
		$entityId = str_replace( 'http://www.wikidata.org/entity/', '', $tuple['entity'] );
		$label = $tuple['entityLabel'];
		$this->cache->set( 'tptools-entity-label-en-' . $entityId, $label, 30 * 24 * 60 * 60 );
		$this->entityLabels[$entityId] = $label;
	}
}
}