diff --git a/README.md b/README.md index edd0618..5dbdcd2 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,23 @@ # Purtle **Purtle** is a fast, lightweight RDF generator. It provides a "fluent" interface for generating RDF output in Turtle, JSON-LD, XML/RDF or N-Triples. The fluent interface allows the resulting PHP code to be structured just like Turtle notation for RDF, hence the name: "Purtle" is a contraction of "PHP Turtle". The concrete classes implementing the common `RdfWriter` interface are: * `TurtleRdfWriter` outputs Turtle +* `JsonLdRdfWriter` outputs JSON-LD * `XmlRdfWriter` outputs XML/RDF * `NTriplesRdfWriter` outputs N-Triples -* `JsonLdRdfWriter` outputs JSON-LD The PHP code would look something like this: $writer = new TurtleRdfWriter(); $writer->prefix( 'acme', 'http://acme.test/terms/' ); $writer->about( 'http://quux.test/Something' ) ->a( 'acme', 'Thing' ) ->say( 'acme', 'name' )->text( 'Thingy' )->text( 'Dingsda', 'de' ) ->say( 'acme', 'owner' )->is( 'http://quux.test/' ); diff --git a/composer.json b/composer.json index d714be3..4291bb6 100644 --- a/composer.json +++ b/composer.json @@ -1,68 +1,68 @@ { "name": "wikimedia/purtle", "type": "library", "description": "Fast streaming RDF serializer", "keywords": [ + "JSON-LD", "RDF", "Serializer", - "Turtle", - "JSON-LD" + "Turtle" ], "homepage": "https://mediawiki.org/wiki/Purtle", "license": "GPL-2.0+", "authors": [ { "name": "Daniel Kinzler" }, { "name": "Stanislav Malyshev" }, { "name": "Thiemo Mättig" }, { "name": "C. Scott Ananian" } ], "support": { "irc": "irc://irc.freenode.net/wikimedia-dev" }, "require": { "php": ">=5.5.9" }, "require-dev": { "mediawiki/mediawiki-codesniffer": "0.12.0", "ockcyp/covers-validator": "~0.4.0", "phpunit/phpunit": "4.8.24", "phpmd/phpmd": "~2.3" }, "autoload": { "psr-4": { "Wikimedia\\Purtle\\": "src/", "Wikimedia\\Purtle\\Tests\\": "tests/phpunit/" } }, "extra": { "branch-alias": { "dev-master": "1.0.x-dev" } }, "scripts": { "test": [ "composer validate --no-interaction", "phpunit", "covers-validator" ], "cs": [ "phpcs -p -s", "phpmd src/ text phpmd.xml" ], "ci": [ "@cs", "@test" ], "fix": [ "phpcbf" ] } } diff --git a/src/JsonLdRdfWriter.php b/src/JsonLdRdfWriter.php index 2366a98..48dde9c 100644 --- a/src/JsonLdRdfWriter.php +++ b/src/JsonLdRdfWriter.php @@ -1,368 +1,375 @@ graph to null in + * #finishJson() to ensure that the deferred callback in #finishDocument() + * doesn't later emit "@graph". + * * @see https://www.w3.org/TR/json-ld/#named-graphs + * * @var array[]|null */ private $graph = []; /** * A collection of predicates about a specific subject. The * subject is identified by the "@id" key in this array; the other * keys identify JSON-LD properties. + * * @see https://www.w3.org/TR/json-ld/#dfn-edge + * * @var array */ private $predicates = []; /** * A sequence of zero or more IRIs, nodes, or values, which are the * destination targets of the current predicates. + * * @see https://www.w3.org/TR/json-ld/#dfn-list + * * @var array */ private $values = []; /** * True iff we have written the opening of the "@graph" field. + * * @var bool */ private $wroteGraph = false; /** * JSON-LD objects describing a single node can omit the "@graph" field; * this variable remains false only so long as we can guarantee that * only a single node has been described. + * * @var bool */ private $disableGraphOpt = false; /** * The IRI for the RDF `type` property. */ const RDF_TYPE_IRI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'; /** * @param string $role * @param BNodeLabeler|null $labeler */ public function __construct( $role = parent::DOCUMENT_ROLE, BNodeLabeler $labeler = null ) { parent::__construct( $role, $labeler ); // The following named methods are protected, not private, so we // can invoke them directly w/o function wrappers. $this->transitionTable[self::STATE_START][self::STATE_DOCUMENT] = [ $this, 'beginJson' ]; $this->transitionTable[self::STATE_DOCUMENT][self::STATE_FINISH] = [ $this, 'finishJson' ]; $this->transitionTable[self::STATE_OBJECT][self::STATE_PREDICATE] = [ $this, 'finishPredicate' ]; $this->transitionTable[self::STATE_OBJECT][self::STATE_SUBJECT] = [ $this, 'finishSubject' ]; $this->transitionTable[self::STATE_OBJECT][self::STATE_DOCUMENT] = [ $this, 'finishDocument' ]; } /** * Emit $val as JSON, with $indent extra indentations on each line. * @param array $val * @param int $indent * @return string the JSON string for $val */ public function encode( $val, $indent ) { $str = json_encode( $val, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES ); // Strip outermost open/close braces/brackets - $str = preg_replace( '/(^[\[{]\n?)|(\n?[}\]]$)/', '', $str ); - // add extra indentation - $str = preg_replace( '/^/m', str_repeat( " ", $indent ), $str ); + $str = preg_replace( '/^[[{]\n?|\n?[}\]]$/', '', $str ); + + if ( $indent > 0 ) { + // add extra indentation + $str = preg_replace( '/^/m', str_repeat( ' ', $indent ), $str ); + } return $str; } /** * Return a "compact IRI" corresponding to the given base/local pair. * This adds entries to the "@context" key when needed to allow use * of a given prefix. * @see https://www.w3.org/TR/json-ld/#dfn-compact-iri * * @param string $base A QName prefix if $local is given, or an IRI if $local is null. * @param string|null $local A QName suffix, or null if $base is an IRI. * * @return string A compact IRI. */ private function compactify( $base, $local = null ) { $this->expandShorthand( $base, $local ); if ( $local === null ) { return $base; } else { if ( $base !== '_' && isset( $this->prefixes[ $base ] ) ) { if ( $base === '' ) { // Empty prefix not supported; use full IRI return $this->prefixes[ $base ] . $local; } $this->context[ $base ] = $this->prefixes[ $base ]; } return $base . ':' . $local; } } /** * Return an absolute IRI from the given base/local pair. * @see https://www.w3.org/TR/json-ld/#dfn-absolute-iri * * @param string $base A QName prefix if $local is given, or an IRI if $local is null. * @param string|null $local A QName suffix, or null if $base is an IRI. * * @return string|null An absolute IRI, or null if it cannot be constructed. */ private function toIRI( $base, $local ) { $this->expandShorthand( $base, $local ); $this->expandQName( $base, $local ); if ( $local !== null ) { throw new LogicException( 'Unknown prefix: ' . $base ); } return $base; } /** * Write document header. */ protected function beginJson() { if ( $this->role === self::DOCUMENT_ROLE ) { $this->write( "{\n" ); $this->write( function () { // If this buffer is drained early, disable @graph optimization $this->disableGraphOpt = true; return ''; } ); } } /** * Write document footer. */ protected function finishJson() { // If we haven't drained yet, and @graph has only 1 element, then we // can optimize our output and hoist the single node to top level. if ( $this->role === self::DOCUMENT_ROLE ) { if ( ( !$this->disableGraphOpt ) && count( $this->graph ) === 1 ) { $this->write( $this->encode( $this->graph[0], 0 ) ); $this->graph = null; // We're done with @graph. } else { $this->disableGraphOpt = true; $this->write( "\n ]" ); } } + if ( count( $this->context ) ) { // Write @context field. $this->write( ",\n" ); $this->write( $this->encode( [ "@context" => $this->context ], 0 ) ); } - $this->write( "\n" ); - $this->write( "}" ); + + $this->write( "\n}" ); } protected function finishDocument() { $this->finishSubject(); $this->write( function () { // if this is drained before finishJson(), then disable // the graph optimization and dump what we've got so far. $str = ''; if ( $this->graph !== null && count( $this->graph ) > 0 ) { $this->disableGraphOpt = true; if ( $this->role === self::DOCUMENT_ROLE && !$this->wroteGraph ) { $str .= " \"@graph\": [\n"; $this->wroteGraph = true; } else { $str .= ",\n"; } $str .= $this->encode( $this->graph, 1 ); $this->graph = []; return $str; } // Delay; maybe we'll be able to optimize this later. return $str; } ); } /** * @param string $base * @param string|null $local */ protected function writeSubject( $base, $local = null ) { $this->predicates = [ "@id" => $this->compactify( $base, $local ) ]; } protected function finishSubject() { $this->finishPredicate(); $this->graph[] = $this->predicates; } /** * @param string $base * @param string|null $local */ protected function writePredicate( $base, $local = null ) { // no op } /** * @param string $base * @param string|null $local */ protected function writeResource( $base, $local = null ) { $this->values[] = [ "@id" => $this->compactify( $base, $local ) ]; } /** * @param string $text * @param string|null $language */ protected function writeText( $text, $language = null ) { - if ( - $language === null || - !$this->isValidLanguageCode( $language ) - ) { + if ( !$this->isValidLanguageCode( $language ) ) { $this->values[] = $text; } else { $this->values[] = [ "@language" => $language, "@value" => $text ]; } } /** * @param string $literal * @param string|null $typeBase * @param string|null $typeLocal */ public function writeValue( $literal, $typeBase, $typeLocal = null ) { if ( $typeBase === null && $typeLocal === null ) { $this->values[] = $literal; - } elseif ( $typeLocal === null ) { - throw new InvalidArgumentException( "Got IRI: $typeBase" ); - } else { - $typeIRI = $this->toIRI( $typeBase, $typeLocal ); - if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#string' ) { + return; + } + + switch ( $this->toIRI( $typeBase, $typeLocal ) ) { + case 'http://www.w3.org/2001/XMLSchema#string': $this->values[] = strval( $literal ); return; - } - if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#integer' ) { + case 'http://www.w3.org/2001/XMLSchema#integer': $this->values[] = intval( $literal ); return; - } - if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#boolean' ) { - $this->values[] = ( $literal === 'true' ); + case 'http://www.w3.org/2001/XMLSchema#boolean': + $this->values[] = $literal === 'true'; return; - } - if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#double' ) { + case 'http://www.w3.org/2001/XMLSchema#double': $v = doubleval( $literal ); // Only "numbers with fractions" are xsd:double. We need // to verify that the JSON string will contain a decimal // point, otherwise the value would be interpreted as an // xsd:integer. // TODO: consider instead using JSON_PRESERVE_ZERO_FRACTION // in $this->encode() once our required PHP >= 5.6.6. if ( strpos( json_encode( $v ), '.' ) !== false ) { $this->values[] = $v; return; } - } - $this->values[] = [ - "@type" => $this->compactify( $typeBase, $typeLocal ), - "@value" => strval( $literal ) - ]; } + + $this->values[] = [ + "@type" => $this->compactify( $typeBase, $typeLocal ), + "@value" => strval( $literal ) + ]; } protected function finishPredicate() { list( $base, $local ) = $this->currentPredicate; $predIRI = $this->toIRI( $base, $local ); if ( $predIRI === self::RDF_TYPE_IRI ) { // TODO: the context can optionally specify other predicates // have type "@id" or "@vocab", which would trigger this // same coercion. See https://www.w3.org/TR/json-ld/#iris $name = "@type"; $this->values = array_map( function ( array $val ) { return $val[ "@id" ]; }, $this->values ); } else { $name = $this->compactify( $base, $local ); } if ( isset( $this->predicates[$name] ) ) { - $was = $this->predicates[$name]; - if ( !is_array( $was ) ) { - $was = [ $was ]; - } - $this->values = array_merge( $was, $this->values ); + $this->values = array_merge( (array)$this->predicates[$name], $this->values ); } + $cnt = count( $this->values ); if ( $cnt === 0 ) { throw new LogicException( "finishPredicate can't be called without at least one value" ); } elseif ( $cnt === 1 ) { $this->predicates[$name] = $this->values[0]; } else { $this->predicates[$name] = $this->values; } $this->values = []; } /** * @param string $role * @param BNodeLabeler $labeler * * @return RdfWriterBase */ protected function newSubWriter( $role, BNodeLabeler $labeler ) { $writer = new self( $role, $labeler ); // Have subwriter share context with this parent. $writer->context = &$this->context; // We can't use the @graph optimization. $this->disableGraphOpt = true; return $writer; } /** * @return string a MIME type */ public function getMimeType() { return 'application/ld+json; charset=UTF-8'; } }