diff --git a/README.md b/README.md index 95321b5..1d4e39c 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,23 @@ # Purtle **Purtle** is a fast, lightweight RDF generator. It provides a "fluent" interface for -generating RDF output in Turtle, XML/RDF or N-Triples. The fluent interface allows the +generating RDF output in Turtle, JSON-LD, XML/RDF or N-Triples. The fluent interface allows the resulting PHP code to be structured just like Turtle notation for RDF, hence the name: "Purtle" is a contraction of "PHP Turtle". -The three concrete classes implementing the common `RdfWriter` interface are: +The four concrete classes implementing the common `RdfWriter` interface are: * `TurtleRdfWriter` outputs Turtle * `XmlRdfWriter` outputs XML/RDF * `NTriplesRdfWriter` outputs N-Triples +* `JsonLdRdfWriter` outputs JSON-LD The PHP code would look something like this: $writer = new TurtleRdfWriter(); $writer->prefix( 'acme', 'http://acme.test/terms/' ); $writer->about( 'http://quux.test/Something' ) ->a( 'acme', 'Thing' ) ->say( 'acme', 'name' )->text( 'Thingy' )->text( 'Dingsda', 'de' ) ->say( 'acme', 'owner' )->is( 'http://quux.test/' ); diff --git a/composer.json b/composer.json index 1243c49..1c8f823 100644 --- a/composer.json +++ b/composer.json @@ -1,61 +1,65 @@ { "name": "wikimedia/purtle", "type": "library", "description": "Fast streaming RDF serializer", "keywords": [ "RDF", "Serializer", - "Turtle" + "Turtle", + "JSON-LD" ], "homepage": "https://mediawiki.org/wiki/Purtle", "license": "GPL-2.0+", "authors": [ { "name": "Daniel Kinzler" }, { "name": "Stanislav Malyshev" }, { "name": "Thiemo Mättig" + }, + { + "name": "C. 
Scott Ananian" } ], "support": { "irc": "irc://irc.freenode.net/wikimedia-dev" }, "require": { "php": ">=5.5.9" }, "require-dev": { "mediawiki/mediawiki-codesniffer": "0.9.0", "ockcyp/covers-validator": "~0.4.0", "phpunit/phpunit": "4.8.24", "phpmd/phpmd": "~2.3" }, "autoload": { "psr-4": { "Wikimedia\\Purtle\\": "src/", "Wikimedia\\Purtle\\Tests\\": "tests/phpunit/" } }, "extra": { "branch-alias": { "dev-master": "1.0.x-dev" } }, "scripts": { "test": [ "@validate --no-interaction", "phpunit", "covers-validator" ], "cs": [ "phpcs -p -s", "phpmd src/ text phpmd.xml" ], "ci": [ "@cs", "@test" ] } } diff --git a/src/JsonLdRdfWriter.php b/src/JsonLdRdfWriter.php new file mode 100644 index 0000000..af36408 --- /dev/null +++ b/src/JsonLdRdfWriter.php @@ -0,0 +1,354 @@ +transitionTable[self::STATE_START][self::STATE_DOCUMENT] = function () { + $this->beginJson(); + }; + $this->transitionTable[self::STATE_DOCUMENT][self::STATE_FINISH] = function () { + $this->finishJson(); + }; + $this->transitionTable[self::STATE_OBJECT][self::STATE_PREDICATE] = function () { + $this->finishPredicate(); + }; + $this->transitionTable[self::STATE_OBJECT][self::STATE_SUBJECT] = function () { + $this->finishPredicate(); + $this->finishSubject(); + }; + $this->transitionTable[self::STATE_OBJECT][self::STATE_DOCUMENT] = function () { + $this->finishPredicate(); + $this->finishSubject(); + $this->finishDocument(); + }; + } + + /** + * Emit $val as JSON, with $indent extra indentations on each line. 
+	 * @param mixed $val Value to serialize; anything json_encode() accepts (arrays or plain scalars).
+	 * @param int $indent Number of extra indentation units prepended to every output line.
+	 * @return string the JSON string for $val
+	 * @throws InvalidArgumentException if json_encode() cannot serialize $val
+	 */
+	public function encode( $val, $indent = 0 ) {
+		$str = json_encode( $val, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES );
+		if ( $str === false ) {
+			// json_encode() reports failure (e.g. malformed UTF-8) by returning false.
+			// Passing that on to preg_replace() would yield an empty string and
+			// silently drop the data, so fail loudly instead.
+			throw new InvalidArgumentException(
+				'Cannot encode value as JSON: ' . json_last_error_msg()
+			);
+		}
+		// Strip outermost open/close braces/brackets
+		$str = preg_replace( '/(^[\[{]\n?)|(\n?[}\]]$)/', '', $str );
+		// add extra indentation
+		$str = preg_replace( '/^/m', str_repeat( " ", $indent ), $str );
+
+		return $str;
+	}
+
+	/**
+	 * Return a "compact IRI" corresponding to the given base/local pair.
+	 * This adds entries to the "@context" key when needed to allow use
+	 * of a given prefix.
+	 * @see https://www.w3.org/TR/json-ld/#dfn-compact-iri
+	 *
+	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
+	 * @param string|null $local A QName suffix, or null if $base is an IRI.
+	 *
+	 * @return string A compact IRI.
+	 */
+	private function compactify( $base, $local = null ) {
+		$this->expandShorthand( $base, $local );
+
+		if ( $local === null ) {
+			// Nothing to compact: $base already is the IRI.
+			return $base;
+		}
+
+		// '_' (blank node labels) and unregistered prefixes are passed through
+		// as "base:local" without touching the context.
+		if ( $base !== '_' && $this->isPrefix( $base ) ) {
+			$prefixes = $this->getPrefixes();
+			if ( $base === '' ) {
+				// Empty prefix not supported; use full IRI
+				return $prefixes[ $base ] . $local;
+			}
+			// Remember the prefix so that finishJson() can declare it in "@context".
+			$this->context[ $base ] = $prefixes[ $base ];
+		}
+		return $base . ':' . $local;
+	}
+
+	/**
+	 * Return an absolute IRI from the given base/local pair.
+	 * @see https://www.w3.org/TR/json-ld/#dfn-absolute-iri
+	 *
+	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
+	 * @param string|null $local A QName suffix, or null if $base is an IRI.
+	 *
+	 * @return string An absolute IRI.
+	 * @throws LogicException if a local name is still left after expandQName(),
+	 *  i.e. the prefix could not be resolved
+	 */
+	private function toIRI( $base, $local = null ) {
+		$this->expandShorthand( $base, $local );
+		$this->expandQName( $base, $local );
+		if ( $local !== null ) {
+			throw new LogicException( 'Unknown prefix: ' . $base );
+		}
+		return $base;
+	}
+
+	/**
+	 * Write document header.
+ */ + private function beginJson() { + if ( $this->role === self::DOCUMENT_ROLE ) { + $this->write( "{\n" ); + $this->write( function () { + // If this buffer is drained early, disable @graph optimization. + // The closure contributes no text of its own (it returns ''); evaluating it + // only sets the flag that finishJson() checks before hoisting a single node. + $this->disableGraphOpt = true; + return ''; + } ); + } + } + + /** + * Write document footer. + * + * For the document-role writer: when exactly one node is pending and the + * optimization has not been disabled, that node's members are written directly + * and $this->graph is set to null; otherwise the optimization is disabled and the + * closing bracket of the "@graph" array is written. Afterwards a non-empty + * context is written as an "@context" member, followed by the closing brace. + * + * NOTE(review): with zero pending nodes and nothing dumped earlier, the else + * branch still writes "]" although no opening bracket is written anywhere in the + * code visible here - confirm how an empty document is serialized. + */ + private function finishJson() { + // If we haven't drained yet, and @graph has only 1 element, then we + // can optimize our output and hoist the single node to top level. + if ( $this->role === self::DOCUMENT_ROLE ) { + if ( count( $this->graph ) === 1 && !$this->disableGraphOpt ) { + $this->write( $this->encode( $this->graph[0], 0 ) ); + $this->graph = null; // We're done with @graph. + } else { + $this->disableGraphOpt = true; + $this->write( "\n ]" ); + } + } + if ( count( $this->context ) ) { + // Write @context field. + $this->write( ",\n" ); + $this->write( $this->encode( [ + "@context" => $this->context + ], 0 ) ); + } + $this->write( "\n" ); + $this->write( "}" ); + } + + /** + * Queue a closure (via write()) that dumps the nodes collected so far. + * + * When the closure is evaluated and $this->graph is a non-empty array, it disables + * the single-node optimization, emits the "@graph" opener (first dump by the + * document-role writer) or a ",\n" separator (every other case), appends the + * encoded nodes with one level of indentation and resets $this->graph to []. + * In every other case it returns an empty string. + * + * Presumably the closure is evaluated when the output buffer is drained - TODO + * confirm against write() and drain(), which are outside this chunk. + */ + private function finishDocument() { + $this->write( function () { + // if this is drained before finishJson(), then disable + // the graph optimization and dump what we've got so far. + $str = ''; + if ( $this->graph !== null && count( $this->graph ) > 0 ) { + $this->disableGraphOpt = true; + if ( $this->role === self::DOCUMENT_ROLE && !$this->wroteGraph ) { + $str .= " \"@graph\": [\n"; + $this->wroteGraph = true; + } else { + $str .= ",\n"; + } + $str .= $this->encode( $this->graph, 1 ); + $this->graph = []; + return $str; + } + // Delay; maybe we'll be able to optimize this later. 
+			return $str;
+		} );
+	}
+
+	/**
+	 * Start a fresh node object for the given subject; its "@id" is the compact IRI.
+	 *
+	 * @param string $base
+	 * @param string|null $local
+	 */
+	protected function writeSubject( $base, $local = null ) {
+		$this->predicates = [
+			"@id" => $this->compactify( $base, $local )
+		];
+	}
+
+	/**
+	 * Append the node object built for the current subject to the pending graph.
+	 */
+	protected function finishSubject() {
+		$this->graph[] = $this->predicates;
+	}
+
+	/**
+	 * Intentionally empty: nothing is emitted when a predicate starts. The predicate
+	 * is read back from $this->currentPredicate once its values are flushed.
+	 *
+	 * @param string $base
+	 * @param string|null $local
+	 */
+	protected function writePredicate( $base, $local = null ) {
+		// no op
+	}
+
+	/**
+	 * Collect a resource object for the current predicate as a { "@id": ... } reference.
+	 *
+	 * @param string $base
+	 * @param string|null $local
+	 */
+	protected function writeResource( $base, $local = null ) {
+		$this->values[] = [
+			"@id" => $this->compactify( $base, $local )
+		];
+	}
+
+	/**
+	 * Collect a text object for the current predicate. A language-tagged value object is
+	 * only produced for a language code accepted by isValidLanguageCode(); otherwise the
+	 * text is stored as a plain string.
+	 *
+	 * @param string $text
+	 * @param string|null $language
+	 */
+	protected function writeText( $text, $language = null ) {
+		if (
+			$language === null ||
+			!$this->isValidLanguageCode( $language )
+		) {
+			$this->values[] = $text;
+		} else {
+			$this->values[] = [
+				"@language" => $language,
+				"@value" => $text
+			];
+		}
+	}
+
+	/**
+	 * Collect a literal object for the current predicate.
+	 *
+	 * xsd:string, xsd:integer, xsd:boolean and xsd:double literals are stored as native
+	 * JSON values when that is lossless; every other case is stored as a typed value
+	 * object ( "@type" / "@value" ) that keeps the lexical form untouched.
+	 *
+	 * @param string $literal
+	 * @param string|null $typeBase
+	 * @param string|null $typeLocal
+	 *
+	 * @throws InvalidArgumentException if a type is given as a bare IRI (no $typeLocal)
+	 * @throws LogicException if the type prefix cannot be resolved by toIRI()
+	 */
+	public function writeValue( $literal, $typeBase, $typeLocal = null ) {
+		if ( $typeBase === null && $typeLocal === null ) {
+			$this->values[] = $literal;
+		} elseif ( $typeLocal === null ) {
+			throw new InvalidArgumentException( "Got IRI: $typeBase" );
+		} else {
+			$typeIRI = $this->toIRI( $typeBase, $typeLocal );
+			if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#string' ) {
+				$this->values[] = strval( $literal );
+				return;
+			}
+			if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#integer' ) {
+				// Use a native JSON number only if the literal survives the round trip
+				// through a PHP int. intval() saturates on out-of-range input and maps
+				// non-numeric input to 0, which would silently change the data; such
+				// literals fall through to the typed value object below.
+				if ( strval( intval( $literal ) ) === strval( $literal ) ) {
+					$this->values[] = intval( $literal );
+					return;
+				}
+			}
+			if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#boolean' ) {
+				// xsd:boolean has four lexical forms: true, false, 1 and 0.
+				if ( in_array( $literal, [ true, 'true', '1', 1 ], true ) ) {
+					$this->values[] = true;
+					return;
+				}
+				if ( in_array( $literal, [ false, 'false', '0', 0 ], true ) ) {
+					$this->values[] = false;
+					return;
+				}
+				// Anything else is not a valid boolean; keep it verbatim as a typed
+				// value object (below) instead of reporting it as false.
+			}
+			if ( $typeIRI === 'http://www.w3.org/2001/XMLSchema#double' ) {
+				$v = doubleval( $literal );
+				// Only "numbers with fractions" are xsd:double.
+				// We need
+				// to verify that the JSON string will contain a decimal
+				// point, otherwise the value would be interpreted as an
+				// xsd:integer.
+				if ( strpos( json_encode( $v ), '.' ) !== false ) {
+					$this->values[] = $v;
+					return;
+				}
+			}
+			$this->values[] = [
+				"@type" => $this->compactify( $typeBase, $typeLocal ),
+				"@value" => strval( $literal )
+			];
+		}
+	}
+
+	/**
+	 * Move the values collected for the current predicate onto the current subject node.
+	 *
+	 * rdf:type is stored under the JSON-LD "@type" keyword as bare IRIs, provided every
+	 * collected value is a resource reference. Any other predicate, or an rdf:type that
+	 * carries a literal, is stored under its compact IRI. Values from an earlier say() of
+	 * the same predicate on the same subject are extended rather than overwritten.
+	 *
+	 * @throws LogicException if no value was collected, or if toIRI() cannot resolve
+	 *  the predicate
+	 */
+	private function finishPredicate() {
+		$values = $this->values;
+		if ( count( $values ) === 0 ) {
+			throw new LogicException( "finishPredicate can't be called without at least one value" );
+		}
+
+		$base = $this->currentPredicate[0];
+		$local = $this->currentPredicate[1];
+		$predIRI = $this->toIRI( $base, $local );
+
+		$key = null;
+		if ( $predIRI === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' ) {
+			// "@type" takes plain IRIs, so unwrap each { "@id": ... } reference.
+			// This must work for any number of values, not just a single one.
+			$typeIRIs = [];
+			foreach ( $values as $val ) {
+				if ( !is_array( $val ) || !isset( $val[ "@id" ] ) ) {
+					// A literal cannot be expressed via "@type"; fall back to the
+					// ordinary predicate key so nothing is lost.
+					$typeIRIs = null;
+					break;
+				}
+				$typeIRIs[] = $val[ "@id" ];
+			}
+			if ( $typeIRIs !== null ) {
+				$key = "@type";
+				$values = $typeIRIs;
+			}
+		}
+		if ( $key === null ) {
+			$key = $this->compactify( $base, $local );
+		}
+
+		if ( array_key_exists( $key, $this->predicates ) ) {
+			// The same predicate was already used for this subject: keep its values.
+			$earlier = $this->predicates[ $key ];
+			// A single earlier value is stored unwrapped. It may itself be an array
+			// such as [ "@id" => ... ], so test for a list by looking for index 0.
+			if ( !is_array( $earlier ) || !array_key_exists( 0, $earlier ) ) {
+				$earlier = [ $earlier ];
+			}
+			$values = array_merge( $earlier, $values );
+		}
+
+		// A lone value is stored as is; several values are stored as a list.
+		$this->predicates[ $key ] = count( $values ) === 1 ? $values[0] : $values;
+
+		$this->values = [];
+	}
+
+	/**
+	 * @param string $role
+	 * @param BNodeLabeler $labeler
+	 *
+	 * @return RdfWriterBase
+	 */
+	protected function newSubWriter( $role, BNodeLabeler $labeler ) {
+		$writer = new self( $role, $labeler );
+
+		// Have subwriter share context with this parent.
+		$writer->context = &$this->context;
+		// We can't use the @graph optimization.
+ $this->disableGraphOpt = true; + + return $writer; + } + + /** + * @return string a MIME type + */ + public function getMimeType() { + return 'application/ld+json; charset=UTF-8'; + } + +} diff --git a/src/RdfWriterFactory.php b/src/RdfWriterFactory.php index dc02000..35956ba 100644 --- a/src/RdfWriterFactory.php +++ b/src/RdfWriterFactory.php @@ -1,157 +1,172 @@ assertEquals( $writer->encode( "foo{bar}bat" ), '"foo{bar}bat"' ); + $this->assertEquals( $writer->encode( [] ), "" ); + $this->assertEquals( $writer->encode( [ + "@id" => "foo" + ] ), " \"@id\": \"foo\"" ); + $this->assertEquals( + $writer->encode( [ 1, 2, 3 ] ), + " 1,\n 2,\n 3" + ); + } + +} diff --git a/tests/phpunit/NTriplesRdfWriterTest.php b/tests/phpunit/NTriplesRdfWriterTest.php index 0c7a504..69b97e9 100644 --- a/tests/phpunit/NTriplesRdfWriterTest.php +++ b/tests/phpunit/NTriplesRdfWriterTest.php @@ -1,35 +1,42 @@ getSupportedFormats(); $this->assertInternalType( 'array', $formats ); $this->assertNotEmpty( $formats ); } public function testGetWriter() { $factory = new RdfWriterFactory(); foreach ( $factory->getSupportedFormats() as $format ) { $writer = $factory->getWriter( $format ); $this->assertInstanceOf( RdfWriter::class, $writer ); } } public function testGivenInvalidFormat_getWriterThrowsException() { $factory = new RdfWriterFactory(); $this->setExpectedException( InvalidArgumentException::class ); $factory->getWriter( 'invalid' ); } public function testGetFormatName() { $factory = new RdfWriterFactory(); foreach ( $factory->getSupportedFormats() as $format ) { $actual = $factory->getFormatName( $format ); // the canonical name should just stay $this->assertEquals( $format, $actual ); } } public function testGivenInvalidFormat_getFormatNameReturnsFalse() { $factory = new RdfWriterFactory(); $this->assertFalse( $factory->getFormatName( 'invalid' ) ); } public function provideFormats() { return [ // N3 (currently falls through to turtle) [ 'N3', 'n3', 'n3', 'text/n3' ], [ 'text/n3', 'n3', 
'n3', 'text/n3' ], [ 'text/rdf+n3', 'n3', 'n3', 'text/n3' ], [ 'ttl', 'turtle', 'ttl', 'text/turtle' ], [ 'turtle', 'turtle', 'ttl', 'text/turtle' ], [ 'text/turtle', 'turtle', 'ttl', 'text/turtle' ], [ 'application/x-turtle', 'turtle', 'ttl', 'text/turtle' ], [ 'nt', 'ntriples', 'nt', 'application/n-triples' ], [ 'ntriples', 'ntriples', 'nt', 'application/n-triples' ], [ 'n-triples', 'ntriples', 'nt', 'application/n-triples' ], [ 'text/plain', 'ntriples', 'nt', 'application/n-triples' ], [ 'text/n-triples', 'ntriples', 'nt', 'application/n-triples' ], [ 'application/ntriples', 'ntriples', 'nt', 'application/n-triples' ], [ 'application/n-triples', 'ntriples', 'nt', 'application/n-triples' ], [ 'xml', 'rdfxml', 'rdf', 'application/rdf+xml' ], [ 'rdf', 'rdfxml', 'rdf', 'application/rdf+xml' ], [ 'rdfxml', 'rdfxml', 'rdf', 'application/rdf+xml' ], [ 'application/rdf+xml', 'rdfxml', 'rdf', 'application/rdf+xml' ], [ 'application/xml', 'rdfxml', 'rdf', 'application/rdf+xml' ], [ 'text/xml', 'rdfxml', 'rdf', 'application/rdf+xml' ], + + [ 'json', 'jsonld', 'jsonld', 'application/ld+json' ], + [ 'jsonld', 'jsonld', 'jsonld', 'application/ld+json' ], + [ 'application/ld+json', 'jsonld', 'jsonld', 'application/ld+json' ], + [ 'application/json', 'jsonld', 'jsonld', 'application/ld+json' ], + [ 'application/json', 'jsonld', 'jsonld', 'application/json' ], ]; } /** * @dataProvider provideFormats */ public function testFormats( $name, $canonicalName, $expectedFileExtension, $expectedMimeType ) { $factory = new RdfWriterFactory(); $this->assertEquals( $canonicalName, $factory->getFormatName( $name ) ); $this->assertEquals( $expectedFileExtension, $factory->getFileExtension( $canonicalName ) ); $this->assertContains( $expectedMimeType, $factory->getMimeTypes( $canonicalName ) ); $writer = $factory->getWriter( $canonicalName ); $this->assertInstanceOf( RdfWriter::class, $writer ); } public function testGetMimeTypes() { $factory = new RdfWriterFactory(); foreach ( 
$factory->getSupportedFormats() as $format ) { $mimeTypes = $factory->getMimeTypes( $format ); $this->assertInternalType( 'array', $mimeTypes ); $this->assertNotEmpty( $mimeTypes ); } } public function testGivenInvalidFormat_getMimeTypesThrowsException() { $factory = new RdfWriterFactory(); $this->setExpectedException( InvalidArgumentException::class ); $factory->getMimeTypes( 'invalid' ); } public function testGetFileExtensions() { $factory = new RdfWriterFactory(); foreach ( $factory->getSupportedFormats() as $format ) { $extension = $factory->getFileExtension( $format ); $this->assertInternalType( 'string', $extension ); } } public function testGivenInvalidFormat_getFileExtensionsThrowsException() { $factory = new RdfWriterFactory(); $this->setExpectedException( InvalidArgumentException::class ); $factory->getFileExtension( 'invalid' ); } } diff --git a/tests/phpunit/RdfWriterTestBase.php b/tests/phpunit/RdfWriterTestBase.php index 39aaa61..49f4bd4 100644 --- a/tests/phpunit/RdfWriterTestBase.php +++ b/tests/phpunit/RdfWriterTestBase.php @@ -1,382 +1,398 @@ newWriter()->getMimeType(); $this->assertInternalType( 'string', $mimeType ); - $this->assertRegExp( '/^\w+\/[\w-]+(\+xml)?(; charset=UTF-8)?$/', $mimeType ); + $this->assertRegExp( '/^\w+\/[\w-]+(\+(xml|json))?(; charset=UTF-8)?$/', $mimeType ); } public function testTriples() { $writer = $this->newWriter(); $writer->prefix( 'acme', 'http://acme.test/' ); $writer->start(); $writer->about( 'http://foobar.test/Bananas' ) ->say( 'a' )->is( 'http://foobar.test/Fruit' ); // shorthand name "a" $writer->about( 'acme', 'Nuts' ) ->say( 'acme', 'weight' )->value( '5.5', 'xsd', 'decimal' ); // redundant about( 'acme', 'Nuts' ) $writer->about( 'acme', 'Nuts' ) ->say( 'acme', 'color' )->value( 'brown' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'Triples', $rdf ); } public function testPredicates() { $writer = $this->newWriter(); $writer->prefix( '', 'http://acme.test/' ); // empty prefix 
$writer->start(); $writer->about( 'http://foobar.test/Bananas' ) ->a( 'http://foobar.test/Fruit' ) // shorthand function a() ->say( '', 'name' ) // empty prefix ->text( 'Banana' ) ->say( '', 'name' ) // redundant say( '', 'name' ) ->text( 'Banane', 'de' ); $writer->about( 'http://foobar.test/Apples' ) ->say( '', 'name' ) // subsequent call to say( '', 'name' ) for a different subject ->text( 'Apple' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'Predicates', $rdf ); } public function testPredicates_drain() { $writer = $this->newWriter(); $writer->prefix( '', 'http://acme.test/' ); // empty prefix $writer->start(); $writer->about( 'http://foobar.test/Bananas' ) ->a( 'http://foobar.test/Fruit' ) // shorthand function a() ->say( '', 'name' ) // empty prefix ->text( 'Banana' ) ->say( '', 'name' ) // redundant say( '', 'name' ) ->text( 'Banane', 'de' ); $rdf1 = $writer->drain(); $this->assertNotEmpty( $rdf1 ); $writer->about( 'http://foobar.test/Apples' ) ->say( '', 'name' ) // subsequent call to say( '', 'name' ) for a different subject ->text( 'Apple' ); $writer->finish(); $rdf2 = $writer->drain(); $this->assertNotEmpty( $rdf2 ); $this->assertOutputLines( 'Predicates', $rdf1 . $rdf2 ); } public function testPredicates_sub() { $writer = $this->newWriter(); $writer->prefix( '', 'http://acme.test/' ); // empty prefix $writer->start(); $sub = $writer->sub(); // output of the sub writer will appear after the output of the main writer. 
$sub->about( 'http://foobar.test/Apples' ) ->say( '', 'name' ) // subsequent call to say( '', 'name' ) for a different subject ->text( 'Apple' ); $writer->about( 'http://foobar.test/Bananas' ) ->a( 'http://foobar.test/Fruit' ) // shorthand function a() ->say( '', 'name' ) // empty prefix ->text( 'Banana' ) ->say( '', 'name' ) // redundant say( '', 'name' ) ->text( 'Banane', 'de' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'Predicates', $rdf ); } public function testPredicates_sub_drain() { $writer = $this->newWriter(); $writer->prefix( '', 'http://acme.test/' ); // empty prefix $writer->start(); $sub = $writer->sub(); $writer->about( 'http://foobar.test/Bananas' ) ->a( 'http://foobar.test/Fruit' ) // shorthand function a() ->say( '', 'name' ) // empty prefix ->text( 'Banana' ) ->say( '', 'name' ) // redundant say( '', 'name' ) ->text( 'Banane', 'de' ); $rdf1 = $writer->drain(); $this->assertNotEmpty( $rdf1 ); // sub-writer should still be usable after drain() $sub->about( 'http://foobar.test/Apples' ) ->say( '', 'name' ) // subsequent call to say( '', 'name' ) for a different subject ->text( 'Apple' ); $writer->finish(); $rdf2 = $writer->drain(); $this->assertNotEmpty( $rdf2 ); $this->assertOutputLines( 'Predicates', $rdf1 . 
$rdf2 ); } public function testValues() { $writer = $this->newWriter(); $writer->prefix( 'acme', 'http://acme.test/' ); $writer->start(); $writer->about( 'http://foobar.test/Bananas' ) ->say( 'acme', 'multi' ) ->value( 'A' ) ->value( 'B' ) ->value( 'C' ) ->say( 'acme', 'type' ) ->value( 'foo', 'acme', 'thing' ) ->value( '-5', 'xsd', 'integer' ) ->value( '-5', 'xsd', 'decimal' ) ->value( '-5', 'xsd', 'double' ) ->value( 'true', 'xsd', 'boolean' ) ->value( 'false', 'xsd', 'boolean' ) ->say( 'acme', 'autotype' ) ->value( -5 ) ->value( 3.14 ) ->value( true ) ->value( false ) ->say( 'acme', 'no-autotype' ) ->value( -5, 'xsd', 'decimal' ) ->value( 3.14, 'xsd', 'string' ) ->value( true, 'xsd', 'string' ) ->value( false, 'xsd', 'string' ) ->say( 'acme', 'shorthand' )->value( 'foo' ) ->say( 'acme', 'typed-shorthand' )->value( 'foo', 'acme', 'thing' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'Values', $rdf ); } public function testResources() { $writer = $this->newWriter(); $writer->prefix( 'acme', 'http://acme.test/' ); $writer->start(); $writer->about( 'acme', 'Bongos' ) ->say( 'acme', 'sounds' ) ->is( 'acme', 'Bing' ) ->is( 'http://foobar.test/sound/Bang' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'Resources', $rdf ); } public function testTexts() { $writer = $this->newWriter(); $writer->prefix( 'acme', 'http://acme.test/' ); $writer->start(); $writer->about( 'acme', 'Bongos' ) ->say( 'acme', 'sounds' ) ->text( 'Bom', 'de' ) ->text( 'Bam', 'en' ) ->text( 'Como estas', 'es-419' ) ->text( 'What?', 'bad tag' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'Texts', $rdf ); } public function testNumbers() { $writer = $this->newWriter(); $writer->prefix( 'acme', 'http://acme.test/' ); $writer->start(); $writer->about( 'acme', 'Bongos' ) ->say( 'acme', 'stock' )->value( 5, 'xsd', 'integer' ) ->value( 7 ) ->about( 'acme', 'Tablas' ) ->say( 'acme', 'stock' )->value( 6 ); $writer->finish(); $rdf = 
$writer->drain(); $this->assertOutputLines( 'Numbers', $rdf ); } public function testEricMiller() { // example taken from http://www.w3.org/2007/02/turtle/primer/ $writer = $this->newWriter(); $writer->prefix( 'contact', 'http://www.w3.org/2000/10/swap/pim/contact#' ); $writer->start(); $writer->about( 'http://www.w3.org/People/EM/contact#me' ) ->say( 'rdf', 'type' )->is( 'contact', 'Person' ) ->say( 'contact', 'fullName' )->text( 'Eric Miller' ) ->say( 'contact', 'mailbox' )->is( 'mailto:em@w3.org' ) ->say( 'contact', 'personalTitle' )->text( 'Dr.' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'EricMiller', $rdf ); } public function testLabeledBlankNode() { // example taken from http://www.w3.org/2007/02/turtle/primer/ $writer = $this->newWriter(); $writer->prefix( 'exterms', 'http://www.example.org/terms/' ); $writer->prefix( 'exstaff', 'http://www.example.org/staffid/' ); $writer->start(); $writer->about( 'exstaff', '85740' ) ->say( 'exterms', 'address' )->is( '_', $label = $writer->blank( 'johnaddress' ) ) ->about( '_', $label ) ->say( 'exterms', 'street' )->text( '1501 Grant Avenue' ) ->say( 'exterms', 'city' )->text( 'Bedfort' ) ->say( 'exterms', 'state' )->text( 'Massachusetts' ) ->say( 'exterms', 'postalCode' )->text( '01730' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'LabeledBlankNode', $rdf ); } public function testNumberedBlankNodes() { // example taken from http://www.w3.org/2007/02/turtle/primer/ $writer = $this->newWriter(); $writer->prefix( 'exterms', 'http://www.example.org/terms/' ); $writer->prefix( 'exstaff', 'http://www.example.org/staffid/' ); $writer->prefix( 'ex', 'http://example.org/packages/vocab#' ); $writer->start(); $writer->about( 'exstaff', 'Sue' ) ->say( 'exterms', 'publication' )->is( '_', $label1 = $writer->blank() ); $writer->about( '_', $label1 ) ->say( 'exterms', 'title' )->text( 'Antology of Time' ); $writer->about( 'exstaff', 'Jack' ) ->say( 'exterms', 'publication' 
)->is( '_', $label2 = $writer->blank() ); $writer->about( '_', $label2 ) ->say( 'exterms', 'title' )->text( 'Anthony of Time' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'NumberedBlankNode', $rdf ); } public function testQuotesAndSpecials() { $writer = $this->newWriter(); $writer->prefix( 'exterms', 'http://www.example.org/terms/' ); $writer->start(); $writer->about( 'exterms', 'Duck' )->say( 'exterms', 'says' ) ->text( 'Duck says: "Quack!"' ); $writer->about( 'exterms', 'Cow' )->say( 'exterms', 'says' ) ->text( "Cow says:\n\r 'Moo! \\Moo!'" ); $writer->about( 'exterms', 'Bear' )->say( 'exterms', 'says' ) ->text( 'Bear says: Превед!' ); $writer->finish(); $rdf = $writer->drain(); $this->assertOutputLines( 'TextWithSpecialChars', $rdf ); } /** * @param string $datasetName * @param string[]|string $actual */ private function assertOutputLines( $datasetName, $actual ) { $path = __DIR__ . '/../data/' . $datasetName . '.' . $this->getFileSuffix(); $this->assertNTriplesEquals( file_get_contents( $path ), $actual, "Result mismatches data in $path" ); } /** * @param string[]|string $nTriples * * @return string[] Sorted alphabetically. */ protected function normalizeNTriples( $nTriples ) { if ( is_string( $nTriples ) ) { // Trim and ignore newlines at the end of the file only. $nTriples = explode( "\n", rtrim( $nTriples, "\n" ) ); } - sort( $nTriples ); + if ( $this->sortLines() ) { + sort( $nTriples ); + } return $nTriples; } /** * @param string[]|string $expected * @param string[]|string $actual * @param string $message */ protected function assertNTriplesEquals( $expected, $actual, $message = '' ) { $expected = $this->normalizeNTriples( $expected ); $actual = $this->normalizeNTriples( $actual ); - // Comparing $expected and $actual directly would show triples that are present in both but - // shifted in position. That makes the output hard to read. Calculating the $missing and - // $extra sets helps. 
- $extra = array_diff( $actual, $expected ); - $missing = array_diff( $expected, $actual ); - - // Cute: $missing and $extra can be equal only if they are empty. Comparing them here - // directly looks a bit odd in code, but produces meaningful output, especially if the input - // was sorted. - $this->assertEquals( $missing, $extra, $message ); + if ( $this->sortLines() ) { + // Comparing $expected and $actual directly would show triples that are present in both but + // shifted in position. That makes the output hard to read. Calculating the $missing and + // $extra sets helps. + $extra = array_diff( $actual, $expected ); + $missing = array_diff( $expected, $actual ); + + // Cute: $missing and $extra can be equal only if they are empty. Comparing them here + // directly looks a bit odd in code, but produces meaningful output, especially if the input + // was sorted. + $this->assertEquals( $missing, $extra, $message ); + } else { + $this->assertEquals( $expected, $actual, $message ); + } } //FIXME: test non-ascii literals! //FIXME: test uerl-encoding //FIXME: test IRIs! } diff --git a/tests/phpunit/TurtleRdfWriterTest.php b/tests/phpunit/TurtleRdfWriterTest.php index 0e1c3dd..05bc872 100644 --- a/tests/phpunit/TurtleRdfWriterTest.php +++ b/tests/phpunit/TurtleRdfWriterTest.php @@ -1,43 +1,50 @@ assertTrue( $writer->getTrustIRIs(), 'initialy enabled' ); $writer->setTrustIRIs( false ); $this->assertFalse( $writer->getTrustIRIs(), 'disabled' ); } }