Page MenuHomePhabricator

0001-Use-a-protected-key-to-distinguish-comments-internal.patch

Authored By
Arlolra
Apr 7 2021, 8:35 PM
Size
3 KB
Referenced Files
None
Subscribers
None

0001-Use-a-protected-key-to-distinguish-comments-internal.patch

From 15169a678f9f468ff6465035a32f28e8ec82003f Mon Sep 17 00:00:00 2001
From: Arlo Breault <abreault@wikimedia.org>
Date: Wed, 7 Apr 2021 12:10:39 -0400
Subject: [PATCH] Use a protected key to distinguish comments internal to Parsoid
Bug: T279451
Change-Id: I40bdfddaed292a33479874b5e49b17fe616c3889
---
src/Utils/WTUtils.php | 14 ++++++++++++--
src/Wt2Html/Grammar.pegphp | 2 ++
src/Wt2Html/Grammar.php | 2 ++
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/Utils/WTUtils.php b/src/Utils/WTUtils.php
index 88b7c0ba6..f6e0720a3 100644
--- a/src/Utils/WTUtils.php
+++ b/src/Utils/WTUtils.php
@@ -700,6 +700,8 @@ class WTUtils {
// Now encode '-', '>' and '&' in the "true value" as HTML entities,
// so that they can be safely embedded in an HTML comment.
// This part doesn't have to map strings 1-to-1.
+ // WARNING(T279451): This is actually the part which protects the
+ // "-type" key in self::fosterCommentData
return preg_replace_callback( '/[->&]/', function ( $m ) {
return Utils::entityEncodeAll( $m[0] );
}, $trueValue );
@@ -764,7 +766,11 @@ class WTUtils {
*/
public static function fosterCommentData( string $typeOf, array $attrs, bool $encode ): string {
$str = PHPUtils::jsonEncode( [
- '@type' => $typeOf,
+ // WARNING(T279451): "-type" is chosen as the key deliberately. Any "-"
+ // in a comment that comes from source wikitext is entity-encoded by
+ // self::encodeComment (see the grammar), so a comment carrying a raw
+ // "-type" key when reinserting can only be internal to Parsoid
+ '-type' => $typeOf,
'attrs' => $attrs
] );
if ( $encode ) {
@@ -782,16 +788,20 @@ class WTUtils {
public static function reinsertFosterableContent( Env $env, DOMNode $node, bool $decode ):
?DOMNode {
if ( DOMUtils::isComment( $node ) && preg_match( '/^\{.+\}$/D', $node->nodeValue ) ) {
+ // XXX(T279451#6981267): Hardcode this for good measure, even
+ // though all production uses should already be passing in `false`
+ $decode = false;
// Convert serialized meta tags back from comments.
// We use this trick because comments won't be fostered,
// providing more accurate information about where tags are expected
// to be found.
+ // @phan-suppress-next-line PhanImpossibleCondition
$data = json_decode( $decode ? self::decodeComment( $node->nodeValue ) : $node->nodeValue );
if ( $data === null ) {
// not a valid json attribute, do nothing
return null;
}
- $type = $data->{'@type'};
+ $type = $data->{'-type'} ?? '';
if ( preg_match( '/^mw:/', $type ) ) {
$meta = $node->ownerDocument->createElement( 'meta' );
foreach ( $data->attrs as $attr ) {
diff --git a/src/Wt2Html/Grammar.pegphp b/src/Wt2Html/Grammar.pegphp
index c70fe58fa..0a102751c 100644
--- a/src/Wt2Html/Grammar.pegphp
+++ b/src/Wt2Html/Grammar.pegphp
@@ -657,6 +657,8 @@ heading =
comment =
'<!--' c:$(!"-->" .)* ('-->' / eof) {
+ // WARNING(T279451): This encoding is important for the choice of key
+ // in WTUtils::fosterCommentData
$data = WTUtils::encodeComment( $c );
return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ];
}
diff --git a/src/Wt2Html/Grammar.php b/src/Wt2Html/Grammar.php
index 3453737af..2b4ffd4fe 100644
--- a/src/Wt2Html/Grammar.php
+++ b/src/Wt2Html/Grammar.php
@@ -537,6 +537,8 @@ class Grammar extends \WikiPEG\PEGParserBase {
}
private function a23($c) {
+ // WARNING(T279451): This encoding is important for the choice of key
+ // in WTUtils::fosterCommentData
$data = WTUtils::encodeComment( $c );
return [ new CommentTk( $data, (object)[ 'tsr' => $this->tsrOffsets() ] ) ];
--
2.31.1

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
8977870
Default Alt Text
0001-Use-a-protected-key-to-distinguish-comments-internal.patch (3 KB)

Event Timeline