Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F41726671
cite-ext-add-validations.patch
dominic.mayers
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Authored By
dominic.mayers
Jan 28 2024, 10:43 PM
2024-01-28 22:43:17 (UTC+0)
Size
18 KB
Referenced Files
None
Subscribers
None
cite-ext-add-validations.patch
View Options
diff --git a/src/Cite.php b/src/Cite.php
index d6761def..d8a27dd1 100644
--- a/src/Cite.php
+++ b/src/Cite.php
@@ -153,16 +153,30 @@ class Cite {
}
unset($nm);
+ $inList = ($this->inReferencesGroup !== null && $this->depthRef == 0);
+ $grKey = $this->referenceStack->register($arguments['group'], $arguments['name'], $inList);
+ $validator = new Validator($parser->getStripState(), $this->referenceStack, $this->inReferencesGroup);
+ $warnings = $validator->validateNewRef($text,
+ $arguments['group'],
+ $arguments['name'],
+ $grKey)->getErrors();
+ // The warnings are displayed later together with the ref when it is presented inside a references tag.
+ $this->referenceStack->setWarnings($arguments['group'], $grKey, $warnings);
+ if ($inList) {
+ $inReferencesWarnings = $validator->validateNewRefInList($text,
+ $arguments['group'],
+ $arguments['name'],
+ $grKey)->getErrors();
+ $this->referenceStack->setWarnings($arguments['group'], $grKey, $inReferencesWarnings);
+ }
+
// Validation cares about the difference between null and empty, but from here on we don't
if ($text !== null && trim($text) === '') {
$text = null;
}
- $inList = ($this->inReferencesGroup !== null && $this->depthRef == 0);
- $grKey = $this->referenceStack->register($arguments['group'], $arguments['name'], $inList);
-
- // This is not only a shortcut : a value null is not accepted when a string is expected.
$processed_text = $text;
+ // This is not only a shortcut: a null value is not accepted where a string is expected.
if ($text !== null) {
$this->depthRef++;
$processed_text = $parser->recursiveTagParse($text, $frame);
@@ -174,6 +188,9 @@ class Cite {
$processed_text = null;
}
+ $otherWarnings = $validator->validateNewHalfParsedHtml($processed_text, $inList, $arguments['group'], $grKey)->getErrors();
+ $this->referenceStack->setWarnings($arguments['group'], $grKey, $otherWarnings);
+
$ref = $this->referenceStack->setHalfParsedHtml($processed_text, $arguments['group'], $grKey);
if (isset($arguments['dir'])) {
$ref = $this->referenceStack->setDir($arguments['dir'], $arguments['group'], $grKey);
@@ -242,6 +259,15 @@ class Cite {
$this->parseReferencesTagContent($parser, $text, $frame);
+ // Now that all the ref tags for the group have been processed as ref items, we validate them.
+ // The warnings will be displayed later, when the references (the items) are formatted.
+ $validator = new Validator($parser->getStripState(), $this->referenceStack, $this->inReferencesGroup);
+ $refsGroup = $this->referenceStack->getGroupRefs($this->inReferencesGroup);
+ foreach (array_keys($refsGroup) as $grKey) {
+ $referencesWarnings = $validator->validateGroupReferences($this->inReferencesGroup, $grKey)->getErrors();
+ $this->referenceStack->setWarnings($this->inReferencesGroup, $grKey, $referencesWarnings);
+ }
+
$responsive = $arguments['responsive'];
$ret = $this->formatReferences($parser, $this->inReferencesGroup, $responsive);
$this->inReferencesGroup = null;
@@ -321,6 +347,11 @@ class Cite {
foreach ($groups as $group) {
if (!$isSectionPreview) {
$remainingRefs = $this->referenceStack->getGroupRefs($group);
+ $validator = new Validator($parser->getStripState(), $this->referenceStack, $this->inReferencesGroup);
+ foreach ($remainingRefs as $grKey => $ref) {
+ $remainingWarnings = $validator->validateRemainingRef($ref->group, $grKey, $isSectionPreview)->getErrors();
+ $this->referenceStack->setWarnings($ref->group, $grKey, $remainingWarnings);
+ }
}
$formattedRefs = $this->referencesFormatter->formatReferences(
$parser,
diff --git a/src/ErrorReporter.php b/src/ErrorReporter.php
index b37c5a5a..88cd42a7 100644
--- a/src/ErrorReporter.php
+++ b/src/ErrorReporter.php
@@ -30,6 +30,19 @@ class ErrorReporter {
return $this->halfParsed( $parser, $error['message'], ...$error['params'] );
}
+ /**
+ * @param Parser $parser
+ * @param string $key Message name of the error or warning
+ * @param mixed ...$params
+ *
+ * @return string Fully-parsed wikitext with extension's tags already being expanded
+ */
+ public function fullyParsed(Parser $parser, string $key, ...$params): string {
+ $msg = $this->msg($parser, $key, ...$params);
+ $fullyParsed = $parser->recursiveTagParseFully($msg->plain());
+ return $this->wrapInHtmlContainer($fullyParsed, $key, $msg->getLanguage());
+ }
+
/**
* @param Parser $parser
* @param string $key Message name of the error or warning
diff --git a/src/ReferenceStack.php b/src/ReferenceStack.php
index ecbefc98..ebcc7000 100644
--- a/src/ReferenceStack.php
+++ b/src/ReferenceStack.php
@@ -84,6 +84,18 @@ class ReferenceStack {
return $grKey;
}
+ /**
+ * Get a ref that is identified by its group and key
+ *
+ * @param string $group
+ * @param string|int $key
+ *
+ * @return ReferenceStackItem|null The ref with the given $group and $key, or null if none is stored.
+ */
+ public function getRef(string $group, string|int $key): ?ReferenceStackItem {
+ return $this->refs[$group][$key] ?? null;
+ }
+
/**
* Set the text for a ref that is identified by its group and key
*
@@ -324,4 +336,18 @@ class ReferenceStack {
private function nextRefSequence() {
return ++$this->refSequence;
}
+
+ /**
+ * Append warnings to the ref that is identified by its group and key.
+ *
+ * @param string $group
+ * @param string|int $grKey
+ * @param array $warnings Warnings to merge into the ones already stored for the ref.
+ *
+ * @return null
+ */
+ public function setWarnings(string $group, string|int $grKey, array $warnings) {
+ $this->refs[$group][$grKey]->warnings = array_merge($this->refs[$group][$grKey]->warnings, $warnings);
+ return null;
+ }
}
diff --git a/src/ReferencesFormatter.php b/src/ReferencesFormatter.php
index aeb7edfe..0a59e673 100644
--- a/src/ReferencesFormatter.php
+++ b/src/ReferencesFormatter.php
@@ -117,7 +117,13 @@ class ReferencesFormatter {
private function formatListItem(
Parser $parser, $grKey, ReferenceStackItem $ref, bool $isSectionPreview
): string {
+ $warnings = '';
+ foreach ($ref->warnings as $warning) {
+ $halfParsedHtmlWarning = $this->errorReporter->halfParsed($parser, $warning['message'], ...$warning['params']);
+ $warnings .= $halfParsedHtmlWarning;
+ }
$text = $this->referenceText($parser, $grKey, $ref, $isSectionPreview);
+ $text .= $warnings;
$extraAttributes = '';
// Todo: Check that the followings are OK with the new code.
@@ -190,22 +196,15 @@ class ReferencesFormatter {
* @return string
*/
private function referenceText(
- Parser $parser, $grKey, ReferenceStackItem $ref, bool $isSectionPreview
- ): string {
- $text = $ref->text ?? null;
- if ($text === null) {
- return $this->errorReporter->plain($parser,
- $isSectionPreview ? 'cite_warning_sectionpreview_no_text' : 'cite_error_references_no_text', $grKey);
+ Parser $parser, $key, ReferenceStackItem $ref, bool $isSectionPreview
+ ): ?string {
+ if ($ref->name == ":4") {
+ $a = null;
}
-
- foreach ($ref->warnings as $warning) {
- // @phan-suppress-next-line PhanParamTooFewUnpack
- $text .= ' ' . $this->errorReporter->plain($parser, ...$warning);
- // FIXME: We could use a StatusValue object to get rid of duplicates
- break;
- }
-
- return '<span class="reference-text">' . rtrim($text, "\n") . "</span>\n";
+ if ($ref->text)
+ return '<span class="reference-text">' . rtrim($ref->text, "\n") . "</span>\n";
+ else
+ return null;
}
/**
diff --git a/src/Validator.php b/src/Validator.php
index fe81050f..fc1927cf 100644
--- a/src/Validator.php
+++ b/src/Validator.php
@@ -4,6 +4,7 @@ namespace Cite;
use MediaWiki\Parser\Sanitizer;
use StatusValue;
+use StripState;
/**
* Context-aware, detailed validation of the arguments and content of a <ref> tag.
@@ -12,160 +13,153 @@ use StatusValue;
*/
class Validator {
+ // This class does not seem to use status objects optimally. The idea would be to create a status
+ // object, accumulate information into it and return it at the end.
+ private StripState $stripState;
private ReferenceStack $referenceStack;
private ?string $inReferencesGroup;
- private bool $isSectionPreview;
- private bool $isExtendsEnabled;
/**
- * @param ReferenceStack $referenceStack
* @param string|null $inReferencesGroup Group name of the <references> context to consider
- * during validation. Null if we are currently not in a <references> context.
- * @param bool $isSectionPreview Validation is relaxed when previewing parts of a page
- * @param bool $isExtendsEnabled Temporary feature flag
*/
- public function __construct(
- ReferenceStack $referenceStack,
- ?string $inReferencesGroup = null,
- bool $isSectionPreview = false,
- bool $isExtendsEnabled = false
- ) {
+ public function __construct($stripState, $referenceStack, $inReferencesGroup = null) {
+ $this->stripState = $stripState;
$this->referenceStack = $referenceStack;
$this->inReferencesGroup = $inReferencesGroup;
- $this->isSectionPreview = $isSectionPreview;
- $this->isExtendsEnabled = $isExtendsEnabled;
}
- public function validateRef(
- ?string $text,
- string $group,
- ?string $name,
- ?string $extends,
- ?string $follow,
- ?string $dir
- ): StatusValue {
- if ( ctype_digit( (string)$name )
- || ctype_digit( (string)$extends )
- || ctype_digit( (string)$follow )
- ) {
+ public function validateNewRef(?string $text, string $group, ?string $name, string|int $key): StatusValue {
+ // Not sure that it is optimal to return the status as soon as we have an error.
+ // The idea is perhaps to accumulate the errors in the status and return it at the end.
+ if ($name == "inNotMatchingGroupAndTooLate") {
+ $a = null;
+ }
+ if (ctype_digit((string) $name)) {
// Numeric names mess up the resulting id's, potentially producing
// duplicate id's in the XHTML. The Right Thing To Do
// would be to mangle them, but it's not really high-priority
// (and would produce weird id's anyway).
- return StatusValue::newFatal( 'cite_error_ref_numeric_key' );
+ return StatusValue::newFatal('cite_error_ref_numeric_key');
}
- if ( $extends ) {
- // Temporary feature flag until mainstreamed, see T236255
- if ( !$this->isExtendsEnabled ) {
- return StatusValue::newFatal( 'cite_error_ref_too_many_keys' );
+ if ($text !== null) {
+ $partiallyUntaggedText = preg_replace('#<(\w++)[^>]*+>.*?</\1\s*>|<!--.*?-->#s', '', $text);
+ $unTaggedText = preg_replace('#<ref(erences)?\b[^>]*/>#s', '', $partiallyUntaggedText);
+ if (preg_match('/<ref(erences)?\b[^>]*+>/i', $unTaggedText)) {
+ // (bug T8199) This most likely implies that someone left off the
+ // closing </ref> tag, which will cause the entire article to be
+ // eaten up until the next closing </ref>. So we bail out early instead.
+ // The fancy regex above first tries chopping out anything that
+ // looks like a comment or SGML tag, which is a crude way to avoid
+ // false alarms for <nowiki>, <pre>, etc.
+ //
+ // Possible improvement: print the warning, followed by the contents
+ // of the <ref> tag. This way no part of the article will be eaten
+ // even temporarily.
+ //
+ // This cannot be managed as the other warnings, because it is hard to
+ // predict the behaviour of the parser.
+ return StatusValue::newFatal('cite_error_included_ref');
}
+ }
+ return StatusValue::newGood();
+ }
- $groupRefs = $this->referenceStack->getGroupRefs( $group );
- // @phan-suppress-next-line PhanTypeMismatchDimFetchNullable false positive
- if ( isset( $groupRefs[$name] ) && !isset( $groupRefs[$name]->extends ) ) {
- // T242141: A top-level <ref> can't be changed into a sub-reference
- return StatusValue::newFatal( 'cite_error_references_duplicate_key', $name );
- } elseif ( isset( $groupRefs[$extends]->extends ) ) {
- // A sub-reference can not be extended a second time (no nesting)
- return StatusValue::newFatal( 'cite_error_ref_nested_extends', $extends,
- $groupRefs[$extends]->extends );
- }
+ public function validateNewRefInList(?string $text, string $group, ?string $name, string|int $key): StatusValue {
+ // Not sure that it is optimal to return the status as soon as we have an error.
+ // The idea is perhaps to accumulate the errors in the status and return it at the end.
+ if ($name == "inNotMatchingGroupAndTooLate") {
+ $a = null;
}
- if ( $follow && ( $name || $extends ) ) {
- return StatusValue::newFatal( 'cite_error_ref_follow_conflicts' );
+ if ($name === null) {
+ // <ref> calls inside <references> must be named
+ return StatusValue::newFatal('cite_error_references_no_key');
}
- if ( $dir !== null && $dir !== 'rtl' && $dir !== 'ltr' ) {
- return StatusValue::newFatal( 'cite_error_ref_invalid_dir', $dir );
+ if ($group !== $this->inReferencesGroup) {
+ // <ref> and <references> have conflicting group attributes.
+ return StatusValue::newFatal('cite_error_references_group_mismatch',
+ Sanitizer::safeEncodeAttribute($group));
}
- return $this->inReferencesGroup === null ?
- $this->validateRefOutsideOfReferences( $text, $name ) :
- $this->validateRefInReferences( $text, $group, $name );
+ return StatusValue::newGood();
}
- private function validateRefOutsideOfReferences(
- ?string $text,
- ?string $name
- ): StatusValue {
- if ( !$name ) {
- if ( $text === null ) {
- // Completely empty ref like <ref /> is forbidden.
- return StatusValue::newFatal( 'cite_error_ref_no_key' );
- } elseif ( trim( $text ) === '' ) {
- // Must have content or reuse another ref by name.
- return StatusValue::newFatal( 'cite_error_ref_no_input' );
- }
+ public function validateNewHalfParsedHtml(?string $strippedHtml, bool $inList, string $group, string|int $key): StatusValue {
+ // Not sure that it is optimal to return the status as soon as we have an error.
+ // The idea is perhaps to accumulate the errors in the status and return it at the end.
+ if ($key == "inNotMatchingGroupAndTooLate") {
+ $a = null;
}
-
- if ( $text !== null && preg_match(
- '/<ref(erences)?\b[^>]*+>/i',
- preg_replace( '#<(\w++)[^>]*+>.*?</\1\s*>|<!--.*?-->#s', '', $text )
- ) ) {
- // (bug T8199) This most likely implies that someone left off the
- // closing </ref> tag, which will cause the entire article to be
- // eaten up until the next <ref>. So we bail out early instead.
- // The fancy regex above first tries chopping out anything that
- // looks like a comment or SGML tag, which is a crude way to avoid
- // false alarms for <nowiki>, <pre>, etc.
- //
- // Possible improvement: print the warning, followed by the contents
- // of the <ref> tag. This way no part of the article will be eaten
- // even temporarily.
- return StatusValue::newFatal( 'cite_error_included_ref' );
+ if ($inList && ( $strippedHtml === null || trim($strippedHtml) === '' )) {
+ // <ref> called in <references> has no content.
+ return StatusValue::newFatal(
+ 'cite_error_empty_references_define',
+ Sanitizer::safeEncodeAttribute($key),
+ Sanitizer::safeEncodeAttribute($group)
+ );
}
+ $storedStrippedHtml = $this->referenceStack->getRef($group, $key)->text;
+ if (isset($storedStrippedHtml) && isset($strippedHtml) &&
+ $this->stripState->unstripBoth($strippedHtml) !== $this->stripState->unstripBoth($storedStrippedHtml)) {
+ return StatusValue::newFatal(
+ 'cite_error_references_duplicate_key',
+ Sanitizer::safeEncodeAttribute($key)
+ );
+ }
return StatusValue::newGood();
}
- private function validateRefInReferences(
- ?string $text,
- string $group,
- ?string $name
- ): StatusValue {
- if ( $group !== $this->inReferencesGroup ) {
- // <ref> and <references> have conflicting group attributes.
- return StatusValue::newFatal( 'cite_error_references_group_mismatch',
- Sanitizer::safeEncodeAttribute( $group ) );
+ // Only to be executed after all other ref tags for the group have been processed.
+ // Otherwise, the count property for the item might not be the final value.
+ public function validateGroupReferences(string $group, string|int $key): StatusValue {
+ // If more things end up being checked here, it is not clear that it would be optimal to return
+ // the status as soon as we have an error. The idea is perhaps to accumulate the errors
+ // in the status and return it at the end.
+ if ($key == "inNotMatchingGroupAndTooLate") {
+ $a = null;
}
-
- if ( !$name ) {
- // <ref> calls inside <references> must be named
- return StatusValue::newFatal( 'cite_error_references_no_key' );
+ if ($this->referenceStack->getRef($group, $key)->count === 0) {
+ return StatusValue::newFatal(
+ 'cite_error_references_missing_key',
+ Sanitizer::safeEncodeAttribute($key),
+ Sanitizer::safeEncodeAttribute($group)
+ );
}
- if ( $text === null || trim( $text ) === '' ) {
- // <ref> called in <references> has no content.
+ $text = $this->referenceStack->getRef($group, $key)->text;
+ if ($text === null || trim($text) === '') {
return StatusValue::newFatal(
- 'cite_error_empty_references_define',
- Sanitizer::safeEncodeAttribute( $name ),
- Sanitizer::safeEncodeAttribute( $group )
+ 'cite_error_references_no_text',
+ Sanitizer::safeEncodeAttribute($key)
);
}
- // Section previews are exempt from some rules.
- if ( !$this->isSectionPreview ) {
- if ( !$this->referenceStack->hasGroup( $group ) ) {
- // Called with group attribute not defined in text.
- return StatusValue::newFatal(
- 'cite_error_references_missing_group',
- Sanitizer::safeEncodeAttribute( $group ),
- Sanitizer::safeEncodeAttribute( $name )
- );
- }
-
- $groupRefs = $this->referenceStack->getGroupRefs( $group );
+ return StatusValue::newGood();
+ }
- if ( !isset( $groupRefs[$name] ) ) {
- // No such named ref exists in this group.
- return StatusValue::newFatal( 'cite_error_references_missing_key',
- Sanitizer::safeEncodeAttribute( $name ) );
- }
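+ /**
+ * @param string $group
+ * @param string|int $key
+ * @param bool $isSectionPreview No error is reported when this is true
+ * @return StatusValue Fatal for the given ref when $isSectionPreview is false, good otherwise
+ */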
+ public function validateRemainingRef(string $group, string|int $key, bool $isSectionPreview): StatusValue {
+ // If more things end up being checked here, it is not clear that it would be optimal to return
+ // the status as soon as we have an error. The idea is perhaps to accumulate the errors
+ // in the status and return it at the end.
+ if ($key == "inNotMatchingGroupAndTooLate") {
+ $a = null;
+ }
+ if (!$isSectionPreview) {
+ return StatusValue::newFatal(
+ 'cite_error_group_refs_without_references',
+ Sanitizer::safeEncodeAttribute($group),
+ Sanitizer::safeEncodeAttribute($key)
+ );
}
-
return StatusValue::newGood();
}
-
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14506371
Default Alt Text
cite-ext-add-validations.patch (18 KB)
Attached To
Mode
T22707: Nested refs fail inside references block
Attached
Detach File
Event Timeline
Log In to Comment