rECHO/includes/DiscussionParser.php

Owners
None

DiscussionParser.php

1<?php
2
3abstract class EchoDiscussionParser {
/** @var string|null Timestamp regex fragment, cached by getTimestampRegex() after its first call */
protected static $timestampRegex;

/**
 * @var string Delimiter-less pattern for one section header line:
 *  group 1 is the run of "=" signs, group 2 is the header text.
 */
protected static $headerRegex = '^(==+)\s*([^=].*)\s*\1$';

/** @var array Change interpretations that were already computed, keyed by revision ID */
protected static $revisionInterpretationCache = array();

/** @var EchoDiffParser|null Created on first use by getMachineReadableDiff() */
protected static $diffParser;
8
/**
 * Creates the Echo events caused by the discussion-related changes
 * found in a revision.
 *
 * A 'mention' event is attempted for every signed comment and every new
 * signed section. On top of that, an 'edit-user-talk' event is created
 * when the edited page is the talk page of a registered user who has the
 * 'echo-notify-show-link' option enabled.
 *
 * The 'add-comment' and 'add-talkpage-topic' events (extra data: revid,
 * section-title, content) are temporarily disabled, so $createdEvents
 * currently never becomes true.
 *
 * @param $revision Revision object
 * @return null
 */
static function generateEventsForRevision( $revision ) {
	$interpretation = self::getChangeInterpretationForRevision( $revision );
	$createdEvents = false;

	// A page that already has a previous revision can be read from a slave
	// database; a newly created page has to be looked up on the master.
	$title = $revision->getPrevious()
		? Title::newFromID( $revision->getPage() )
		: Title::newFromID( $revision->getPage(), Title::GAID_FOR_UPDATE );

	// Nothing to do without a valid title
	if ( !$title ) {
		return;
	}

	$userID = $revision->getUser();
	$userName = $revision->getUserText();
	if ( $userID != 0 ) {
		$user = User::newFromId( $userID );
	} else {
		// Anonymous edit: build the user from the name, without validation
		$user = User::newFromName( $userName, false );
	}

	foreach ( $interpretation as $action ) {
		switch ( $action['type'] ) {
			case 'add-comment':
				// The header comes from the whole section the comment was added to
				$header = self::extractHeader( $action['full-section'] );
				self::generateMentionEvents( $header, $action['content'], $revision, $user );
				break;
			case 'new-section-with-comment':
				// The added content starts with its own header
				$header = self::extractHeader( $action['content'] );
				self::generateMentionEvents( $header, $action['content'], $revision, $user );
				break;
		}
	}

	if ( $createdEvents || $title->getNamespace() != NS_USER_TALK ) {
		return;
	}

	// Only a valid, registered recipient who has not turned off their
	// notifications gets a talk page post notification.
	$notifyUser = User::newFromName( $title->getText() );
	if (
		!$notifyUser ||
		!$notifyUser->getID() ||
		!$notifyUser->getOption( 'echo-notify-show-link' )
	) {
		return;
	}

	// Minor edits by agents holding 'nominornewtalk' do not notify
	if ( $revision->isMinor() && $user->isAllowed( 'nominornewtalk' ) ) {
		return;
	}

	$section = self::detectSectionTitleAndText( $interpretation );
	EchoEvent::create( array(
		'type' => 'edit-user-talk',
		'title' => $title,
		'extra' => array(
			'revid' => $revision->getID(),
			'minoredit' => $revision->isMinor(),
			'section-title' => $section['section-title'],
			'section-text' => $section['section-text']
		),
		'agent' => $user,
	) );
}
101
/**
 * Attempts to determine what section title the edit was performed under (if any)
 *
 * Only 'add-comment' and 'new-section-with-comment' actions are looked at;
 * every other action type is ignored. When all of the comments that have a
 * header share the same header, that header and the snippet of the last such
 * comment are returned. When comments under two different headers are found
 * there is no single section to link to, and both fields come back empty.
 *
 * @param $interpretation array Results of self::getChangeInterpretationForRevision
 * @return array Array with the keys 'section-title' and 'section-text';
 *  both are empty strings when no unambiguous section was found
 */
public static function detectSectionTitleAndText( array $interpretation ) {
	$nothing = array( 'section-title' => '', 'section-text' => '' );
	$foundHeader = false;
	$foundSnippet = '';

	foreach ( $interpretation as $action ) {
		// The header is determined per action, so a header seen earlier
		// can never leak into an unrelated action.
		if ( $action['type'] == 'add-comment' ) {
			$header = self::extractHeader( $action['full-section'] );
		} elseif ( $action['type'] == 'new-section-with-comment' ) {
			$header = self::extractHeader( $action['content'] );
		} else {
			continue;
		}

		// extractHeader() gives false when the text has no header; compare
		// strictly so that a header such as "0" is still accepted.
		if ( $header === false || $header === '' ) {
			continue;
		}

		// If we find different headers within the same change interpretation
		// then we cannot choose just 1 to link to
		if ( $foundHeader !== false && $foundHeader !== $header ) {
			return $nothing;
		}

		$foundHeader = $header;
		$foundSnippet = self::getTextSnippet(
			self::stripSignature( self::stripHeader( $action['content'] ) ),
			150
		);
	}

	if ( $foundHeader === false ) {
		return $nothing;
	}

	return array( 'section-title' => $foundHeader, 'section-text' => $foundSnippet );
}
137
/**
 * Creates a single 'mention' event for the users whose user pages are
 * linked from a comment posted on a talk page.
 *
 * Nothing happens when the revision has no title, when the parsed content
 * has no links into the user namespace, or when no linked user qualifies.
 *
 * @param $header string The subject line for the discussion.
 * @param $content string The content of the post, as a wikitext string.
 * @param $revision Revision object.
 * @param $agent User The user who made the comment.
 */
public static function generateMentionEvents( $header, $content, $revision, $agent ) {
	$title = $revision->getTitle();
	if ( !$title ) {
		return;
	}

	$parserOutput = self::parseNonEditWikitext( $content, new Article( $title ) );
	$links = $parserOutput->getLinks();
	if ( !isset( $links[NS_USER] ) || !is_array( $links[NS_USER] ) ) {
		return;
	}

	$mentionedUsers = array();
	$added = 0;

	foreach ( array_keys( $links[NS_USER] ) as $dbk ) {
		$user = User::newFromName( $dbk );

		// Skip invalid names, anonymous users and the author of the revision
		if ( !$user || $user->isAnon() || $user->getId() == $revision->getUser() ) {
			continue;
		}
		// Skip the owner of the talk page that was edited
		if ( $title->getNamespace() === NS_USER_TALK && $title->getDBkey() === $dbk ) {
			continue;
		}

		$mentionedUsers[$user->getId()] = $user->getId();

		// The list of links is unbounded: stop once more than 100 users were added
		if ( ++$added > 100 ) {
			break;
		}
	}

	if ( !$mentionedUsers ) {
		return;
	}

	EchoEvent::create( array(
		'type' => 'mention',
		'title' => $title,
		'extra' => array(
			'content' => $content,
			'section-title' => $header,
			'revid' => $revision->getId(),
			'mentioned-users' => $mentionedUsers,
		),
		'agent' => $agent,
	) );
}
198
/**
 * Parses wikitext in the context of an article's title, the way
 * Article::prepareTextForEdit does, but for text that is not being
 * edited (old wikitext usually).
 * Stolen from AbuseFilterVariableHolder
 *
 * Results are kept for the rest of the request in a function-level cache
 * keyed by the md5 of the wikitext plus the prefixed title text.
 *
 * @param $wikitext String
 * @param $article Article
 *
 * @return object The value returned by $wgParser->parse()
 */
static function parseNonEditWikitext( $wikitext, $article ) {
	global $wgParser;
	static $cache = array();

	$title = $article->getTitle();
	$cacheKey = md5( $wikitext ) . ':' . $title->getPrefixedText();

	if ( !isset( $cache[$cacheKey] ) ) {
		$options = new ParserOptions;
		$options->setTidy( true );
		$cache[$cacheKey] = $wgParser->parse( $wikitext, $title, $options );
	}

	return $cache[$cacheKey];
}
226
/**
 * Given a Revision object, determines which users are interested
 * in related EchoEvents.
 *
 * These are the registered users who signed anywhere in a section that a
 * comment was added to and, when the page is a user talk page, the
 * registered owner of that talk page. Anonymous or non-existent users are
 * never included.
 *
 * @param $revision Revision object.
 * @return Array of User objects, keyed by user ID
 */
static function getNotifiedUsersForComment( $revision ) {
	$users = array();

	foreach ( self::getChangeInterpretationForRevision( $revision ) as $action ) {
		if ( $action['type'] != 'add-comment' ) {
			continue;
		}

		$signers = array_keys( self::extractSignatures( $action['full-section'] ) );
		foreach ( $signers as $userName ) {
			$user = User::newFromName( $userName );

			// Deliberately ignoring anonymous users
			if ( $user && $user->getID() ) {
				$users[$user->getID()] = $user;
			}
		}
	}

	// The revision may have no title; generateMentionEvents() guards
	// against that as well.
	$title = $revision->getTitle();
	if ( $title && $title->getNamespace() == NS_USER_TALK ) {
		$owner = User::newFromName( $title->getText() );

		// Require a registered account, as for the signers above; otherwise
		// an unregistered name would be stored under the key 0.
		if ( $owner && $owner->getID() ) {
			$users[$owner->getID()] = $owner;
		}
	}

	return $users;
}
265
/**
 * Given a Revision object, returns a talk-page-centric interpretation
 * of the changes made in it.
 *
 * The text of the revision is diffed against the text of its parent
 * revision (or against an empty string when there is none) and the diff is
 * handed to interpretDiff() together with the name of the revision's author.
 * Results are cached per revision ID for the rest of the request.
 *
 * @param $revision Revision object
 * @see EchoDiscussionParser::interpretDiff
 * @return Array, see interpretDiff for details.
 */
static function getChangeInterpretationForRevision( $revision ) {
	$revId = $revision->getID();
	if ( $revId && isset( self::$revisionInterpretationCache[$revId] ) ) {
		return self::$revisionInterpretationCache[$revId];
	}

	$userID = $revision->getUser();
	$userName = $revision->getUserText();
	$user = $userID != 0 ? User::newFromId( $userID ) : User::newFromName( $userName, false );
	// User::newFromName() can hand back false; fall back to the raw
	// revision user text rather than calling a method on a non-object.
	$authorName = $user ? $user->getName() : $userName;

	$prevText = '';
	if ( $revision->getParentId() ) {
		$prevRevision = Revision::newFromId( $revision->getParentId() );
		if ( $prevRevision ) {
			$prevText = $prevRevision->getText();
		}
	}

	$changes = self::getMachineReadableDiff( $prevText, $revision->getText() );
	$output = self::interpretDiff( $changes, $authorName );

	// Only revisions with an ID can be looked up again, so only cache those
	if ( $revId ) {
		self::$revisionInterpretationCache[$revId] = $output;
	}

	return $output;
}
296
/**
 * Given a machine-readable diff, interprets the changes
 * in terms of discussion page actions
 *
 * @todo Expand recognisable actions.
 * @param $changes array Output of self::getMachineReadableDiff. Entries with
 *  a non-numeric key (such as '_info') are not treated as changes.
 * @param $user User name
 * @return Array of associative arrays.
 * Each entry represents an action, which is classified in the 'type' field.
 * All types contain a 'content' field except 'unknown'
 * (which instead passes through the machine-readable diff in 'details')
 * and 'unknown-change' (which provides 'new_content' and 'old_content').
 * 'add-comment' additionally carries the surrounding section in 'full-section'.
 * type may be:
 * - add-comment: A comment signed only by the user is added, and the
 *     added text contains no section header.
 * - new-section-with-comment: The added text starts with a section header,
 *     contains exactly that one header, and is signed only by the user.
 * - unknown-signed-addition: Content signed only by the user is added,
 *     but its section headers fit neither of the two cases above.
 * - unknown-multi-signed-addition: Signed content is added, but it is not
 *     signed by exactly the user alone (several signatures, or a
 *     signature of somebody else).
 * - unknown-unsigned-addition: Some content is added, but it is
 *     unsigned.
 * - unknown-subtraction: Some content was removed. These actions are
 *     not currently analysed.
 * - unknown-change: Some content was replaced with other content.
 *     These actions are not currently analysed.
 * - unknown: Unrecognised change type.
 */
static function interpretDiff( $changes, $user ) {
	$actions = array();

	foreach ( $changes as $index => $change ) {
		// Skip the extra non-numeric items (e.g. _info) and changes without an action
		if ( !is_numeric( $index ) || !$change['action'] ) {
			continue;
		}

		switch ( $change['action'] ) {
			case 'add':
				$content = trim( $change['content'] );
				$signedUsers = array_keys( self::extractSignatures( $content ) );
				$signatureCount = count( $signedUsers );

				// Loose in_array() on purpose: PHP stores purely numeric
				// array keys as integers, so a numeric user name would not
				// be strictly equal to the string in $user.
				if ( $signatureCount == 1 && in_array( $user, $signedUsers ) ) {
					$sectionCount = self::getSectionCount( $content );
					$startsWithHeader = preg_match( '/\A' . self::$headerRegex . '/um', $content );

					if ( $sectionCount === 0 ) {
						$actions[] = array(
							'type' => 'add-comment',
							'content' => $content,
							'full-section' => self::getFullSection(
								$changes['_info']['rhs'],
								$change['right-pos']
							),
						);
					} elseif ( $sectionCount === 1 && $startsWithHeader ) {
						$actions[] = array(
							'type' => 'new-section-with-comment',
							'content' => $content,
						);
					} else {
						$actions[] = array(
							'type' => 'unknown-signed-addition',
							'content' => $content,
						);
					}
				} elseif ( $signatureCount >= 1 ) {
					$actions[] = array(
						'type' => 'unknown-multi-signed-addition',
						'content' => $content,
					);
				} else {
					$actions[] = array(
						'type' => 'unknown-unsigned-addition',
						'content' => $content,
					);
				}
				break;

			case 'subtract':
				$actions[] = array(
					'type' => 'unknown-subtraction',
					'content' => $change['content'],
				);
				break;

			case 'change':
				$actions[] = array(
					'type' => 'unknown-change',
					'old_content' => $change['old_content'],
					'new_content' => $change['new_content'],
				);
				break;

			default:
				$actions[] = array(
					'type' => 'unknown',
					'details' => $change,
				);
		}
	}

	return $actions;
}
404
/**
 * Finds the section that a given line is in.
 *
 * Starting from the given line, the section is grown backwards up to and
 * including the closest header line (or the start of the page), and
 * forwards up to, but not including, the next header line (or the end
 * of the page).
 *
 * @param $lines Array of lines in the page.
 * @param $offset int The line to find the full section for; the line used
 *  is $lines[$offset - 1].
 * @return string Content of the section, without leading or trailing newlines.
 */
static function getFullSection( $lines, $offset ) {
	$pattern = '/' . self::$headerRegex . '/um';
	$start = $offset - 1;
	$section = $lines[$start];

	// Expand backwards, unless the starting line is itself a header
	if ( !preg_match( $pattern, $section ) ) {
		for ( $i = $start - 1; $i >= 0; $i-- ) {
			$section = $lines[$i] . "\n" . $section;
			if ( preg_match( $pattern, $lines[$i] ) ) {
				// Reached the header this section belongs to
				break;
			}
		}
	}

	// And then forwards, until the header of the next section
	$lineCount = count( $lines );
	for ( $i = $start + 1; $i < $lineCount; $i++ ) {
		if ( preg_match( $pattern, $lines[$i] ) ) {
			break;
		}
		$section .= "\n" . $lines[$i];
	}

	return trim( $section, "\n" );
}
444
/**
 * Counts the lines of a text that look like section headers.
 *
 * @param $text string The text; it is trimmed before matching.
 * @return int Number of section headers found.
 */
static function getSectionCount( $text ) {
	$headers = array();
	preg_match_all( '/' . self::$headerRegex . '/um', trim( $text ), $headers );

	// Index 0 holds the full matches, one per header line
	return count( $headers[0] );
}
459
/**
 * Gets the title of a section or sub section
 *
 * When the text holds several headers, the title of the last one is used.
 *
 * @param $text string The text of the section.
 * @return string|bool The trimmed title of the section, or false when the
 *  text contains no header.
 */
static function extractHeader( $text ) {
	$found = array();
	$pattern = '/' . self::$headerRegex . '/um';

	if ( preg_match_all( $pattern, trim( $text ), $found ) ) {
		// Capture group 2 is the text between the "=" markers
		$titles = $found[2];
		return trim( end( $titles ) );
	}

	return false;
}
477
/**
 * Strips out a signature if possible.
 *
 * Text without a timestamp is returned as is. When a timestamp is found but
 * no user link can be matched to it, everything from the timestamp onwards
 * is cut off; otherwise the text is cut at the start of the user link.
 *
 * @param $text string The wikitext to strip
 * @return string
 */
static function stripSignature( $text ) {
	global $wgContLang;

	$timestampPos = self::getTimestampPosition( $text );
	if ( $timestampPos === false ) {
		return $text;
	}

	$signature = self::getUserFromLine( $text, $timestampPos );
	if ( $signature !== false ) {
		// Use truncate() instead of truncateHTML() because truncateHTML()
		// would not strip the signature if the text contains < or &
		return $wgContLang->truncate( $text, $signature[0], '' );
	}

	return substr( $text, 0, $timestampPos );
}
504
/**
 * Strips unnecessary indentation and so on from comments
 *
 * Leading colons are removed from every line. A list marker (# or *,
 * including markers nested behind colons or other markers) is removed
 * only when the text contains exactly one list item; texts with several
 * list items keep their markers.
 *
 * @param $text string The text to strip from
 * @return string Stripped wikitext
 */
static function stripIndents( $text ) {
	// First strip all indentation from the beginning of lines
	$text = preg_replace( '/^\s*\:+/m', '', $text );

	// Now if there is only one list item, strip that too.
	// preg_match_all() returns the number of full matches, i.e. the number
	// of list items; count( $matches ) would count capture groups instead
	// and is always 1 for this pattern.
	$listRegex = '/^\s*(?:[\:#*]\s*)*[#*]/m';
	$matches = array();
	if ( preg_match_all( $listRegex, $text, $matches ) === 1 ) {
		$text = preg_replace( $listRegex, '', $text );
	}

	return $text;
}
526
/**
 * Removes every section header line from a text.
 *
 * @param $text string The text to strip out the section header from.
 * @return string: The same text, with the section header stripped out.
 */
static function stripHeader( $text ) {
	$pattern = '/' . self::$headerRegex . '/um';

	return preg_replace( $pattern, '', $text );
}
537
/**
 * Determines whether the input is a signed comment.
 *
 * A comment counts as signed when it has a timestamp and a user link
 * can be matched to that timestamp.
 *
 * @param $text string The text to check.
 * @param $user User|bool If set, will only return true if the comment is
 *  signed by this user. The value is handed to User::getCanonicalName().
 * @return bool: true or false.
 */
static function isSignedComment( $text, $user = false ) {
	$timestampPos = self::getTimestampPosition( $text );
	if ( $timestampPos === false ) {
		return false;
	}

	$userData = self::getUserFromLine( $text, $timestampPos );
	if ( $userData === false ) {
		return false;
	}

	// Any signature is good enough when no specific user was asked for
	if ( $user === false ) {
		return true;
	}

	$foundUser = $userData[1];
	$foundName = User::getCanonicalName( $foundUser, false );
	$wantedName = User::getCanonicalName( $user, false );

	return $foundName === $wantedName;
}
565
/**
 * Finds the start position, if any, of the timestamp on a line
 *
 * The timestamp has to be followed only by what getLineEndingRegex()
 * permits; the match uses the multi-line and UTF-8 modifiers.
 *
 * @param $line string The line to search for a signature on
 * @return int|bool Integer offset of the match, or false when there is none
 */
static function getTimestampPosition( $line ) {
	$pattern = '/' . self::getTimestampRegex() . self::getLineEndingRegex() . '/mu';
	$hit = array();

	if ( preg_match( $pattern, $line, $hit, PREG_OFFSET_CAPTURE ) ) {
		// With PREG_OFFSET_CAPTURE every match is array( text, offset )
		return $hit[0][1];
	}

	return false;
}
587
/**
 * Finds differences between $oldText and $newText
 * and returns the result in a machine-readable format.
 *
 * The work is delegated to a shared EchoDiffParser instance, which is
 * created the first time this method is called.
 *
 * @param $oldText string The "left hand side" of the diff.
 * @param $newText string The "right hand side" of the diff.
 * @throws MWException
 * @return Array of changes, as returned by EchoDiffParser::getChangeSet().
 * Each change consists of:
 * * An 'action', one of:
 *   - add
 *   - subtract
 *   - change
 * * 'content' that was added or removed, or in the case
 *   of a change, 'old_content' and 'new_content'
 * * The position (in lines) of the change. NOTE(review): interpretDiff()
 *   reads this from the key 'right-pos'; confirm the exact key names
 *   against EchoDiffParser.
 */
static function getMachineReadableDiff( $oldText, $newText ) {
	if ( self::$diffParser === null ) {
		self::$diffParser = new EchoDiffParser;
	}

	$parser = self::$diffParser;

	return $parser->getChangeSet( $oldText, $newText );
}
611
/**
 * Finds and extracts signatures in $text
 *
 * The text is examined line by line. A line contributes a signature when
 * it has a timestamp and a user link can be matched to that timestamp.
 *
 * @param $text string The text in which to look for signed comments.
 * @return array. Associative array, the key is the username, the value
 *  is the last signature that was found (the line from the start of the
 *  user link onwards).
 */
static function extractSignatures( $text ) {
	$output = array();

	foreach ( explode( "\n", $text ) as $line ) {
		$timestampPos = self::getTimestampPosition( $line );
		// Compare strictly: offset 0 is a valid position, only false
		// means that the line does not finish with a timestamp.
		if ( $timestampPos === false ) {
			continue;
		}

		// Now that we know we have a timestamp, look for
		// the last user link on the line.
		$userData = self::getUserFromLine( $line, $timestampPos );
		if ( $userData === false ) {
			continue;
		}

		list( $signaturePos, $user ) = $userData;

		// A later line signed by the same user replaces the earlier entry
		$output[$user] = substr( $line, $signaturePos );
	}

	return $output;
}
654
/**
 * From a line in a wiki page, determine which user, if any,
 * has signed it.
 *
 * Candidate links are links into the user and user talk namespaces (by
 * name and alias, for the content language and for English) and links to
 * the contributions special page.
 *
 * @param $line string The line.
 * @param $timestampPos int The offset of the start of the timestamp.
 * @return bool|array false for none, Array for success.
 * - First element is the position of the signature.
 * - Second element is the normalised user name.
 */
static function getUserFromLine( $line, $timestampPos ) {
	global $wgContLang;

	// Later entries have a higher precedence
	// @todo FIXME: handle optional whitespace in links
	$languages = array( $wgContLang );
	if ( $wgContLang->getCode() !== 'en' ) {
		$languages[] = Language::factory( 'en' );
	}

	$prefixes = array();
	foreach ( $languages as $language ) {
		$nsNames = $language->getNamespaces();
		$prefixes[] = '[[' . $nsNames[NS_USER] . ':';
		$prefixes[] = '[[' . $nsNames[NS_USER_TALK] . ':';

		foreach ( $language->getNamespaceAliases() as $alias => $nsId ) {
			if ( $nsId == NS_USER || $nsId == NS_USER_TALK ) {
				$prefixes[] = '[[' . $alias . ':';
			}
		}
	}

	// @todo FIXME: Check aliases too
	$prefixes[] = '[[' . SpecialPage::getTitleFor( 'Contributions' )->getPrefixedText() . '/';

	// Prefixes written with underscores are also tried with spaces
	$spacedPrefixes = array();
	foreach ( $prefixes as $prefix ) {
		if ( strpos( $prefix, '_' ) !== false ) {
			$spacedPrefixes[] = str_replace( '_', ' ', $prefix );
		}
	}
	$prefixes = array_merge( $prefixes, $spacedPrefixes );

	$winningUser = false;
	$winningPos = false;

	// Look for the leftmost link to the rightmost user
	foreach ( $prefixes as $prefix ) {
		$link = self::getLinkFromLine( $line, $prefix );
		if ( $link === false ) {
			continue;
		}
		list( $pos, $user ) = $link;

		// More than 255 bytes before the timestamp: couldn't be a signature
		if ( ( $timestampPos - $pos ) > 255 ) {
			continue;
		}

		$isFirstCandidate = ( $winningPos === false );
		$isLaterOtherUser = ( $pos > $winningPos && $user !== $winningUser );
		$isEarlierSameUser = ( $pos < $winningPos && $user === $winningUser );

		if ( $isFirstCandidate || $isLaterOtherUser || $isEarlierSameUser ) {
			$winningPos = $pos;
			$winningUser = ucfirst( trim( $user ) );
		}
	}

	if ( $winningUser === false ) {
		return false;
	}

	return array( $winningPos, $winningUser );
}
737
/**
 * Find the last link beginning with a given prefix on a line.
 *
 * The search is case-insensitive and runs from the end of the line. When
 * no user can be extracted from the link that was found, the search is
 * repeated for occurrences that start before that link.
 *
 * @param $line string The line to search.
 * @param $linkPrefix string The prefix to search for.
 * @param $failureOffset int|bool Offset of a link at which extraction
 *  failed earlier; only positions before it are searched. false searches
 *  the whole line.
 * @return bool|array false for failure, array for success.
 * - First element is the string offset of the link.
 * - Second element is the user the link refers to.
 */
static function getLinkFromLine( $line, $linkPrefix, $failureOffset = false ) {
	$lineLength = strlen( $line );

	while ( true ) {
		// A negative offset makes strripos() ignore everything from the
		// failed position to the end of the line.
		$offset = ( $failureOffset === false ) ? 0 : $failureOffset - $lineLength - 1;

		// Avoid PHP warning: Offset is greater than the length of haystack string
		if ( abs( $offset ) > $lineLength ) {
			return false;
		}

		$linkPos = strripos( $line, $linkPrefix, $offset );
		if ( $linkPos === false ) {
			return false;
		}

		$linkUser = self::extractUserFromLink( $line, $linkPrefix, $linkPos );
		if ( $linkUser !== false ) {
			return array( $linkPos, $linkUser );
		}

		// Extraction failed here: look for another place further left
		$failureOffset = $linkPos;
	}
}
778
/**
 * Given text including a link, gives the user that that link refers to
 *
 * The user part is whatever follows the prefix up to the first "|", "]"
 * or "#". It is accepted when it is an IP address or when
 * User::getCanonicalName() considers it a valid user name.
 *
 * @param $text string The text to extract from.
 * @param $prefix string The link prefix that was used to find the link.
 * @param $offset int Optionally, the offset of the start of the link.
 * @return bool|string The result of User::getCanonicalName( name, false ),
 *  or false when no acceptable user name follows the prefix.
 */
static function extractUserFromLink( $text, $prefix, $offset = 0 ) {
	$afterPrefix = substr( $text, strlen( $prefix ) + $offset );

	$found = array();
	if ( !preg_match( '/^[^\|\]\#]+/u', $afterPrefix, $found ) ) {
		// user link is invalid
		return false;
	}

	$candidate = $found[0];
	$isAcceptable = User::isIP( $candidate ) || User::getCanonicalName( $candidate ) !== false;
	if ( !$isAcceptable ) {
		// Not a real username
		return false;
	}

	return User::getCanonicalName( $candidate, false );
}
815
/**
 * Gets a regular expression fragment matching characters that
 * can appear in a line after the signature.
 *
 * These are whitespace, single curly braces, anything between "<" and
 * ">", and anything between "{{" and "}}", in any number and order.
 *
 * @return String regular expression fragment.
 */
static function getLineEndingRegex() {
	$doubleBraces = preg_quote( '{{' ) . '[^}]+' . preg_quote( '}}' );

	$alternatives = implode( '|', array(
		'\s*',
		preg_quote( '}' ),
		preg_quote( '{' ),
		'\<[^\>]+\>',
		$doubleBraces,
	) );

	return "(?:$alternatives)*";
}
835
/**
 * Gets a regular expression that will match this wiki's
 * timestamps as given by ~~~~.
 *
 * The expression is derived from an exemplar timestamp produced by the
 * parser: runs of word characters become "[^\d\W]+", runs of digits become
 * "\d+", and a trailing parenthesised timezone, when the exemplar has one,
 * is kept literally. The result is computed once and then cached.
 *
 * @throws MWException When the generated expression fails to match the exemplar
 * @return String regular expression fragment.
 */
static function getTimestampRegex() {
	global $wgParser;

	if ( self::$timestampRegex !== null ) {
		return self::$timestampRegex;
	}

	// Step 1: Get an exemplar timestamp
	$title = Title::newMainPage();
	$user = User::newFromName( 'Test' );
	$options = new ParserOptions;
	$exemplarTimestamp =
		$wgParser->preSaveTransform( '~~~~~', $title, $user, $options );

	// Step 2: Generalise it
	// Trim off the timezone to replace at the end. The exemplar need not
	// carry one, so only use the match when there is a match.
	$tzRegex = '/\s*\(\w+\)\s*$/';
	$tzMatches = array();
	$timezone = '';
	if ( preg_match( $tzRegex, $exemplarTimestamp, $tzMatches ) ) {
		$timezone = $tzMatches[0];
	}

	$output = preg_replace( $tzRegex, '', $exemplarTimestamp );
	$output = preg_quote( $output, '/' );
	$output = preg_replace( '/[^\d\W]+/u', '[^\d\W]+', $output );
	$output = preg_replace( '/\d+/u', '\d+', $output );

	// Quote with the delimiter that the callers wrap this fragment in
	$output .= preg_quote( $timezone, '/' );

	if ( !preg_match( "/$output/u", $exemplarTimestamp ) ) {
		throw new MWException( "Timestamp regex does not match exemplar" );
	}

	self::$timestampRegex = $output;

	return $output;
}
878
/**
 * Turns wikitext into a short plain text snippet: html tags and
 * templates are removed, the rest is parsed, stripped of markup and
 * truncated.
 *
 * @param $text string
 * @param $length int default 150, handed to the user language's truncate()
 * @return string The snippet, or an empty string when an undecoded
 *  numeric character reference is left in it
 */
static function getTextSnippet( $text, $length = 150 ) {
	global $wgLang;

	$text = strip_tags( $text );

	// 10 attempts at most, the logic here is to find the first }} and
	// find the matching {{ for that }}
	for ( $attempt = 0; $attempt < 10; $attempt++ ) {
		$closePos = strpos( $text, '}}' );
		if ( $closePos === false ) {
			break;
		}

		$openPos = strrpos( substr( $text, 0, $closePos + 2 ), '{{' );
		if ( $openPos === false ) {
			// Unmatched }}: remove only the two braces
			$cutStart = $closePos;
			$cutLength = 2;
		} else {
			// Remove the whole {{ ... }} span
			$cutStart = $openPos;
			$cutLength = $closePos - $openPos + 2;
		}
		$text = substr_replace( $text, '', $cutStart, $cutLength );
	}

	// See Parser::parse() function, &#160; is replaced specifically, replace it back here
	// with a space as this html entity won't be handled by htmlspecialchars_decode()
	$parsed = MessageCache::singleton()->parse( $text )->getText();
	$text = str_replace( '&#160;', ' ', $parsed );
	$text = trim( strip_tags( htmlspecialchars_decode( $text ) ) );

	// strip out non-useful data for snippet
	$text = str_replace( array( '{', '}' ), '', $text );
	$text = $wgLang->truncate( $text, $length );

	// Return empty string if there is undecoded char left
	return strpos( $text, '&#' ) !== false ? '' : $text;
}
926}