Page MenuHomePhabricator

extensions-diff.txt

Authored By
bzimport
Nov 21 2014, 9:30 PM
Size
30 KB
Referenced Files
None
Subscribers
None

extensions-diff.txt

Index: DidYouMean/DidYouMean.php
===================================================================
--- DidYouMean/DidYouMean.php (revision 0)
+++ DidYouMean/DidYouMean.php (revision 0)
@@ -0,0 +1,399 @@
+<?php
+# DidYouMean extension setup: registers the hooks handled by the functions in this file.
+if ( !defined( 'MEDIAWIKI' ) ) {
+ die( 'This file is a MediaWiki extension, it is not a valid entry point' );
+}
+
+require_once( 'DYMNorm.php' ); # defines wfDymNormalise()
+
+$wgExtensionCredits['other'][] = array( 'name' => 'DidYouMean', 'author' => 'hippietrail (Andrew Dunbar)' );
+
+# these two handlers do a database lookup and show the similar titles they find
+$wgHooks['ArticleNoArticleText'][] = 'wfDymArticleNoArticleText';
+$wgHooks['SpecialSearchNogomatch'][] = 'wfDymSpecialSearchNogomatch';
+
+# db lookup + parse existing {{see}} and add enhanced one with db results
+$wgHooks['ParserBeforeStrip'][] = 'wfDymParserBeforeStrip';
+
+# handle delete: remove the page from the dympage / dymnorm tables
+$wgHooks['ArticleDelete'][] = 'wfDymArticleDelete';
+
+# handle move: update, remove or add the table rows depending on the namespaces involved
+$wgHooks['TitleMoveComplete'][] = 'wfDymTitleMoveComplete';
+
+# handle create / edit (AlternateEdit notes whether the page was a redirect before the edit)
+$wgHooks['AlternateEdit'][] = 'wfDymAlternateEdit';
+$wgHooks['ArticleSaveComplete'][] = 'wfDymArticleSaveComplete';
+
+# handle undelete
+$wgHooks['ArticleUndelete'][] = 'wfDymArticleUndelete';
+
+# default; set to true in LocalSettings.php (after including this file) to make build_sees() emit {{see|...}}
+$wgDymUseSeeTemplate = false;
+
+# TODO this is called even when editing a new page
+
+function wfDymArticleNoArticleText( &$article, &$text ) {
+	# ArticleNoArticleText handler: put "see also" links in front of the default text
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+	# page id 0 makes wfDymLookup() search by normalised title
+	$suggestions = wfDymLookup( 0, $article->getTitle()->getText() );
+	sort( $suggestions );
+
+	if ( !empty( $suggestions ) ) {
+		$text = build_sees( $suggestions ) . $text;
+	}
+	return true;
+}
+
+# this is called when using the Go/Search box but it is not called when entering
+# a URL for a non-existing article
+
+function wfDymSpecialSearchNogomatch( &$title ) {
+	# SpecialSearchNogomatch handler: add "see also" links to the page output
+	global $wgOut;
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	# page id 0 makes wfDymLookup() search by normalised title
+	$suggestions = wfDymLookup( 0, $title->getText() );
+	sort( $suggestions );
+
+	if ( count( $suggestions ) > 0 ) {
+		$wgOut->addWikiText( build_sees( $suggestions ) );
+	}
+	return true;
+}
+
+# this is called per chunk of wikitext, not per article
+
+function wfDymParserBeforeStrip( &$parser, &$text, &$stripState ) {
+	# ParserBeforeStrip handler: merge the titles named in an existing {{see}} /
+	# {{xsee}} template with the similar titles found in the database, then
+	# replace the template (or a freshly prepended empty one) with the merged list.
+	# Runs once per parser object, and only when 'revisionid' is set and 'namespace' is empty.
+
+	# if revisionid is 0 this is not an article chunk
+	# isset() avoids an undefined-property notice the first time through
+	if ( ( isset( $parser->mDymFirstChunk ) && $parser->mDymFirstChunk == 'no' )
+		|| !$parser->getVariableValue('revisionid') || $parser->getVariableValue('namespace') )
+		return true;
+
+	$parser->mDymFirstChunk = 'no';
+
+	$title = $parser->getTitle();
+	$parser->mDymSees = wfDymLookup( $title->getArticleID(), $title->getText() );
+
+	if (preg_match( "/{{[sS]ee\|([^}]*)}}/", $text, $see )) {
+		wfDebug( "HIPP: see Hit\n" );
+		$sees = explode("|", $see[1]);
+	} elseif (preg_match( "/{{[xX]see(\|[^}]*)}}/", $text, $see )) {
+		wfDebug( "HIPP: xsee Hit\n" );
+		preg_match_all( "/\|\[\[([^]|]*)(?:\|([^|]*))?\]\](?: \(([^)]*)\))?/", $see[1], $ma );
+		$sees = $ma[1];
+	} else {
+		wfDebug( "HIPP: (x)see Miss\n" );
+		# there's no {{see}} in this chunk of wikitext
+		# if this is the 1st chunk of the article itself we can put an empty {{see}} there.
+		$text = "{{see|}}\n" . $text;
+		$sees = array();
+	}
+
+	# normalize entities and urlencoding to pure utf-8
+	foreach ($sees as &$value)
+		$value = urldecode(html_entity_decode($value, ENT_QUOTES, 'UTF-8'));
+	unset($value); # drop the reference so a later write to $value cannot alter $sees
+
+	wfDebug( 'HIPP: Parser: ' . utf8_decode(implode(', ', $sees)) . "\n" );
+	wfDebug( 'HIPP: DBase: ' . utf8_decode(implode(', ', $parser->mDymSees)) . "\n" );
+
+	# add in the stuff from the database lookup
+	$sees = array_unique(array_merge($sees, $parser->mDymSees));
+	sort($sees);
+
+	wfDebug( 'HIPP: Merged: ' . utf8_decode(implode(', ', $sees)) . "\n" );
+
+	# TODO is it better to use $parser->insertStripItem() ?
+
+	# escape "\" and "$" so a title such as "$100" is not read as a backreference
+	$built_sees = count($sees) ? addcslashes( build_sees($sees), '\\$' ) : '';
+
+	$text = preg_replace( '/{{[xX]?[sS]ee\|[^}]*}}/', $built_sees, $text );
+
+	return true;
+}
+
+# turn the array of titles into some wikitext we can add to an article
+
+function build_sees( $sees ) {
+	# Render the titles either as a {{see}} template call or as a plain
+	# "See also" line of bold links, depending on $wgDymUseSeeTemplate.
+	global $wgDymUseSeeTemplate;
+	return $wgDymUseSeeTemplate == true
+		? '{{see|' . implode( '|', $sees ) . '}}'
+		: '<div>\'\'See also:\'\' \'\'\'[[' . implode( ']]\'\'\', \'\'\'[[', $sees ) . ']]\'\'\'</div>';
+}
+
+# pass pageid = 0 to lookup by normtitle
+
+function wfDymLookup( $pageid, $title ) {
+	# Return the titles (underscores shown as spaces) of every page that shares
+	# a normalised title with the given page, leaving out $title itself.
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	$matches = array();
+	$db = wfGetDB( DB_SLAVE );
+
+	# without both extension tables there is nothing to look up
+	if ( !$db->tableExists( 'dympage' ) || !$db->tableExists( 'dymnorm' ) ) {
+		wfDebug( "HIPP: No dympage or dymnorm table\n" );
+		return $matches;
+	}
+
+	# find the id of the normalised title: via the page when we have a page
+	# id, otherwise by normalising the title text
+	if ( $pageid ) {
+		wfDebug( "HIPP: lookup by pageid: $pageid\n" );
+		$conds = array( 'page_id = dp_pageid', 'page_id' => $pageid );
+		$normid = $db->selectField( array( 'page', 'dympage' ), 'dp_normid', $conds );
+	} else {
+		wfDebug( "HIPP: lookup by normtitle: " . wfDymNormalise($title) . "\n" );
+		$conds = array( 'dn_normtitle' => wfDymNormalise($title) );
+		$normid = $db->selectField( 'dymnorm', 'dn_normid', $conds );
+	}
+
+	# no normalised title on record: nothing to suggest
+	if ( !$normid ) {
+		return $matches;
+	}
+
+	# every page filed under that normalised title
+	$rows = $db->select(
+		/* FROM */ array( 'page', 'dympage' ),
+		/* SELECT */ 'page_title',
+		/* WHERE */ array( 'page_id = dp_pageid', 'dp_normid' => $normid )
+	);
+
+	if ( $db->numRows( $rows ) == 0 ) {
+		wfDebug( "HIPP: DB New Miss\n" );
+		return $matches;
+	}
+
+	wfDebug( "HIPP: DB New Hit\n" );
+
+	# accumulate the db results, logging each one: "++" marks a suggestion,
+	# parentheses mark the page that was asked about
+	while ( $row = $db->fetchObject( $rows ) ) {
+		$shown = str_replace( '_', ' ', $row->page_title );
+		if ( $title != $shown ) {
+			$matches[] = $shown;
+			$logged = '++ ' . utf8_decode( $shown );
+		} else {
+			$logged = ' (' . utf8_decode( $shown ) . ')';
+		}
+		wfDebug( "HIPP: $logged\n" );
+	}
+
+	$db->freeResult( $rows );
+
+	return $matches;
+}
+
+function wfDymArticleInsertComplete( &$article, &$user, $text, $summary, $isminor, $watchthis, $something ) {
+	# Index a newly created page unless it is outside namespace 0 or is a redirect.
+	$pageTitle = $article->getTitle();
+	$indexable = $pageTitle->getNamespace() == 0 && $article->isRedirect() != true;
+	if ( $indexable ) {
+		wfDoInsert( $article->getID(), $pageTitle->getText() );
+	}
+	return true;
+}
+
+function wfDymArticleUndelete( &$title, &$create ) {
+	# ArticleUndelete handler (registered as 'wfDymArticleUndelete'): index the restored page.
+	if ($create == false || $title->getNamespace() != 0)
+		return true; # $create is false, or the page is outside namespace 0
+
+	# TODO it's not possible to detect if the undeleted article is a redirect!
+	#$article = new Article( $title );
+	#if ($article->isRedirect( $article->getContent() )) {
+	# return true;
+	#}
+
+	wfDoInsert( $title->getArticleId(), $title->getText() );
+
+	return true;
+}
+
+function wfDymArticleDelete( $article, $user, $reason ) {
+	# ArticleDelete handler: drop the page from the index.
+	# Pages outside namespace 0 and redirects are skipped.
+	$indexed = $article->getTitle()->getNamespace() == 0 && $article->isRedirect() != true;
+	if ( $indexed ) {
+		wfDoDelete( $article->getID() );
+	}
+	return true;
+}
+
+function wfDymTitleMoveComplete( &$title, &$nt, &$wgUser, &$pageid, &$redirid ) {
+	# TitleMoveComplete handler: keep the index in step with a page move;
+	# what happens depends on which side(s) of the move are in namespace 0.
+	wfDebug( 'HIPP: ' . __METHOD__ . "\n" );
+
+	$fromMain = ( $title->getNamespace() == 0 );
+	$toMain = ( $nt->getNamespace() == 0 );
+
+	# TODO we can't always check if we're moving a redirect because the old article's content
+	# TODO has already been replaced with the redirect to the new title but a
+	# TODO new title's content is still "noarticletext" at this point!
+	#$a1 = new Article( $title );
+	#$a2 = new Article( $nt );
+	#wfDebug( "HIPP: getContent() for isRedirect()\n\tfrom <<<" . $a1->getContent() . ">>>\n\t to <<<" . $a2->getContent() . ">>>\n" );
+	#if ($a1->isRedirect( $a->getContent() )) {
+	# wfDebug( "HIPP: moving a redirect (?)\n" );
+	# return true;
+	#}
+
+	if ( $fromMain && $toMain ) {
+		# stays in namespace 0: re-file under the new title
+		wfDoUpdate( $pageid, $nt->getText() );
+	} elseif ( $fromMain ) {
+		# leaves namespace 0
+		wfDoDelete( $pageid );
+	} elseif ( $toMain ) {
+		# enters namespace 0
+		wfDoInsert( $pageid, $nt->getText() );
+	}
+
+	return true;
+}
+
+# called at action=edit. can detect if we're about to edit a redirect
+
+function wfDymAlternateEdit( $editpage ) {
+	# Record on the global parser object that the page being edited is currently a redirect.
+	global $wgParser;
+	if ( $editpage->mArticle->isRedirect() ) {
+		$wgParser->mDymRedirBeforeEdit = true;
+	}
+	return 1;
+}
+
+# called at end of action=submit
+
+function wfDymArticleSaveComplete( $article, $user, $text, $summary, $isminor, $dunno1, $dunno2, $flags ) {
+	# ArticleSaveComplete handler: index or un-index a namespace-0 page when an edit creates it or flips its redirect status.
+	global $wgParser;
+	if ($article->getTitle()->getNamespace() != 0)
+		return true;
+
+	# the flag is only ever set to true by wfDymAlternateEdit(); treat "never set" as false
+	$wasRedirect = !empty( $wgParser->mDymRedirBeforeEdit );
+
+	if ($article->isRedirect($text)) {
+		if (!$wasRedirect && !($flags & EDIT_NEW))
+			wfDoDelete( $article->getID() );
+	} elseif ($wasRedirect || $flags & EDIT_NEW) {
+		wfDoInsert( $article->getID(), $article->getTitle()->getText() );
+	}
+	$wgParser->mDymRedirBeforeEdit = false;
+	return true;
+}
+
+function wfDoInsert( $pageid , $title ) {
+	# Index a page: file it under its normalised title (creating the dymnorm
+	# row if needed) and touch every page in that group.
+	wfDebug( 'HIPP: ' . __METHOD__ . " INSERT\n" );
+	$db = wfGetDB( DB_MASTER );
+
+	$norm = wfDymNormalise($title);
+
+	# find or create normid for the new title
+	$normid = $db->selectField( 'dymnorm', 'dn_normid', array( 'dn_normtitle' => $norm ) );
+	if ( !$normid ) {
+		$seq = $db->nextSequenceValue( 'dymnorm_dn_normid_seq' );
+		$db->insert( 'dymnorm', array( 'dn_normid' => $seq, 'dn_normtitle' => $norm ) );
+		$normid = $db->insertId();
+		wfDebug( "HIPP: NEW: $title ->\t$norm = $normid\n" );
+	} else {
+		wfDebug( "HIPP: old: $title ->\t$norm = $normid\n" );
+	}
+	$db->insert( 'dympage', array( 'dp_pageid' => $pageid, 'dp_normid' => $normid ) );
+
+	# touch all pages which will now link here
+	# (sent as a raw multi-table UPDATE joining page and dympage)
+	$pageTable = $db->tableName('page');
+	$dymTable = $db->tableName('dympage');
+	$now = $db->addQuotes($db->timestamp());
+	$db->query( 'UPDATE ' . $pageTable . ',' . $dymTable . ' SET page_touched=' . $now .
+		" WHERE page_id=dp_pageid AND dp_normid=$normid" );
+}
+
+function wfDoDelete( $pageid ) {
+	# Remove a page from the index, drop its normalised title when no other
+	# page uses it, and touch the pages still filed under that title.
+	wfDebug( 'HIPP: ' . __METHOD__ . " DELETE\n" );
+	$dbw = wfGetDB( DB_MASTER );
+
+	$normid = $dbw->selectField( 'dympage', 'dp_normid', array('dp_pageid' => $pageid) );
+
+	# the page was never indexed: nothing to remove, and interpolating an
+	# empty $normid into the UPDATE below would produce invalid SQL
+	if (!$normid)
+		return;
+
+	$dbw->delete( 'dympage', array('dp_pageid' => $pageid) );
+	$count = $dbw->selectField( 'dympage', 'COUNT(*)', array('dp_normid' => $normid) );
+
+	if ($count == 0)
+		$dbw->delete( 'dymnorm', array('dn_normid' => $normid) );
+
+	# touch all pages which used to link here
+	$dbw->query( 'UPDATE ' . $dbw->tableName('page') . ',' . $dbw->tableName('dympage') . ' SET page_touched=' . $dbw->addQuotes($dbw->timestamp()) .
+		' WHERE page_id=dp_pageid AND dp_normid=' . intval($normid) );
+}
+
+function wfDoUpdate( $pageid, $title ) {
+	# Re-file an indexed page under the normalised form of its new title after
+	# a move, then touch the pages in both the old and the new title group.
+	wfDebug( 'HIPP: ' . __METHOD__ . " MOVE\n" );
+	$dbw = wfGetDB( DB_MASTER );
+
+	# pages that are not indexed (redirects, for instance) have nothing to
+	# re-file; bailing out also keeps an empty id out of the SQL below and
+	# avoids creating a dymnorm row that no page points at
+	$oldnormid = $dbw->selectField( 'dympage', 'dp_normid', array('dp_pageid' => $pageid) );
+	if (!$oldnormid)
+		return;
+
+	$norm = wfDymNormalise($title);
+
+	# find or create normid for the new title
+	$normid = $dbw->selectField( 'dymnorm', 'dn_normid', array( 'dn_normtitle' => $norm ) );
+	if ($normid) {
+		wfDebug( "HIPP: old: $title ->\t$norm = $normid\n" );
+	} else {
+		$nsvid = $dbw->nextSequenceValue( 'dymnorm_dn_normid_seq' );
+		$dbw->insert( 'dymnorm', array( 'dn_normid' => $nsvid, 'dn_normtitle' => $norm ) );
+		$normid = $dbw->insertId();
+		wfDebug( "HIPP: NEW: $title ->\t$norm = $normid\n" );
+	}
+
+	# nothing else changes when old and new titles normalise to the same thing
+	if ($oldnormid != $normid) {
+		$dbw->update( 'dympage', array( 'dp_normid' => $normid ), array( 'dp_pageid' => $pageid ) );
+
+		# drop the old normalised title once no page uses it any more
+		$count = $dbw->selectField( 'dympage', 'COUNT(*)', array('dp_normid' => $oldnormid) );
+		if ($count == 0)
+			$dbw->delete( 'dymnorm', array('dn_normid' => $oldnormid) );
+
+		# touch all pages which linked to the old name or will link to the new one
+		$dbw->query(
+			'UPDATE ' . $dbw->tableName('page') . ',' . $dbw->tableName('dympage') .
+			' SET page_touched=' . $dbw->addQuotes($dbw->timestamp()) .
+			' WHERE page_id=dp_pageid AND (dp_normid=' . intval($normid) . ' OR dp_normid=' . intval($oldnormid) . ')'
+		);
+	}
+}
+
+?>
Index: DidYouMean/DYMNorm.php
===================================================================
--- DidYouMean/DYMNorm.php (revision 0)
+++ DidYouMean/DYMNorm.php (revision 0)
@@ -0,0 +1,250 @@
+<?php
+
+function wfDymNormalise( $norm ) {
+ # ignore: spaces, hyphens, commas, periods, mid dots
+ $norm = preg_replace( '/[\- _,.·]/u', '', $norm );
+
+ # ignore apostrophe-like characters
+ $norm = preg_replace( '/[\'’`ˊʻʼ]/u', '', $norm );
+
+ # ignore all combining diacritics
+ $norm = preg_replace( '/[̱̃]/u', '', $norm );
+
+ # latin / roman
+ $norm = preg_replace( '/[AaAaÁáÀàÂâÄäǍǎĂăĀāÃãÅåĄąẤấẢảẠạẬậª]/u', 'A', $norm );
+ $norm = preg_replace( '/[ÆæǼǽǢǣ]/u', 'AE', $norm );
+ $norm = preg_replace( '/[BbBb]/u', 'B', $norm );
+ $norm = preg_replace( '/[CcCcĆćĊċĈĉČčÇç]/u', 'C', $norm );
+ $norm = preg_replace( '/[DdDdĎďĐđÐðƉɖ]/u', 'D', $norm );
+ $norm = preg_replace( '/[EeEeÉéÈèĖėÊêËëĚěĒēẼẽĘęẾếỀềḖḗỂểẸẹỆệƏə]/u', 'E', $norm );
+ $norm = preg_replace( '/[Ɛɛ]/u', 'Ɛ', $norm );
+ $norm = preg_replace( '/[FfFf]/u', 'F', $norm );
+ $norm = preg_replace( '/[GgGgĠġĜĝǦǧĞğĢģǤǥ]/u', 'G', $norm );
+ $norm = preg_replace( '/[HhHhĤĥĦħḤḥ]/u', 'H', $norm );
+ $norm = preg_replace( '/[IiIiÍíÌìİıÎîÏïĬĭĪīĨĩĮįỊị]/u', 'I', $norm );
+ $norm = preg_replace( '/[\x{0132}\x{0133}]/u', 'IJ', $norm );
+ $norm = preg_replace( '/[JjJjĴĵ]/u', 'J', $norm );
+ $norm = preg_replace( '/[KkKkǨǩ]/u', 'K', $norm );
+ $norm = preg_replace( '/[LlLlĹĺĽľĻļŁłḺḻ]/u', 'L', $norm );
+ $norm = preg_replace( '/[MmMmḾḿṂṃ]/u', 'M', $norm );
+ $norm = preg_replace( '/[NnNnŃńŇňÑñṆṇŊŋⁿ]/u', 'N', $norm );
+ $norm = preg_replace( '/[OoOoÓóÒòÔôÖöŌōÕõŐőỐốỒồØøǾǿƠơỚớỜờỘộỞở0º]/u', 'O', $norm );
+ $norm = preg_replace( '/[Œœ]/u', 'OE', $norm );
+ $norm = preg_replace( '/[Ɔɔ]/u', 'Ɔ', $norm );
+ $norm = preg_replace( '/[PpPp]/u', 'P', $norm );
+ $norm = preg_replace( '/[QqQq]/u', 'Q', $norm );
+ $norm = preg_replace( '/[RrRrŔŕŘřṞṟṚṛ]/u', 'R', $norm );
+ $norm = preg_replace( '/[SsSsŚśŜŝŠšŞşṢṣ]/u', 'S', $norm );
+ $norm = preg_replace( '/[TtTtŤťŢţṮṯṬṭŦŧ]/u', 'T', $norm );
+ $norm = preg_replace( '/[UuUuÚúÙùÛûÜüŬŭŪūŨũŮůŲųŰűǗǘǛǜǙǚǕǖỦủƯưỤụỨứỪừỮữỬửỬửỰự]/u', 'U', $norm );
+ $norm = preg_replace( '/[VvVv]/u', 'V', $norm );
+ $norm = preg_replace( '/[WwWwẂẃẀẁŴŵẄẅ]/u', 'W', $norm );
+ $norm = preg_replace( '/[XxXx]/u', 'X', $norm );
+ $norm = preg_replace( '/[YyYyÝýỲỳŶŷŸÿỸỹ]/u', 'Y', $norm );
+ $norm = preg_replace( '/[ZzZzŹźŻżŽž]/u', 'Z', $norm );
+ $norm = preg_replace( '/[ÞþǷƿ]/u', 'þ', $norm );
+
+ # greek
+ $norm = preg_replace( '/[ΑαΆάᾶ]/u', 'Α', $norm );
+ $norm = preg_replace( '/[Ββ]/u', 'Β', $norm );
+ $norm = preg_replace( '/[Γγ]/u', 'Γ', $norm );
+ $norm = preg_replace( '/[Δδ]/u', 'Δ', $norm );
+ $norm = preg_replace( '/[ΕεΈέ]/u', 'Ε', $norm );
+ $norm = preg_replace( '/[Ζζ]/u', 'Ζ', $norm );
+ $norm = preg_replace( '/[ΗηΉήῆῆ]/u', 'Η', $norm );
+ $norm = preg_replace( '/[Θθ]/u', 'Θ', $norm );
+ $norm = preg_replace( '/[ΙιΊίΪϊἸἰἼἴἿἷῖ]/u', 'Ι', $norm );
+ $norm = preg_replace( '/[Κκ]/u', 'Κ', $norm );
+ $norm = preg_replace( '/[Λλ]/u', 'Λ', $norm );
+ $norm = preg_replace( '/[Μμ]/u', 'Μ', $norm );
+ $norm = preg_replace( '/[Νν]/u', 'Ν', $norm );
+ $norm = preg_replace( '/[Ξξ]/u', 'Ξ', $norm );
+ $norm = preg_replace( '/[ΟοΌό]/u', 'Ο', $norm );
+ $norm = preg_replace( '/[Ππ]/u', 'Π', $norm );
+ $norm = preg_replace( '/[ΡρῤῤῬῥ]/u', 'Ρ', $norm );
+ $norm = preg_replace( '/[Σσς]/u', 'Σ', $norm );
+ $norm = preg_replace( '/[Ττ]/u', 'Τ', $norm );
+ $norm = preg_replace( '/[ΥυΎύὐὐ]/u', 'Υ', $norm );
+ $norm = preg_replace( '/[Φφ]/u', 'Φ', $norm );
+ $norm = preg_replace( '/[Χχ]/u', 'Χ', $norm );
+ $norm = preg_replace( '/[Ψψ]/u', 'Ψ', $norm );
+ $norm = preg_replace( '/[ΩωΏώῶῶ]/u', 'Ω', $norm );
+
+ # cyrillic
+ $norm = preg_replace( '/[Аа]/u', 'А', $norm );
+ $norm = preg_replace( '/[Бб]/u', 'Б', $norm );
+ $norm = preg_replace( '/[Вв]/u', 'В', $norm );
+ $norm = preg_replace( '/[ГгҐґҒғҔҕ]/u', 'Г', $norm );
+ $norm = preg_replace( '/[Дд]/u', 'Д', $norm );
+ $norm = preg_replace( '/[Ђђ]/u', 'Ђ', $norm );
+ $norm = preg_replace( '/[ЕеЁёӘә]/u', 'Е', $norm );
+ $norm = preg_replace( '/[Жж]/u', 'Ж', $norm );
+ $norm = preg_replace( '/[ЗзЭэЄє]/u', 'З', $norm );
+ $norm = preg_replace( '/[Ѕѕ]/u', 'Ѕ', $norm );
+ $norm = preg_replace( '/[ИиЙй]/u', 'И', $norm );
+ $norm = preg_replace( '/[ІіЇїӀ]/u', 'І', $norm );
+ $norm = preg_replace( '/[Јј]/u', 'Ј', $norm );
+ $norm = preg_replace( '/[Кк]/u', 'К', $norm );
+ $norm = preg_replace( '/[ЛлЉљ]/u', 'Л', $norm );
+ $norm = preg_replace( '/[Мм]/u', 'М', $norm );
+ $norm = preg_replace( '/[НнЊњ]/u', 'Н', $norm );
+ $norm = preg_replace( '/[ОоӨө]/u', 'О', $norm );
+ $norm = preg_replace( '/[Пп]/u', 'П', $norm );
+ $norm = preg_replace( '/[Рр]/u', 'Р', $norm );
+ $norm = preg_replace( '/[Сс]/u', 'С', $norm );
+ $norm = preg_replace( '/[Тт]/u', 'Т', $norm );
+ $norm = preg_replace( '/[ЋћҺһ]/u', 'Ћ', $norm );
+ $norm = preg_replace( '/[УуЎўҮү]/u', 'У', $norm );
+ $norm = preg_replace( '/[Фф]/u', 'Ф', $norm );
+ $norm = preg_replace( '/[Хх]/u', 'Х', $norm );
+ $norm = preg_replace( '/[ЦцЏџ]/u', 'Ц', $norm );
+ $norm = preg_replace( '/[Чч]/u', 'Ч', $norm );
+ $norm = preg_replace( '/[ШшЩщ]/u', 'Ш', $norm );
+ $norm = preg_replace( '/[Ыы]/u', 'Ы', $norm );
+ $norm = preg_replace( '/[ЬьЪъѢѣ]/u', 'Ь', $norm );
+ $norm = preg_replace( '/[Юю]/u', 'Ю', $norm );
+ $norm = preg_replace( '/[Яя]/u', 'Я', $norm );
+
+ # hebrew
+ # strip combining rafe, patah, qamats, dagesh/mapiq, shin dot, sin dot
+ $norm = preg_replace( '/[\x{05bf}\x{05b7}\x{05b8}\x{05bc}\x{05c1}\x{05c2}]/u', '', $norm );
+ $norm = preg_replace( '/־/u', '', $norm );
+ $norm = preg_replace( '/ײ/u', 'יי', $norm );
+ $norm = preg_replace( '/װ/u', 'וו', $norm );
+ $norm = preg_replace( '/[כך]/u', 'כ', $norm );
+ $norm = preg_replace( '/[מם]/u', 'מ', $norm );
+ $norm = preg_replace( '/[נן]/u', 'נ', $norm );
+ $norm = preg_replace( '/[פף]/u', 'פ', $norm );
+ $norm = preg_replace( '/[צץ]/u', 'צ', $norm );
+
+ # arabic
+ # strip tatweel, vowels, shada, sukun
+ $norm = preg_replace( '/[ـ\x{064b}-\x{0652}\x{200c}]/u', '', $norm );
+ $norm = preg_replace( '/[ء]/u', 'ء', $norm );
+ $norm = preg_replace( '/[اآأإٱٲٳٵ]/u', 'ا', $norm );
+ $norm = preg_replace( '/[بٮٻپڀ]/u', 'ب', $norm );
+ $norm = preg_replace( '/[تثٹٺټٽٿ]/u', 'ت', $norm );
+ $norm = preg_replace( '/[حجخځڂڃڄڅچڇڿ]/u', 'ح', $norm );
+ $norm = preg_replace( '/[دذڈډڊڋڌڍڎڏڐ]/u', 'د', $norm );
+ $norm = preg_replace( '/[رزڑڒړڔڕږڗژڙ]/u', 'ر', $norm );
+ $norm = preg_replace( '/[سشښڛڜۺ]/u', 'س', $norm );
+ $norm = preg_replace( '/[صضڝڞۻ]/u', 'ص', $norm );
+ $norm = preg_replace( '/[طظڟ]/u', 'ط', $norm );
+ $norm = preg_replace( '/[عغڠۼع]/u', 'ع', $norm );
+ $norm = preg_replace( '/[فڡڢڣڤڥڦ]/u', 'ف', $norm );
+ $norm = preg_replace( '/[قٯڧڨ]/u', 'ق', $norm );
+ $norm = preg_replace( '/[كکڪګڬڭڮگڰڱڲڳڴ]/u', 'ك', $norm );
+ $norm = preg_replace( '/[لڵڶڷڸ]/u', 'ل', $norm );
+ $norm = preg_replace( '/[م]/u', 'م', $norm );
+ $norm = preg_replace( '/[نڹںڻڼڽ]/u', 'ن', $norm );
+ $norm = preg_replace( '/[هةھۀہۂۃە]/u', 'ه', $norm );
+ $norm = preg_replace( '/[وؤٶٷۄۅۆۇۈۉۊۋۏ]/u', 'و', $norm );
+ $norm = preg_replace( '/[ىئيٸیۍێېۑےۓ]/u', 'ى', $norm );
+
+ # japanese
+ # strip middle dot, prolonged sound mark, circumflex, grave, voice, semivoice, hw voice, hw semivoice
+ $norm = preg_replace( '/[・・ーー^`゛゜゙゚]/u', '', $norm );
+ # strip combining voice, semivoice
+ $norm = preg_replace( '/[\x{3099}\x{309a}]/u', '', $norm );
+ $norm = preg_replace( '/[アァアァ]/u', 'ア', $norm );
+ $norm = preg_replace( '/[イィイィ]/u', 'イ', $norm );
+ $norm = preg_replace( '/[ウヴゥウゥ]/u', 'ウ', $norm );
+ $norm = preg_replace( '/[エェエェ]/u', 'エ', $norm );
+ $norm = preg_replace( '/[オォオォ]/u', 'オ', $norm );
+ $norm = preg_replace( '/[カガヵカ]/u', 'カ', $norm );
+ $norm = preg_replace( '/[キギキ]/u', 'キ', $norm );
+ $norm = preg_replace( '/[クグク]/u', 'ク', $norm );
+ $norm = preg_replace( '/[ケゲヶケ]/u', 'ケ', $norm );
+ $norm = preg_replace( '/[コゴコ]/u', 'コ', $norm );
+ $norm = preg_replace( '/[サザサ]/u', 'サ', $norm );
+ $norm = preg_replace( '/[シジシ]/u', 'シ', $norm );
+ $norm = preg_replace( '/[スズス]/u', 'ス', $norm );
+ $norm = preg_replace( '/[セゼセ]/u', 'セ', $norm );
+ $norm = preg_replace( '/[ソゾソ]/u', 'ソ', $norm );
+ $norm = preg_replace( '/[タダタ]/u', 'タ', $norm );
+ $norm = preg_replace( '/[チヂチ]/u', 'チ', $norm );
+ $norm = preg_replace( '/[ツヅッツッ]/u', 'ツ', $norm );
+ $norm = preg_replace( '/[テデテ]/u', 'テ', $norm );
+ $norm = preg_replace( '/[トドト]/u', 'ト', $norm );
+ $norm = preg_replace( '/[ナナ]/u', 'ナ', $norm );
+ $norm = preg_replace( '/[ニニ]/u', 'ニ', $norm );
+ $norm = preg_replace( '/[ヌヌ]/u', 'ヌ', $norm );
+ $norm = preg_replace( '/[ネネ]/u', 'ネ', $norm );
+ $norm = preg_replace( '/[ノノ]/u', 'ノ', $norm );
+ $norm = preg_replace( '/[ハバパハ]/u', 'ハ', $norm );
+ $norm = preg_replace( '/[ヒビピヒ]/u', 'ヒ', $norm );
+ $norm = preg_replace( '/[フブプフ]/u', 'フ', $norm );
+ $norm = preg_replace( '/[ヘベペヘ]/u', 'ヘ', $norm );
+ $norm = preg_replace( '/[ホボポホ]/u', 'ホ', $norm );
+ $norm = preg_replace( '/[ママ]/u', 'マ', $norm );
+ $norm = preg_replace( '/[ミミ]/u', 'ミ', $norm );
+ $norm = preg_replace( '/[ムム]/u', 'ム', $norm );
+ $norm = preg_replace( '/[メメ]/u', 'メ', $norm );
+ $norm = preg_replace( '/[モモ]/u', 'モ', $norm );
+ $norm = preg_replace( '/[ヤャヤャ]/u', 'ヤ', $norm );
+ $norm = preg_replace( '/[ユュユュ]/u', 'ユ', $norm );
+ $norm = preg_replace( '/[ヨョヨョ]/u', 'ヨ', $norm );
+ $norm = preg_replace( '/[ララ]/u', 'ラ', $norm );
+ $norm = preg_replace( '/[リリ]/u', 'リ', $norm );
+ $norm = preg_replace( '/[ルル]/u', 'ル', $norm );
+ $norm = preg_replace( '/[レレ]/u', 'レ', $norm );
+ $norm = preg_replace( '/[ロロ]/u', 'ロ', $norm );
+ $norm = preg_replace( '/[ワヮヷワ]/u', 'ワ', $norm );
+ $norm = preg_replace( '/[ヰヸ]/u', 'ヰ', $norm );
+ $norm = preg_replace( '/[ヱヹ]/u', 'ヱ', $norm );
+ $norm = preg_replace( '/[ヲヺヲ]/u', 'ヲ', $norm );
+ $norm = preg_replace( '/[ンン]/u', 'ン', $norm );
+
+ $norm = preg_replace( '/[あぁ]/u', 'あ', $norm );
+ $norm = preg_replace( '/[いぃ]/u', 'い', $norm );
+ $norm = preg_replace( '/[うゔぅ]/u', 'う', $norm );
+ $norm = preg_replace( '/[えぇ]/u', 'え', $norm );
+ $norm = preg_replace( '/[おぉ]/u', 'お', $norm );
+ $norm = preg_replace( '/[かがゕ]/u', 'か', $norm );
+ $norm = preg_replace( '/[きぎ]/u', 'き', $norm );
+ $norm = preg_replace( '/[くぐ]/u', 'く', $norm );
+ $norm = preg_replace( '/[けげゖ]/u', 'け', $norm );
+ $norm = preg_replace( '/[こご]/u', 'こ', $norm );
+ $norm = preg_replace( '/[さざ]/u', 'さ', $norm );
+ $norm = preg_replace( '/[しじ]/u', 'し', $norm );
+ $norm = preg_replace( '/[すず]/u', 'す', $norm );
+ $norm = preg_replace( '/[せぜ]/u', 'せ', $norm );
+ $norm = preg_replace( '/[そぞ]/u', 'そ', $norm );
+ $norm = preg_replace( '/[ただ]/u', 'た', $norm );
+ $norm = preg_replace( '/[ちぢ]/u', 'ち', $norm );
+ $norm = preg_replace( '/[つづっ]/u', 'つ', $norm );
+ $norm = preg_replace( '/[てで]/u', 'て', $norm );
+ $norm = preg_replace( '/[とど]/u', 'と', $norm );
+ $norm = preg_replace( '/[な]/u', 'な', $norm );
+ $norm = preg_replace( '/[に]/u', 'に', $norm );
+ $norm = preg_replace( '/[ぬ]/u', 'ぬ', $norm );
+ $norm = preg_replace( '/[ね]/u', 'ね', $norm );
+ $norm = preg_replace( '/[の]/u', 'の', $norm );
+ $norm = preg_replace( '/[はばぱ]/u', 'は', $norm );
+ $norm = preg_replace( '/[ひびぴ]/u', 'ひ', $norm );
+ $norm = preg_replace( '/[ふぶぷ]/u', 'ふ', $norm );
+ $norm = preg_replace( '/[へべぺ]/u', 'へ', $norm );
+ $norm = preg_replace( '/[ほぼぽ]/u', 'ほ', $norm );
+ $norm = preg_replace( '/[ま]/u', 'ま', $norm );
+ $norm = preg_replace( '/[み]/u', 'み', $norm );
+ $norm = preg_replace( '/[む]/u', 'む', $norm );
+ $norm = preg_replace( '/[め]/u', 'め', $norm );
+ $norm = preg_replace( '/[も]/u', 'も', $norm );
+ $norm = preg_replace( '/[やゃ]/u', 'や', $norm );
+ $norm = preg_replace( '/[ゆゅ]/u', 'ゆ', $norm );
+ $norm = preg_replace( '/[よょ]/u', 'よ', $norm );
+ $norm = preg_replace( '/[ら]/u', 'ら', $norm );
+ $norm = preg_replace( '/[り]/u', 'り', $norm );
+ $norm = preg_replace( '/[る]/u', 'る', $norm );
+ $norm = preg_replace( '/[れ]/u', 'れ', $norm );
+ $norm = preg_replace( '/[ろ]/u', 'ろ', $norm );
+ $norm = preg_replace( '/[わゎ]/u', 'わ', $norm );
+ $norm = preg_replace( '/[ゐ]/u', 'ゐ', $norm );
+ $norm = preg_replace( '/[ゑ]/u', 'ゑ', $norm );
+ $norm = preg_replace( '/[を]/u', 'を', $norm );
+ $norm = preg_replace( '/[ん]/u', 'ん', $norm );
+
+ return $norm;
+}
+
+?>
Index: DidYouMean/didyoumean.sql
===================================================================
--- DidYouMean/didyoumean.sql (revision 0)
+++ DidYouMean/didyoumean.sql (revision 0)
@@ -0,0 +1,15 @@
+CREATE TABLE /*$wgDBprefix*/dympage (
+ `dp_pageid` int(8) NOT NULL,
+ `dp_normid` int(8) NOT NULL,
+ -- the primary key is already a unique index on dp_pageid
+ PRIMARY KEY (`dp_pageid`),
+ KEY `dp_normid` (`dp_normid`)
+);
+
+CREATE TABLE /*$wgDBprefix*/dymnorm (
+ `dn_normid` int(8) AUTO_INCREMENT,
+ `dn_normtitle` varchar(255) binary NOT NULL,
+ -- the primary key is already a unique index on dn_normid
+ PRIMARY KEY (`dn_normid`),
+ UNIQUE KEY `dn_normtitle` (`dn_normtitle`)
+);
Index: DidYouMean/install.php
===================================================================
--- DidYouMean/install.php (revision 0)
+++ DidYouMean/install.php (revision 0)
@@ -0,0 +1,99 @@
+<?php
+
+/**
+ * Installation script for the DidYouMean extension
+ * Creates the dympage and dymnorm tables, then indexes every namespace-0, non-redirect page.
+ * @addtogroup Extensions
+ * @author Andrew Dunbar <hippytrail@gmail.com>
+ * @copyright © 2007 Andrew Dunbar
+ * @licence Copyright holder allows use of the code for any purpose
+ */
+
+# We're going to have to assume we're running from one of two places
+## extensions/install.php (bad setup!)
+## extensions/DidYouMean/install.php (the dir name doesn't even matter)
+$maint = dirname( dirname( __FILE__ ) ) . '/maintenance';
+if( is_file( $maint . '/commandLine.inc' ) ) {
+	require_once( $maint . '/commandLine.inc' );
+} else {
+	$maint = dirname( dirname( dirname( __FILE__ ) ) ) . '/maintenance';
+	if( is_file( $maint . '/commandLine.inc' ) ) {
+		require_once( $maint . '/commandLine.inc' );
+	} else {
+		# We can't find it, give up
+		echo( "The installation script was unable to find the maintenance directories.\n\n" );
+		die( 1 );
+	}
+}
+
+# Set up some other paths
+$sql = dirname( __FILE__ ) . '/didyoumean.sql';
+
+# Whine if we don't have appropriate credentials to hand
+if( !isset( $wgDBadminuser ) || !isset( $wgDBadminpassword ) ) {
+	echo( "No superuser credentials could be found. Please provide the details\n" );
+	echo( "of a user with appropriate permissions to update the database. See\n" );
+	echo( "AdminSettings.sample for more details.\n\n" );
+	die( 1 );
+}
+
+# Get a connection
+$dbclass = $wgDBtype == 'MySql'
+	? 'Database'
+	: 'Database' . ucfirst( strtolower( $wgDBtype ) );
+$dbc = new $dbclass;
+$dba = $dbc->newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname, 1 );
+
+# Check we're connected
+if( !$dba->isOpen() ) {
+	echo( "A connection to the database could not be established.\n\n" );
+	die( 1 );
+}
+
+# Do nothing if the tables exist
+if( !$dba->tableExists( 'dympage' ) || !$dba->tableExists( 'dymnorm' ) ) {
+	echo( "Sourcing: $sql\n" );
+	$res = $dba->sourceFile( $sql );
+	echo( "Result: $res\n" );
+	if( $res === true ) { # sourceFile() gives true on success and an error string (also truthy) on failure
+		echo( "The tables have been set up correctly.\n" );
+
+		require_once( dirname( __FILE__ ) . '/DYMNorm.php' ); # same directory as this script, whatever the cwd
+
+		$result = $dba->select(
+			'page',
+			array ( 'page_title', 'page_id' ),
+			array (
+				'page_namespace=0',
+				'page_is_redirect=0'
+			)
+		);
+
+		while( $row = $dba->fetchObject( $result ) ) {
+			$norm = wfDymNormalise($row->page_title);
+
+			# *new* table using numeric columns where possible
+			$theid = $dba->selectField( 'dymnorm', 'dn_normid', array( 'dn_normtitle' => $norm ) );
+			if ($theid) {
+				echo( "old: $row->page_title ->\t$norm = $theid\n" );
+			} else {
+				$normid = $dba->nextSequenceValue( 'dymnorm_dn_normid_seq' );
+				$dba->insert( 'dymnorm', array( 'dn_normid' => $normid, 'dn_normtitle' => $norm ) );
+				$theid = $dba->insertId();
+				echo( "NEW: $row->page_title ->\t$norm = $theid\n" );
+			}
+			$dba->insert( 'dympage', array( 'dp_pageid' => $row->page_id, 'dp_normid' => $theid ) );
+		}
+		$dba->freeResult( $result );
+	} else {
+		echo( "The tables could not be set up; see the result above.\n" );
+	}
+} else {
+	echo( "The tables already exist. No action was taken.\n" );
+}
+
+# Close the connection
+$dba->close();
+echo( "\n" );
+
+?>

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3051
Default Alt Text
extensions-diff.txt (30 KB)

Event Timeline