Page MenuHomePhabricator
Authored By
MuratKaribay
May 25 2020, 9:39 AM
Size
10 KB
Referenced Files
None
Subscribers
None

KkConverter.php

<?php
require_once( dirname(__FILE__).'/../LanguageConverter.php' );
require_once( dirname(__FILE__).'/LanguageKk_cyrl.php' );
// Alphabets used by the Kazakh converter. Each constant is a plain UTF-8
// string that is later spliced into PCRE character classes.

// Kazakh Cyrillic, uppercase letters
define(
    'KK_C_UC',
    'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ'
);
// Kazakh Cyrillic, lowercase letters
define(
    'KK_C_LC',
    'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя'
);
// Kazakh Latin, uppercase letters
define(
    'KK_L_UC',
    'AÄBCÇDEFGĞHİIJKLMNŊOÖPQRSŞTUÜVWXYZ'
);
// Kazakh Latin, lowercase letters
define(
    'KK_L_LC',
    'aäbcçdefgğhiıjklmnŋoöpqrsştuüvwxyz'
);
// Kazakh Arabic letters
define(
    'KK_A',
    'ءابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ'
);
// U+0621 ARABIC LETTER HAMZA
define(
    'HAMZA',
    'ء'
);
/**
 * Kazakh (Қазақша) converter routines.
 *
 * Converts Kazakh Cyrillic text to the Latin ('kk-latn') and Arabic
 * ('kk-arab') variants by applying ordered lists of regular-expression
 * replacements built in loadRegs().
 *
 * @ingroup Language
 */
class KkConverter extends LanguageConverter {
    /**
     * Ordered map of PCRE pattern => replacement for Cyrillic -> Latin.
     * Rules are applied one after another, so order is significant.
     */
    public $mCyrl2Latn = array();

    /**
     * Ordered map of PCRE pattern => replacement for Cyrillic -> Arabic.
     * Rules are applied one after another, so order is significant.
     */
    public $mCyrl2Arab = array();

    /**
     * Forwards all arguments to the parent constructor, then builds the
     * regular-expression tables.
     *
     * @param $langobj The language object this converter belongs to
     * @param $maincode String: main language code
     * @param $variants Array: supported variant codes
     * @param $variantfallbacks Array: variant code => fallback code
     * @param $flags Array: passed through to the parent
     */
    function __construct( $langobj, $maincode,
        $variants = array(),
        $variantfallbacks = array(),
        $flags = array() ) {
        parent::__construct( $langobj, $maincode,
            $variants, $variantfallbacks, $flags );
        $this->loadRegs();
    }

    /**
     * Installs an empty replacement table for every variant; the actual
     * conversion is done by the regex tables, not by these tables.
     */
    function loadDefaultTables() {
        $kk2Cyrl = array();
        $kk2Latn = array();
        $kk2Arab = array();
        $this->mTables = array(
            'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
            'kk-latn' => new ReplacementArray( $kk2Latn ),
            'kk-arab' => new ReplacementArray( $kk2Arab ),
            'kk' => new ReplacementArray()
        );
    }

    /**
     * Builds the Cyrillic -> Latin and Cyrillic -> Arabic rule tables.
     *
     * Every key must be unique: PHP silently keeps only the last value of a
     * repeated array key, which makes duplicated rules hard to spot.
     */
    function loadRegs() {
        $this->mCyrl2Latn = array(
            // Two-letter sequences starting with И. They have to run before
            // any single-letter rule, otherwise the letters they look for
            // have already been replaced and the rules can never match.
            // They are case-sensitive so that lowercase input stays lowercase.
            '/ИІ/u' => 'İYİ', '/иі/u' => 'iyi',
            '/ИЫ/u' => 'IYI', '/иы/u' => 'ıyı',
            '/ИЯ/u' => 'İYA', '/ия/u' => 'iya',
            // И, Ю, Я at the very start of the converted text fragment
            '/^И([' . KK_C_UC . ']|$)/u' => 'İY$1', '/^И([' . KK_C_LC . ']|$)/u' => 'İy$1',
            '/^Ю([' . KK_C_UC . ']|$)/u' => 'İYW$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'İyw$1',
            '/^Я([' . KK_C_UC . ']|$)/u' => 'İYA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'İya$1',
            // Letters that expand to more than one Latin letter, and signs
            '/Ё/u' => 'Ö', '/ё/u' => 'ö',
            '/И/u' => 'İY', '/и/u' => 'iy',
            '/Ю/u' => 'İYW', '/ю/u' => 'iyw',
            '/Я/u' => 'İYA', '/я/u' => 'iya',
            '/У/u' => 'Ú', '/у/u' => 'ú',
            // Щ is written like Ш; the lowercase form maps to lowercase
            '/Щ/u' => 'C', '/щ/u' => 'c',
            '/[ъЪ]/u' => '', '/[ьЬ]/u' => '',
            // One-to-one letters
            '/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
            '/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
            '/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
            '/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
            '/Ж/u' => 'J', '/ж/u' => 'j',
            '/З/u' => 'Z', '/з/u' => 'z', '/Й/u' => 'Y', '/й/u' => 'y',
            '/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
            '/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
            '/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ŋ', '/ң/u' => 'ŋ',
            '/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
            '/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
            '/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
            '/Ұ/u' => 'U', '/ұ/u' => 'u', '/Ү/u' => 'Ü', '/ү/u' => 'ü',
            '/Ф/u' => 'F', '/ф/u' => 'f', '/Х/u' => 'H', '/х/u' => 'h',
            // Uppercase Ц maps to uppercase S
            '/Һ/u' => 'H', '/һ/u' => 'h', '/Ц/u' => 'S', '/ц/u' => 's',
            // The replacements for Ш/ш are Latin C/c; take care not to type
            // the look-alike Cyrillic letters here.
            '/Ч/u' => 'Ç', '/ч/u' => 'ç', '/Ш/u' => 'C', '/ш/u' => 'c',
            '/Ы/u' => 'I', '/ы/u' => 'ı',
            '/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'E', '/э/u' => 'e',
        );
        $this->mCyrl2Arab = array(
            ## Punctuation -> Arabic
            '/#|№|No\./u' => '؀', # &#x0600;
            '/\,/' => '،', # &#x060C;
            '/;/' => '؛', # &#x061B;
            '/\?/' => '؟', # &#x061F;
            '/%/' => '٪', # &#x066A;
            '/\*/' => '٭', # &#x066D;
            ## Cyrillic -> Arabic
            // "ия" is written as a single "ya". This has to run before the
            // separate rules for и and я, which would otherwise consume it.
            '/ия/ui' => 'يا',
            '/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
            '/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
            '/[и]/ui' => 'ي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
            '/ц/ui' => 'س', '/щ/ui' => 'ش',
            '/һ/ui' => 'ح', '/ч/ui' => 'چ',
            '/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
            '/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
            '/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
            '/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
            '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
            '/ш/ui' => 'ش',
        );
    }

    /**
     * Parses a manual conversion rule.
     *
     * With the 'T' flag the rule is handed to the parent implementation;
     * otherwise the rule text is used unchanged for every variant.
     *
     * @param $rule String: the rule text
     * @param $flags Array: rule flags
     * @return Array: variant code => text (for non-'T' rules)
     */
    function parseManualRule( $rule, $flags = array() ) {
        if ( in_array( 'T', $flags ) ) {
            return parent::parseManualRule( $rule, $flags );
        }
        // otherwise ignore all formatting
        $carray = array(); // stays an empty array when there are no variants
        foreach ( $this->mVariants as $v ) {
            $carray[$v] = $rule;
        }
        return $carray;
    }

    /**
     * A function wrapper:
     *  - if the preferred variant is the main language code, leave the link
     *    name as it was
     *  - do not try to find variants for titles in the user namespaces
     *
     * @param $link String: link text, may be rewritten in place
     * @param $nt Title object (or non-object), passed on to the parent
     * @param $ignoreOtherCond Boolean: passed on to the parent
     */
    function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
        // check for user namespace
        if ( is_object( $nt ) ) {
            $ns = $nt->getNamespace();
            if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
                return;
            }
        }
        $oldlink = $link;
        parent::findVariantLink( $link, $nt, $ignoreOtherCond );
        if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
            $link = $oldlink;
        }
    }

    /**
     * An ugly function wrapper for parsing Image titles
     * (to prevent image name conversion).
     *
     * When the current title is in the file namespace and $text starts with
     * that namespace's name followed by a colon, $text is returned as is.
     *
     * @param $text String: text to convert
     * @param $toVariant String|false: target variant, passed to the parent
     * @return String
     */
    function autoConvert( $text, $toVariant = false ) {
        global $wgTitle;
        if ( is_object( $wgTitle ) && $wgTitle->getNamespace() == NS_FILE ) {
            $imagename = $wgTitle->getNsText();
            // The namespace name is data, not a pattern: quote it so that
            // any regex metacharacter in it is matched literally.
            if ( preg_match( '/^' . preg_quote( $imagename, '/' ) . ':/', $text ) ) {
                return $text;
            }
        }
        return parent::autoConvert( $text, $toVariant );
    }

    /**
     * Translates text into the given variant.
     *
     * The text first goes through the parent's table-based translation and
     * is then run through regsConverter() piece by piece. As a side effect
     * the global $wgContLanguageCode is set to match the target variant.
     *
     * @param $text String: text to translate
     * @param $toVariant String: target variant code
     * @return String
     */
    function translate( $text, $toVariant ) {
        global $wgContLanguageCode;
        $text = parent::translate( $text, $toVariant );
        $letters = '';
        switch ( $toVariant ) {
            case 'kk-cyrl':
                $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
                $wgContLanguageCode = 'kk';
                break;
            case 'kk-latn':
                $letters = KK_C_UC . KK_C_LC . '№0123456789';
                $wgContLanguageCode = 'kk-Latn';
                break;
            case 'kk-arab':
                // Source letters plus the punctuation that has an Arabic
                // counterpart in the Cyrillic -> Arabic table. The backslashes
                // are for the character class this string is placed into.
                $letters = KK_C_UC . KK_C_LC . ',;\?%\*№0123456789';
                $wgContLanguageCode = 'kk-Arab';
                break;
            default:
                $wgContLanguageCode = 'kk';
                return $text;
        }
        // disable conversion variables like $1, $2...
        $varsfix = '\$[0-9]';
        // Offsets returned with PREG_SPLIT_OFFSET_CAPTURE are byte offsets,
        // so the byte-based substr()/strlen() below are the matching tools.
        $matches = preg_split( '/' . $varsfix . '[^' . $letters . ']+/u', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
        $mstart = 0;
        $ret = '';
        foreach ( $matches as $m ) {
            // copy the delimiter that preceded this piece unchanged
            $ret .= substr( $text, $mstart, $m[1] - $mstart );
            $ret .= $this->regsConverter( $m[0], $toVariant );
            $mstart = $m[1] + strlen( $m[0] );
        }
        return $ret;
    }

    /**
     * Applies the regular-expression rule table of the target variant.
     *
     * For 'kk-arab', every word that contains a front vowel and none of the
     * excluded letters first gets a hamza placed before each run of
     * Cyrillic letters. Variants without a rule table return $text as is.
     *
     * @param $text String: text fragment to convert
     * @param $toVariant String: target variant code
     * @return String
     */
    function regsConverter( $text, $toVariant ) {
        if ( $text == '' ) {
            return $text;
        }
        switch ( $toVariant ) {
            case 'kk-arab':
                $letters = KK_C_LC . KK_C_UC;
                $front = 'әөүіӘӨҮІ';
                $excludes = 'еэгғкқЕЭГҒКҚ';
                // split text to words; the "u" modifier makes PCRE treat the
                // subject as UTF-8 instead of single bytes
                $matches = preg_split( '/[\b\s\-\.:]+/u', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
                $mstart = 0;
                $ret = '';
                foreach ( $matches as $m ) {
                    $ret .= substr( $text, $mstart, $m[1] - $mstart );
                    // does the word contain a front vowel?
                    // words containing е, э, г, ғ, к, қ are excluded,
                    // they should be written without hamza
                    if ( preg_match( '/[' . $front . ']/u', $m[0] )
                        && !preg_match( '/[' . $excludes . ']/u', $m[0] ) ) {
                        $ret .= preg_replace( '/[' . $letters . ']+/u', HAMZA . '$0', $m[0] );
                    } else {
                        $ret .= $m[0];
                    }
                    $mstart = $m[1] + strlen( $m[0] );
                }
                $text = $ret;
                foreach ( $this->mCyrl2Arab as $pat => $rep ) {
                    $text = preg_replace( $pat, $rep, $text );
                }
                return $text;
            case 'kk-latn':
                foreach ( $this->mCyrl2Latn as $pat => $rep ) {
                    $text = preg_replace( $pat, $rep, $text );
                }
                return $text;
            default:
                return $text;
        }
    }

    /**
     * We want our external link captions to be converted in variants,
     * so we return the original text instead of -{$text}-, except for
     * text that starts with an http(s), ftp or irc URL scheme.
     *
     * @param $text String: text to protect from conversion
     * @param $noParse Boolean: when true the text is always protected
     * @return String
     */
    function markNoConversion( $text, $noParse = false ) {
        // The group keeps "^" attached to every scheme, not only to http(s).
        if ( $noParse || preg_match( '/^(?:https?|ftp|irc):\/\//', $text ) ) {
            return parent::markNoConversion( $text );
        }
        return $text;
    }

    /**
     * Converts a category sort key using the 'kk' variant.
     *
     * @param $key String: the category key
     * @return String
     */
    function convertCategoryKey( $key ) {
        return $this->autoConvert( $key, 'kk' );
    }
}
/**
 * Class that handles Cyrillic, Latin and Arabic scripts for Kazakh.
 * Right now it only distinguishes kk-cyrl, kk-latn and kk-arab.
 *
 * @ingroup Language
 */
class LanguageKk extends LanguageKk_cyrl {
    /**
     * Creates the KkConverter for the four variant codes and registers it
     * as a handler in $wgHooks['ArticleSaveComplete'].
     */
    function __construct() {
        global $wgHooks;
        parent::__construct();
        $variants = array( 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab' );
        $variantfallbacks = array(
            'kk' => 'kk-cyrl',
            'kk-cyrl' => 'kk',
            'kk-latn' => 'kk',
            'kk-arab' => 'kk',
        );
        $this->mConverter = new KkConverter( $this, 'kk', $variants, $variantfallbacks );
        $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
    }

    /**
     * Work around for right-to-left direction support in kk-arab.
     *
     * @return bool true for the 'kk-arab' variant, otherwise whatever the
     *  parent reports
     */
    function isRTL() {
        $variant = $this->getPreferredVariant();
        if ( $variant == 'kk-arab' ) {
            return true;
        }
        return parent::isRTL();
    }

    /**
     * It fixes the issue with ucfirst for transforming 'i' to 'İ'
     * in the 'kk-latn' variant.
     *
     * @param $string String: text whose first letter is to be uppercased
     * @return String
     */
    function ucfirst( $string ) {
        $variant = $this->getPreferredVariant();
        // isset() guards against reading offset 0 of an empty string
        if ( $variant == 'kk-latn' && isset( $string[0] ) && $string[0] == 'i' ) {
            $string = 'İ' . substr( $string, 1 );
        } else {
            $string = parent::ucfirst( $string );
        }
        return $string;
    }

    /**
     * It fixes the issue with lcfirst for transforming 'I' to 'ı'
     * in the 'kk-latn' variant.
     *
     * @param $string String: text whose first letter is to be lowercased
     * @return String
     */
    function lcfirst( $string ) {
        $variant = $this->getPreferredVariant();
        // isset() guards against reading offset 0 of an empty string
        if ( $variant == 'kk-latn' && isset( $string[0] ) && $string[0] == 'I' ) {
            $string = 'ı' . substr( $string, 1 );
        } else {
            $string = parent::lcfirst( $string );
        }
        return $string;
    }

    /**
     * Dispatches grammar conversion to the parent's script-specific
     * routine chosen by the preferred variant; Cyrillic is the default.
     *
     * @param $word String: the word to convert
     * @param $case String: the grammatical case, passed through
     * @return String
     */
    function convertGrammar( $word, $case ) {
        wfProfileIn( __METHOD__ );
        $variant = $this->getPreferredVariant();
        switch ( $variant ) {
            case 'kk-arab':
                $word = parent::convertGrammarKk_arab( $word, $case );
                break;
            case 'kk-latn':
                $word = parent::convertGrammarKk_latn( $word, $case );
                break;
            case 'kk-cyrl':
            case 'kk':
            default:
                $word = parent::convertGrammarKk_cyrl( $word, $case );
        }
        wfProfileOut( __METHOD__ );
        return $word;
    }
}

File Metadata

Mime Type
text/x-php
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
8417657
Default Alt Text
KkConverter.php (10 KB)

Event Timeline