Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F31839892
KkConverter.php
MuratKaribay (MuratKaribay)
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
MuratKaribay
May 25 2020, 9:39 AM
2020-05-25 09:39:32 (UTC+0)
Size
10 KB
Referenced Files
None
Subscribers
None
KkConverter.php
View Options
<?php
require_once
(
dirname
(
__FILE__
).
'/../LanguageConverter.php'
);
require_once
(
dirname
(
__FILE__
).
'/LanguageKk_cyrl.php'
);
// Character inventories used to build the conversion regexes.

// Kazakh Cyrillic, uppercase letters
define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' );

// Kazakh Cyrillic, lowercase letters
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' );

// Kazakh Latin, uppercase letters
define( 'KK_L_UC', 'AÄBCÇDEFGĞHİIJKLMNŊOÖPQRSŞTUÜVWXYZ' );

// Kazakh Latin, lowercase letters
define( 'KK_L_LC', 'aäbcçdefgğhiıjklmnŋoöpqrsştuüvwxyz' );

// Kazakh Arabic letters
define( 'KK_A', 'ءابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' );

// U+0621 ARABIC LETTER HAMZA
define( 'HAMZA', 'ء' );
/** Kazakh (Қазақша)
* converter routines
*
* @ingroup Language
*/
class
KkConverter
extends
LanguageConverter
{
/**
 * Forwards every argument unchanged to the LanguageConverter constructor
 * and then builds the regex tables via loadRegs().
 *
 * @param $langobj          passed through to the parent constructor
 * @param $maincode         passed through to the parent constructor
 * @param $variants         array, passed through to the parent constructor
 * @param $variantfallbacks array, passed through to the parent constructor
 * @param $flags            array, passed through to the parent constructor
 */
function __construct(
    $langobj,
    $maincode,
    $variants = array(),
    $variantfallbacks = array(),
    $flags = array()
) {
    parent::__construct( $langobj, $maincode, $variants, $variantfallbacks, $flags );

    // The regex tables are needed by regsConverter(), so build them up front.
    $this->loadRegs();
}
/**
 * Initialises $this->mTables with one ReplacementArray per variant code.
 *
 * The three script variants each get a ReplacementArray built from an empty
 * array; the plain 'kk' entry gets a ReplacementArray constructed without
 * arguments.
 */
function loadDefaultTables() {
    $tables = array();

    foreach ( array( 'kk-cyrl', 'kk-latn', 'kk-arab' ) as $variantCode ) {
        $tables[$variantCode] = new ReplacementArray( array() );
    }
    $tables['kk'] = new ReplacementArray();

    $this->mTables = $tables;
}
/**
 * Builds the regex => replacement tables used by regsConverter():
 *  - $this->mCyrl2Latn: Cyrillic -> Latin
 *  - $this->mCyrl2Arab: Cyrillic (and some punctuation) -> Arabic
 *
 * regsConverter() applies the entries one after another with preg_replace(),
 * in array order, so ORDER MATTERS: a rule can only match text that earlier
 * rules have not already rewritten. Multi-letter rules therefore have to be
 * listed before the single-letter rules for the same letters.
 *
 * Every key must be unique. PHP keeps only the last value given for a
 * repeated array key, so a repeated pattern silently discards the earlier
 * mapping.
 */
function loadRegs() {
    $this->mCyrl2Latn = array(
        // Two-letter overrides for И + vowel. They are listed first so that
        // they see the original Cyrillic text, and they are case-sensitive so
        // that the lowercase input keeps a lowercase result.
        // NOTE(review): mixed-case pairs such as "Ия" are not covered here and
        // fall through to the single-letter rules below.
        '/ИІ/u' => 'İYİ',
        '/иі/u' => 'iyi',
        '/ИЫ/u' => 'IYI',
        '/иы/u' => 'ıyı',
        '/ИЯ/u' => 'İYA',
        '/ия/u' => 'iya',

        // И, Ю, Я at the very start of the converted chunk: the case of the
        // tail follows the case of the next letter (or end of chunk).
        '/^И([' . KK_C_UC . ']|$)/u' => 'İY$1',
        '/^И([' . KK_C_LC . ']|$)/u' => 'İy$1',
        '/^Ю([' . KK_C_UC . ']|$)/u' => 'İYW$1',
        '/^Ю([' . KK_C_LC . ']|$)/u' => 'İyw$1',
        '/^Я([' . KK_C_UC . ']|$)/u' => 'İYA$1',
        '/^Я([' . KK_C_LC . ']|$)/u' => 'İya$1',

        // Letters that expand to more than one Latin letter, or are dropped.
        '/Ё/u' => 'Ö', '/ё/u' => 'ö',
        '/И/u' => 'İY', '/и/u' => 'iy',
        '/Ю/u' => 'İYW', '/ю/u' => 'iyw',
        '/Я/u' => 'İYA', '/я/u' => 'iya',
        '/У/u' => 'Ú', '/у/u' => 'ú',
        '/[ъЪ]/u' => '',
        '/[ьЬ]/u' => '',

        // One-to-one letters, in Cyrillic alphabet order.
        '/А/u' => 'A', '/а/u' => 'a',
        '/Ә/u' => 'Ä', '/ә/u' => 'ä',
        '/Б/u' => 'B', '/б/u' => 'b',
        '/В/u' => 'V', '/в/u' => 'v',
        '/Г/u' => 'G', '/г/u' => 'g',
        '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
        '/Д/u' => 'D', '/д/u' => 'd',
        '/Е/u' => 'E', '/е/u' => 'e',
        '/Ж/u' => 'J', '/ж/u' => 'j',
        '/З/u' => 'Z', '/з/u' => 'z',
        '/Й/u' => 'Y', '/й/u' => 'y',
        '/К/u' => 'K', '/к/u' => 'k',
        '/Қ/u' => 'Q', '/қ/u' => 'q',
        '/Л/u' => 'L', '/л/u' => 'l',
        '/М/u' => 'M', '/м/u' => 'm',
        '/Н/u' => 'N', '/н/u' => 'n',
        '/Ң/u' => 'Ŋ', '/ң/u' => 'ŋ',
        '/О/u' => 'O', '/о/u' => 'o',
        '/Ө/u' => 'Ö', '/ө/u' => 'ö',
        '/П/u' => 'P', '/п/u' => 'p',
        '/Р/u' => 'R', '/р/u' => 'r',
        '/С/u' => 'S', '/с/u' => 's',
        '/Т/u' => 'T', '/т/u' => 't',
        '/Ұ/u' => 'U', '/ұ/u' => 'u',
        '/Ү/u' => 'Ü', '/ү/u' => 'ü',
        '/Ф/u' => 'F', '/ф/u' => 'f',
        '/Х/u' => 'H', '/х/u' => 'h',
        '/Һ/u' => 'H', '/һ/u' => 'h',
        // Uppercase input gives uppercase output, like every other pair.
        '/Ц/u' => 'S', '/ц/u' => 's',
        '/Ч/u' => 'Ç', '/ч/u' => 'ç',
        // The replacements are LATIN C / c (U+0043 / U+0063); a Cyrillic Es
        // here would leave a Cyrillic letter in the Latin output.
        '/Ш/u' => 'C', '/ш/u' => 'c',
        // NOTE(review): Щ was listed twice (first as Ş/ş, later as C/C). The
        // later value is the one PHP actually used, so C is kept, with the
        // lowercase case corrected. Confirm whether Ş/ş was intended instead.
        '/Щ/u' => 'C', '/щ/u' => 'c',
        '/Ы/u' => 'I', '/ы/u' => 'ı',
        '/І/u' => 'İ', '/і/u' => 'i',
        '/Э/u' => 'E', '/э/u' => 'e',
    );

    $this->mCyrl2Arab = array(
        // --- Punctuation -> Arabic ---
        // NOTE(review): the replacement is empty, so "#", "№" and "No." are
        // deleted outright (the old trailing comment showed ؀) — confirm.
        '/#|№|No\./u' => '',
        '/\,/' => '،', // ،
        '/;/' => '؛', // ؛
        '/\?/' => '؟', // ؟
        '/%/' => '٪', // ٪
        '/\*/' => '٭', // ٭

        // --- Cyrillic -> Arabic (all case-insensitive) ---
        // "ия" is written as "ya", not "yya". This has to run before the
        // single-letter rules for и and я, otherwise they rewrite both
        // letters first and this pattern can never match.
        '/ия/ui' => 'يا',

        '/[еэ]/ui' => 'ە',
        '/[ъь]/ui' => '',
        '/[аә]/ui' => 'ا',
        '/[оө]/ui' => 'و',
        '/[ұү]/ui' => 'ۇ',
        '/[ыі]/ui' => 'ى',
        '/[и]/ui' => 'ي',
        '/ё/ui' => 'يو',
        '/ю/ui' => 'يۋ',
        '/я/ui' => 'يا',
        '/[й]/ui' => 'ي',
        '/ц/ui' => 'س',
        '/щ/ui' => 'ش',
        '/һ/ui' => 'ح',
        '/ч/ui' => 'چ',
        '/б/ui' => 'ب',
        '/в/ui' => 'ۆ',
        '/г/ui' => 'گ',
        '/ғ/ui' => 'ع',
        '/д/ui' => 'د',
        '/ж/ui' => 'ج',
        '/з/ui' => 'ز',
        '/к/ui' => 'ك',
        '/қ/ui' => 'ق',
        '/л/ui' => 'ل',
        '/м/ui' => 'م',
        '/н/ui' => 'ن',
        '/ң/ui' => 'ڭ',
        '/п/ui' => 'پ',
        '/р/ui' => 'ر',
        '/с/ui' => 'س',
        '/т/ui' => 'ت',
        '/у/ui' => 'ۋ',
        '/ф/ui' => 'ف',
        '/х/ui' => 'ح',
        '/ш/ui' => 'ش',
    );
}
/**
 * Parses a manual conversion rule.
 *
 * When the 'T' flag is present the rule is handed to the parent
 * implementation. Otherwise all formatting is ignored and the raw rule text
 * is used unchanged for every variant.
 *
 * @param $rule  the rule text
 * @param $flags array of rule flags
 * @return the parent's result when 'T' is set; otherwise an array mapping
 *         each entry of $this->mVariants to $rule (empty array when there
 *         are no variants)
 */
function parseManualRule( $rule, $flags = array() ) {
    if ( in_array( 'T', $flags ) ) {
        return parent::parseManualRule( $rule, $flags );
    }

    // Initialise explicitly: with no variants the loop body never runs and
    // the result would otherwise be an undefined variable.
    $carray = array();

    // otherwise ignore all formatting
    foreach ( $this->mVariants as $v ) {
        $carray[$v] = $rule;
    }

    return $carray;
}
/**
 * Wrapper around the parent's findVariantLink():
 *  - titles in the user and user-talk namespaces are skipped entirely, so
 *    no variants are looked up for usernames;
 *  - when the preferred variant equals the main language code, $link is
 *    restored to the value it had on entry.
 *
 * @param $link            link text, passed by reference
 * @param $nt              title object (or non-object), passed by reference
 * @param $ignoreOtherCond forwarded to the parent implementation
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
    // check for user namespace
    if ( is_object( $nt ) ) {
        $namespace = $nt->getNamespace();
        if ( $namespace == NS_USER || $namespace == NS_USER_TALK ) {
            return;
        }
    }

    $linkOnEntry = $link;
    parent::findVariantLink( $link, $nt, $ignoreOtherCond );

    $preferredIsMain = ( $this->getPreferredVariant() == $this->mMainLanguageCode );
    if ( $preferredIsMain ) {
        // No specific variant selected: undo whatever the parent did to $link.
        $link = $linkOnEntry;
    }
}
/**
 * Wrapper around the parent's autoConvert() that prevents image names from
 * being converted.
 *
 * When the global $wgTitle is in the file namespace and $text starts with
 * that namespace's text followed by ':', $text is returned untouched.
 * Everything else is delegated to the parent.
 *
 * @param $text      text to convert
 * @param $toVariant target variant, forwarded to the parent
 * @return the original or converted text
 */
function autoConvert( $text, $toVariant = false ) {
    global $wgTitle;

    if ( is_object( $wgTitle ) && $wgTitle->getNamespace() == NS_FILE ) {
        // Compare the prefix literally. Interpolating the namespace text into
        // a regex would treat any metacharacter in it (or a '/') as pattern
        // syntax.
        $prefix = $wgTitle->getNsText() . ':';
        if ( strncmp( $text, $prefix, strlen( $prefix ) ) === 0 ) {
            return $text;
        }
    }

    return parent::autoConvert( $text, $toVariant );
}
/**
 * Translates text into the given variant.
 *
 * The parent's table-based translation runs first. For kk-cyrl, kk-latn and
 * kk-arab the result is then split at every "$<digit>" placeholder together
 * with the run of characters that follows it and is not in the variant's
 * letter set. Those delimiters are copied through verbatim, and each piece
 * between them is passed to regsConverter().
 *
 * Side effect: sets the global $wgContLanguageCode according to $toVariant.
 *
 * @param $text      text to translate
 * @param $toVariant target variant code
 * @return the translated text
 */
function translate( $text, $toVariant ) {
    global $wgContLanguageCode;

    $text = parent::translate( $text, $toVariant );

    $letters = '';
    switch ( $toVariant ) {
        case 'kk-cyrl':
            $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
            $wgContLanguageCode = 'kk';
            break;
        case 'kk-latn':
            $letters = KK_C_UC . KK_C_LC . '№0123456789';
            $wgContLanguageCode = 'kk-Latn';
            break;
        case 'kk-arab':
            // A leftover comment terminator in this expression used to make
            // the whole file fail to parse; the operands themselves are
            // unchanged.
            $letters = KK_C_UC . KK_C_LC . 'ʺʹ' . ',;\?%\*№0123456789';
            $wgContLanguageCode = 'kk-Arab';
            break;
        default:
            $wgContLanguageCode = 'kk';
            return $text;
    }

    // disable conversion variables like $1, $2...
    $varsfix = '\$[0-9]';
    $matches = preg_split(
        '/' . $varsfix . '[^' . $letters . ']+/u',
        $text,
        -1,
        PREG_SPLIT_OFFSET_CAPTURE
    );

    // Offsets from PREG_SPLIT_OFFSET_CAPTURE are byte offsets, so the
    // byte-based substr()/strlen() are the right tools here.
    $mstart = 0;
    $ret = '';
    foreach ( $matches as $m ) {
        // Copy the delimiter that precedes this piece unchanged...
        $ret .= substr( $text, $mstart, $m[1] - $mstart );
        // ...then the converted piece itself.
        $ret .= $this->regsConverter( $m[0], $toVariant );
        $mstart = $m[1] + strlen( $m[0] );
    }

    return $ret;
}
/**
 * Applies the regex tables built by loadRegs() to a piece of text.
 *
 *  - 'kk-latn': every mCyrl2Latn rule is applied in order.
 *  - 'kk-arab': the text is first split into words; a word that contains a
 *    front vowel and none of the excluded letters gets HAMZA put in front
 *    of each run of Cyrillic letters. Then every mCyrl2Arab rule is applied
 *    in order.
 *  - any other variant, or empty text: returned unchanged.
 *
 * @param $text      text to convert
 * @param $toVariant target variant code
 * @return the converted text
 */
function regsConverter( $text, $toVariant ) {
    if ( $text == '' ) {
        return $text;
    }

    // 'kk-arab' is tested first, as in a switch with that case listed first.
    if ( !( $toVariant == 'kk-arab' ) ) {
        if ( $toVariant == 'kk-latn' ) {
            foreach ( $this->mCyrl2Latn as $pattern => $replacement ) {
                $text = preg_replace( $pattern, $replacement, $text );
            }
        }
        return $text;
    }

    $cyrillicLetters = KK_C_LC . KK_C_UC;
    $frontVowels = 'әөүіӘӨҮІ';
    $noHamzaLetters = 'еэгғкқЕЭГҒКҚ';

    // split text to words
    $words = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

    $cursor = 0;
    $withHamza = '';
    foreach ( $words as $entry ) {
        list( $word, $offset ) = $entry;

        // Separator between the previous word and this one, copied as-is.
        $withHamza .= substr( $text, $cursor, $offset - $cursor );

        // Does the word contain a front vowel? Words that also contain
        // е, э, г, ғ, к or қ are excluded: they are written without hamza.
        $needsHamza = preg_match( '/[' . $frontVowels . ']/u', $word )
            && !preg_match( '/[' . $noHamzaLetters . ']/u', $word );

        if ( $needsHamza ) {
            $withHamza .= preg_replace(
                '/[' . $cyrillicLetters . ']+/u',
                HAMZA . '$0',
                $word
            );
        } else {
            $withHamza .= $word;
        }

        $cursor = $offset + strlen( $word );
    }

    $converted = $withHamza;
    foreach ( $this->mCyrl2Arab as $pattern => $replacement ) {
        $converted = preg_replace( $pattern, $replacement, $converted );
    }

    return $converted;
}
/**
 * We want our external link captions to be converted in variants, so the
 * text is returned as it is rather than wrapped as -{$text}-. The exception
 * is text that is itself a URL, or any text when $noParse is set: that is
 * handed to the parent's markNoConversion().
 *
 * @param $text    caption text
 * @param $noParse when true, always delegate to the parent
 * @return the text, either untouched or as returned by the parent
 */
function markNoConversion( $text, $noParse = false ) {
    // The scheme alternatives are grouped so that the ^ anchor applies to all
    // of them. Ungrouped, only "http(s)://" was anchored and "ftp://" or
    // "irc://" anywhere inside a caption counted as a URL.
    $looksLikeUrl = preg_match( '/^(?:https?|ftp|irc):\/\//', $text );

    if ( $noParse || $looksLikeUrl ) {
        return parent::markNoConversion( $text );
    }

    return $text;
}
/**
 * Converts a category key by running it through autoConvert() with the
 * 'kk' variant.
 *
 * @param $key the category key
 * @return the converted key
 */
function convertCategoryKey( $key ) {
    $targetVariant = 'kk';

    return $this->autoConvert( $key, $targetVariant );
}
}
/**
 * Class that handles the Cyrillic, Latin and Arabic scripts for Kazakh.
 * Right now it only distinguishes kk-cyrl, kk-latn and kk-arab.
 *
 * @ingroup Language
 */
class
LanguageKk
extends
LanguageKk_cyrl
{
/**
 * Runs the parent constructor, creates the KkConverter for this language
 * (main code 'kk') and appends that converter to the global
 * 'ArticleSaveComplete' hook list.
 */
function __construct() {
    global $wgHooks;

    parent::__construct();

    // variant => fallback variant
    $fallbackMap = array(
        'kk' => 'kk-cyrl',
        'kk-cyrl' => 'kk',
        'kk-latn' => 'kk',
        'kk-arab' => 'kk',
    );

    // The variant list is exactly the keys of the fallback map, in order:
    // 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab'.
    $variantCodes = array_keys( $fallbackMap );

    $converter = new KkConverter( $this, 'kk', $variantCodes, $fallbackMap );
    $this->mConverter = $converter;

    $wgHooks['ArticleSaveComplete'][] = $converter;
}
/**
 * Workaround for right-to-left direction support in kk-arab: reports RTL
 * whenever the preferred variant is 'kk-arab', and otherwise defers to the
 * parent.
 *
 * @return bool
 */
function isRTL() {
    if ( $this->getPreferredVariant() == 'kk-arab' ) {
        return true;
    }

    return parent::isRTL();
}
/**
 * Uppercases the first character of a string.
 *
 * In the kk-latn variant a leading 'i' has to become the dotted capital 'İ',
 * so that case is handled here; everything else goes to the parent.
 *
 * @param $string input string; may be empty
 * @return the string with its first character uppercased
 */
function ucfirst( $string ) {
    $variant = $this->getPreferredVariant();

    // strncmp() is safe on an empty string, whereas indexing $string[0]
    // raises an "uninitialized string offset" notice/warning.
    if ( $variant == 'kk-latn' && strncmp( $string, 'i', 1 ) === 0 ) {
        return 'İ' . substr( $string, 1 );
    }

    return parent::ucfirst( $string );
}
/**
 * Lowercases the first character of a string.
 *
 * In the kk-latn variant a leading 'I' has to become the dotless 'ı', so
 * that case is handled here; everything else goes to the parent.
 *
 * @param $string input string; may be empty
 * @return the string with its first character lowercased
 */
function lcfirst( $string ) {
    $variant = $this->getPreferredVariant();

    // strncmp() is safe on an empty string, whereas indexing $string[0]
    // raises an "uninitialized string offset" notice/warning.
    if ( $variant == 'kk-latn' && strncmp( $string, 'I', 1 ) === 0 ) {
        return 'ı' . substr( $string, 1 );
    }

    return parent::lcfirst( $string );
}
/**
 * Dispatches grammar conversion to the parent's script-specific routine,
 * chosen by the preferred variant: kk-arab and kk-latn have their own
 * routines, and every other variant (kk-cyrl, kk, anything else) uses the
 * Cyrillic one. The call is bracketed by wfProfileIn()/wfProfileOut().
 *
 * @param $word the word to convert
 * @param $case the grammatical case, forwarded to the chosen routine
 * @return the converted word
 */
function convertGrammar( $word, $case ) {
    wfProfileIn( __METHOD__ );

    $variant = $this->getPreferredVariant();
    if ( $variant == 'kk-arab' ) {
        $word = parent::convertGrammarKk_arab( $word, $case );
    } elseif ( $variant == 'kk-latn' ) {
        $word = parent::convertGrammarKk_latn( $word, $case );
    } else {
        // 'kk-cyrl', 'kk' and any unknown variant
        $word = parent::convertGrammarKk_cyrl( $word, $case );
    }

    wfProfileOut( __METHOD__ );

    return $word;
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
8417657
Default Alt Text
KkConverter.php (10 KB)
Attached To
Mode
T250604: Remove language variants kk-cn, kk-kz, kk-tr from the Kazakh language converter
Attached
Detach File
Event Timeline
Log In to Comment