Module:za-sortkey
- පහත දැක්වෙන උපදෙස්, Module:za-sortkey/documentation හි පිහිටා ඇත. [සංස්කරණය] Categories were auto-generated by Module:module categorization. [edit]
- ප්රයෝජනවත් සබැඳි: උප පිටු ලැයිස්තුව • සබැඳි • transclusions • testcases • sandbox
This module will sort Zhuang language text.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{sortkey}}.
Within a module, use Module:languages#Language:makeSortKey.
For testcases, see Module:za-sortkey/testcases.
Functions
makeSortKey(text, lang, sc)
- Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the sort fails, returns nil.
a | ae | (ə) | b | by | c | d | e | f | g | gv | gy | h | i | k | l | m | mb | (ƃ) | my | n | nd | (ƌ) | ng | (ŋ) | ngv | (ŋv) | ny | o | oe | (ɵ) | p | r | s | t | u | v | w | (ɯ) | y |
A | A₂ | (A₂ₐ) | B | B₂ | C | D | E | F | G | G₂ | G₃ | H | I | K | L | M | M₂ | (M₂ₐ) | M₃ | N | N₂ | (N₂ₐ) | N₃ | (N₃ₐ) | N₄ | (N₄ₐ) | N₅ | O | O₂ | (O₂ₐ) | P | R | S | T | U | V | W | Wₐ | Y |
Note: letters from the old orthography (in brackets) are sorted immediately after their new equivalents.
z | (ƨ) | j | (з) | x | (ч) | q | (ƽ) | h | (ƅ) |
² | (²ᵃ) | ³ | (³ᵃ) | ⁴ | (⁴ᵃ) | ⁵ | (⁵ᵃ) | ⁶ | (⁶ᵃ) |
Note: "h" will sort as H if used as a consonant, or ⁶ if used as a tone letter.
If a syllable has no tone letter but ends with a consonant, then the following tone values are used:
m | n | ng | (ŋ) | k | p | t | b | d | g |
M¹ | N¹ | N₃¹ | (N₃ₐ¹) | K⁷ | P⁷ | T⁷ | B⁸ | D⁸ | G⁸ |
If new_bor=y or new_bor=1 is detected as a parameter of {{za-pron}} on the page, then tone 5 is substituted for tone 1 in the sortkey. If {{za-1957 spelling of}} or {{za-1957 orthography of}} is detected on the page, then the page for the new orthography is checked for new_bor=y or new_bor=1 as well.
Examples
N₂A₂³ DAN₃³ VUEN₃²DA₂⁵ SIEN₃¹ DAN₃³ SIEN¹
- ndaej dangj vuengzdaeq sieng dangj sien
LWG⁸FWN₃²G₃AN₃¹
- lwgfwngzgyang
LAN₃⁶ BIT⁷ RO₂N₃² RA₂M⁴
- langh bit roengz raemx
FAN₃²CWN₃²GAN₃³ᵃ
- Faŋƨcɯŋƨgaŋз
GIEN²N₂AN₃¹CAN₃⁵
- gienzndangcangq
Tone 5 substitution:
GUN₃¹CAN³DAN₃³
- gungcanjdangj
GUN₃¹CAN³DAN₃³ᵃ
- guŋcanзdaŋз (due to the new_bor=1 parameter on gungcanjdangj)
- bya (B₂A¹)
- byaz (B₂A²)
- byaƨ (B₂A²ᵃ)
- byaj (B₂A³)
- byaз (B₂A³ᵃ)
- byax (B₂A⁴)
- byaч (B₂A⁴ᵃ)
- byaq (B₂A⁵)
- byaƽ (B₂A⁵ᵃ)
- byah (B₂A⁶)
- byaƅ (B₂A⁶ᵃ)
- byab (B₂AB⁸)
- byad (B₂AD⁸)
- byag (B₂AG⁸)
- byak (B₂AK⁷)
- byam (B₂AM¹)
- byan (B₂AN¹)
- byang (B₂AN₃¹)
- byaŋ (B₂AN₃¹!)
- byap (B₂AP⁷)
- byat (B₂AT⁷)
- a'a (A¹A¹)
- aba (A¹BA¹)
- a'ba (A¹BA¹)
- a'da (A¹DA¹)
- ada (A¹DA¹)
- a'ga (A¹GA¹)
- aga (A¹GA¹)
- a'ha (A¹HA¹)
- aha (A¹HA¹)
- aka (A¹KA¹)
- a'ma (A¹MA¹)
- ama (A¹MA¹)
- a'na (A¹NA¹)
- ana (A¹NA¹)
- anga (A¹N₃A¹)
- apa (A¹PA¹)
- ata (A¹TA¹)
- aza (A²A¹)
- aƨa (A²A¹!)
- aja (A³A¹)
- aзa (A³A¹!)
- axa (A⁴A¹)
- aчa (A⁴A¹!)
- aqa (A⁵A¹)
- aƽa (A⁵A¹!)
- ah'a (A⁶A¹)
- aƅa (A⁶A¹!)
- abza (AB²A¹)
- abƨa (AB²A¹!)
- abja (AB³A¹)
- abзa (AB³A¹!)
- abxa (AB⁴A¹)
- abчa (AB⁴A¹!)
- abqa (AB⁵A¹)
- abƽa (AB⁵A¹!)
- abh'a (AB⁶A¹)
- abƅa (AB⁶A¹!)
- ab'a (AB⁸A¹)
- ab'ha (AB⁸HA¹)
- abha (AB⁸HA¹)
- adza (AD²A¹)
- adƨa (AD²A¹!)
- adja (AD³A¹)
- adзa (AD³A¹!)
- adxa (AD⁴A¹)
- adчa (AD⁴A¹!)
- adqa (AD⁵A¹)
- adƽa (AD⁵A¹!)
- adh'a (AD⁶A¹)
- adƅa (AD⁶A¹!)
- ad'a (AD⁸A¹)
- ad'ha (AD⁸HA¹)
- adha (AD⁸HA¹)
- agza (AG²A¹)
- agƨa (AG²A¹!)
- agja (AG³A¹)
- agзa (AG³A¹!)
- agxa (AG⁴A¹)
- agчa (AG⁴A¹!)
- agqa (AG⁵A¹)
- agƽa (AG⁵A¹!)
- agƅ (AG⁶ᵃ)
- agh'a (AG⁶A¹)
- ag'a (AG⁸A¹)
- ag'ha (AG⁸HA¹)
- agha (AG⁸HA¹)
- akza (AK²A¹)
- akƨa (AK²A¹!)
- akja (AK³A¹)
- akзa (AK³A¹!)
- akxa (AK⁴A¹)
- akчa (AK⁴A¹!)
- akqa (AK⁵A¹)
- akƽa (AK⁵A¹!)
- akh'a (AK⁶A¹)
- akƅa (AK⁶A¹!)
- akha (AK⁷HA¹)
- ak'ha (AK⁷HA¹)
- am'a (AM¹A¹)
- am'ha (AM¹HA¹)
- amha (AM¹HA¹)
- amza (AM²A¹)
- amƨa (AM²A¹!)
- amja (AM³A¹)
- amзa (AM³A¹!)
- amxa (AM⁴A¹)
- amчa (AM⁴A¹!)
- amqa (AM⁵A¹)
- amƽa (AM⁵A¹!)
- amh'a (AM⁶A¹)
- amƅa (AM⁶A¹!)
- an'a (AN¹A¹)
- an'ga (AN¹GA¹)
- an'ha (AN¹HA¹)
- anha (AN¹HA¹)
- anza (AN²A¹)
- anƨa (AN²A¹!)
- anja (AN³A¹)
- anзa (AN³A¹!)
- anxa (AN⁴A¹)
- anчa (AN⁴A¹!)
- anqa (AN⁵A¹)
- anƽa (AN⁵A¹!)
- anh'a (AN⁶A¹)
- anƅa (AN⁶A¹!)
- aŋ (AN₃¹!)
- ang'a (AN₃¹A¹)
- ang'ha (AN₃¹HA¹)
- angha (AN₃¹HA¹)
- angza (AN₃²A¹)
- aŋƨa (AN₃²A¹!)
- angja (AN₃³A¹)
- aŋзa (AN₃³A¹!)
- angxa (AN₃⁴A¹)
- aŋчa (AN₃⁴A¹!)
- angqa (AN₃⁵A¹)
- aŋƽa (AN₃⁵A¹!)
- angh'a (AN₃⁶A¹)
- aŋƅa (AN₃⁶A¹!)
- apza (AP²A¹)
- apƨa (AP²A¹!)
- apja (AP³A¹)
- apзa (AP³A¹!)
- apxa (AP⁴A¹)
- apчa (AP⁴A¹!)
- apqa (AP⁵A¹)
- apƽa (AP⁵A¹!)
- aph'a (AP⁶A¹)
- apƅa (AP⁶A¹!)
- ap'ha (AP⁷HA¹)
- apha (AP⁷HA¹)
- atza (AT²A¹)
- atƨa (AT²A¹!)
- atja (AT³A¹)
- atзa (AT³A¹!)
- atxa (AT⁴A¹)
- atчa (AT⁴A¹!)
- atqa (AT⁵A¹)
- atƽa (AT⁵A¹!)
- ath'a (AT⁶A¹)
- atƅa (AT⁶A¹!)
- at'ha (AT⁷HA¹)
- atha (AT⁷HA¹)
-- Table of functions exported by this module (returned at the end of the file).
local export = {}
-- Character constructor from Module:string/char; called below with one codepoint per call.
local u = require("Module:string/char")
-- Lua byte pattern for one UTF-8 encoded character: a lead byte followed by any continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
-- Private Use Area characters appended after a base letter in the final sortkey
-- (see oneCharFinal / twoCharsFinal), so that e.g. "ae" sorts directly after "a".
-- NOTE(review): the original declared three further names (e, f, g) here that never
-- received a value and were never used; they are dropped. This assumes u() returns a
-- single value per call — confirm against Module:string/char.
local a, b, c, d = u(0xF000), u(0xF001), u(0xF002), u(0xF003)
-- Private Use Area placeholders that stand in for multi-letter initials while the text is
-- being processed (see twoCharsInit / threeCharsInit), so later patterns can treat each
-- cluster as a single character.
local b2 = u(0xF100)
local g2, g3 = u(0xF200), u(0xF201)
local m2, m4 = u(0xF300), u(0xF301)
local n2, n4, n6, n7, n8 = u(0xF400), u(0xF401), u(0xF402), u(0xF403), u(0xF404)
-- Characters stripped from the decomposed text in the last step of makeSortKey.
local remove_diacritics = "'" -- apostrophe
-- Unconditional tone letters -> tone digit; old-orthography letters also get a "!" marker.
-- Applied per UTF-8 character via gsub with a table replacement.
local oneCharInit = {
	z = "2",
	["ƨ"] = "2!",
	j = "3",
	["з"] = "3!",
	x = "4",
	["ч"] = "4!",
	q = "5",
	["ƽ"] = "5!",
	["ƅ"] = "6!",
}

-- Two-letter clusters -> single placeholder character.
-- Entry order is kept as in the original constructor because the table is walked with pairs().
local twoCharsInit = {
	by = b2,
	gv = g2,
	gy = g3,
	mb = m2,
	my = m4,
	nd = n2,
	ng = n4,
	["ŋv"] = n7,
	ny = n8,
}

-- Three-letter clusters -> single placeholder character (replaced before the two-letter ones).
local threeCharsInit = {
	ngv = n6,
}
-- Letters that are converted to a tone digit only in syllable-final position
-- (end of text, or before a character that is neither a tone digit nor a vowel).
local conditionalTones1 = {
	["h"] = "6",
}

-- Syllable-final consonants that carry an implicit tone: in the same positions as above,
-- the consonant is kept and the listed tone digit is appended.
local conditionalTones2 = {
	["m"] = "m1",
	["n"] = "n1",
	[n4] = n4 .. "1",
	["ŋ"] = "ŋ1",
	["k"] = "k7",
	["p"] = "p7",
	["t"] = "t7",
	["b"] = "b8",
	["d"] = "d8",
	["g"] = "g8",
}
-- Final form of single characters: placeholders and non-ASCII letters become
-- "base letter + suffix character", with "!" marking old-orthography letters.
-- Applied per UTF-8 character via gsub with a table replacement.
local oneCharFinal = {
	["ə"] = "a" .. a .. "!",
	[b2] = "b" .. a,
	[g2] = "g" .. a,
	[g3] = "g" .. b,
	[m2] = "m" .. a,
	["ƃ"] = "m" .. a .. "!",
	[m4] = "m" .. b,
	[n2] = "n" .. a,
	["ƌ"] = "n" .. a .. "!",
	[n4] = "n" .. b,
	["ŋ"] = "n" .. b .. "!",
	[n6] = "n" .. c,
	[n7] = "n" .. c .. "!",
	[n8] = "n" .. d,
	["ɵ"] = "o" .. a .. "!",
	["ɯ"] = "w!",
}

-- Final form of the two-letter vowels.
local twoCharsFinal = {
	ae = "a" .. a,
	oe = "o" .. a,
}
-- Returns the wikitext of the page called `pageName`, or "" when the name is missing,
-- is not a valid title (mw.title.new returns nil), or the page has no content.
local function getPageContent(pageName)
	if type(pageName) ~= "string" or pageName == "" then
		return ""
	end
	local title = mw.title.new(pageName)
	if not title then
		return ""
	end
	return title:getContent() or ""
end

-- True if `content` contains a {{za-pron|...}} call ending in new_bor=1 or new_bor=y.
-- The patterns require the parameter to be immediately followed by "}}".
local function hasNewBor(content)
	return mw.ustring.match(content, "{{za%-pron|.*new_bor=1}}") ~= nil
		or mw.ustring.match(content, "{{za%-pron|.*new_bor=y}}") ~= nil
end

--- Generates the sortkey for a piece of Zhuang text.
-- @param text  the term to sort; it is also used as the title of the page whose wikitext
--              is inspected for the new_bor parameter
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the upper-cased sortkey string
function export.makeSortKey(text, lang, sc)
	local origText = text
	text = mw.ustring.lower(text)

	-- convert any consonant clusters to single characters, which is necessary for later regexes, and unconditional tone letters to numbers
	-- NOTE(review): pairs() order is unspecified, so overlapping clusters (e.g. "mb"/"by" in "mby") depend on table iteration order.
	for from, to in pairs(threeCharsInit) do
		text = text:gsub(from, to)
	end
	for from, to in pairs(twoCharsInit) do
		text = text:gsub(from, to)
	end
	text = text:gsub(UTF8_char, oneCharInit)

	-- conditionally convert any conditional tone letters to numbers (e.g. "h" can be a consonant or a tone letter)
	for from, to in pairs(conditionalTones1) do
		text = text:gsub(from .. "$", to)
		text = mw.ustring.gsub(text, from .. "([^1-8aeiouwəɵɯ])", to .. "%1")
	end

	-- conditionally add a tone number to any syllable-final consonants which do not have them
	for from, to in pairs(conditionalTones2) do
		text = text:gsub(from .. "$", to)
		text = mw.ustring.gsub(text, from .. "([^1-8aeiouwəɵɯ])", to .. "%1")
	end

	-- conditionally add a tone number to any syllable-final vowels which do not have them
	text = mw.ustring.gsub(text, "([^1-8%s%p])$", "%11")
	text = mw.ustring.gsub(text, "([1-8][" .. a .. "-" .. d .. "])1$", "%1")
	text = mw.ustring.gsub(text, "([aeiouwəɵɯ])([^1-8aeiouwəɵɯ][^1-8])", "%11%2")

	-- convert clusters and non-ASCII characters to final form, to achieve correct order
	for from, to in pairs(twoCharsFinal) do
		text = text:gsub(from, to)
	end
	text = text:gsub(UTF8_char, oneCharFinal)

	-- move "!" to the end and remove any duplicates, to ensure old orthography terms are sorted immediately after their new equivalents
	-- (one pass per "!" present; each pass moves the first "!" that has text after it to the end)
	for _ in text:gmatch("!") do
		text = text:gsub("(!)(.+)", "%2%1")
	end
	text = text:gsub("!+", "!")

	-- if tone 5 is substituted for tone 1 in pronunciation, also substitute in sortkey (i.e. as though "q" were written)
	local page = getPageContent(origText)
	if hasNewBor(page) then
		text = mw.ustring.gsub(text, "1", "5")
	else
		-- if the page has an old orthography template, then check the modern orthography page and substitute if present there (i.e. as though "ƽ" were written)
		-- "spelling of" takes precedence over "orthography of", as in the original elseif chain
		local parentName = mw.ustring.match(page, "{{za%-1957 spelling of|(.-)}}")
			or mw.ustring.match(page, "{{za%-1957 orthography of|(.-)}}")
		if parentName then
			-- only the first template parameter can be the page name; anything after a
			-- pipe would make the title invalid
			parentName = parentName:match("^[^|]*")
			if hasNewBor(getPageContent(parentName)) then
				text = mw.ustring.gsub(text, "1", "5" .. a)
			end
		end
	end

	-- decompose, remove appropriate diacritics, then recompose again
	return mw.ustring.upper(mw.ustring.toNFC(mw.ustring.gsub(mw.ustring.toNFD(text), "[" .. remove_diacritics .. "]", "")))
end
-- Language object for Zhuang (code "za"), used by the demo functions below.
local za = require("Module:languages").getByCode("za")

-- Passes `text` to tag_text from Module:script utilities together with the Zhuang language object.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, za)
end
-- Display substitutions for the demo functions: plain tone digits -> superscript digits.
local showsubst1 = {
	["0"] = "⁰",
	["1"] = "¹",
	["2"] = "²",
	["3"] = "³",
	["4"] = "⁴",
	["5"] = "⁵",
	["6"] = "⁶",
	["7"] = "⁷",
	["8"] = "⁸",
}

-- Display substitutions for the demo functions: internal letter/suffix and digit/"!"
-- sequences -> readable subscript/superscript notation. Applied before showsubst1.
-- Entry order is kept as in the original constructor because the table is walked with pairs().
local showsubst2 = {
	["2!"] = "²ᵃ",
	["3!"] = "³ᵃ",
	["4!"] = "⁴ᵃ",
	["5!"] = "⁵ᵃ",
	["6!"] = "⁶ᵃ",
	["A" .. a] = "A₂",
	["A" .. a .. "!"] = "A₂ₐ",
	["B" .. a] = "B₂",
	["G" .. a] = "G₂",
	["G" .. b] = "G₃",
	["M" .. a] = "M₂",
	["M" .. a .. "!"] = "M₂ₐ",
	["M" .. b] = "M₃",
	["N" .. a] = "N₂",
	["N" .. a .. "!"] = "N₂ₐ",
	["N" .. b] = "N₃",
	["N" .. b .. "!"] = "N₃ₐ",
	["N" .. c] = "N₄",
	["N" .. c .. "!"] = "N₄ₐ",
	["N" .. d] = "N₅",
	["O" .. a] = "O₂",
	["O" .. a .. "!"] = "O₂ₐ",
	["W!"] = "Wₐ",
}
--- Renders each positional argument of `frame` as a list item showing its sortkey
-- (in display notation) followed by the tagged term on an indented line.
-- @param frame  frame object; its positional args are the terms to show
-- @return the concatenated wikitext
function export.showSortkey(frame)
	local lines = {}
	for _, word in ipairs(frame.args) do
		local scriptCode = za:findBestScript(word):getCode()
		local key = export.makeSortKey(word, "za", scriptCode)
		-- multi-character sequences first, then the remaining bare digits
		for pattern, replacement in pairs(showsubst2) do
			key = mw.ustring.gsub(key, pattern, replacement)
		end
		for pattern, replacement in pairs(showsubst1) do
			key = mw.ustring.gsub(key, pattern, replacement)
		end
		lines[#lines + 1] = "\n* <code>" .. key .. "</code>\n: " .. tag(word)
	end
	return table.concat(lines)
end
--- Sorts the positional arguments of `frame` by their sortkeys and renders them as a
-- list, each term followed by its sortkey (in display notation) in parentheses.
-- @param frame  frame object; its positional args are the terms to sort
-- @return the concatenated wikitext
function export.showSorting(frame)
	local terms = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
	end

	-- Wrapped with Module:memoize so that repeated calls for the same term (during
	-- sorting and again for display) can reuse the result instead of re-reading pages.
	-- NOTE(review): presumes the wrapper caches by argument — confirm in Module:memoize.
	local makeSortKey = require("Module:memoize")(export.makeSortKey)

	table.sort(terms, function(term1, term2)
		return makeSortKey(term1) < makeSortKey(term2)
	end)

	for i = 1, #terms do
		local term = terms[i]
		-- makeSortKey does not read its lang/sc arguments, so the one-argument call
		-- already used for sorting yields the same key.
		local sortkey = makeSortKey(term)
		-- multi-character sequences first, then the remaining bare digits
		for from, to in pairs(showsubst2) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		for from, to in pairs(showsubst1) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		terms[i] = "\n* " .. tag(term) .. " (<code>" .. sortkey .. "</code>)"
	end
	return table.concat(terms)
end
return export