local tests = require('Module:UnitTests')
local m_typing = require('Module:typing-aids')
local get_by_code = require('Module:languages').getByCode
local decompose = mw.ustring.toNFD
-- Caches keyed by language code: language objects returned by get_by_code,
-- and the display functions that tag_gen builds from them.
local langs = {}
local tag_funcs = {}

--- Build (or fetch from cache) a function that wraps text in an HTML <span>
-- carrying a script class and a lang attribute.
--
-- Assumes one script per language: the script is detected once, from the
-- first text seen for a language code, and the resulting function is reused
-- for every later call with that code.
--
-- @param test_text  sample text handed to the language object's findBestScript
-- @param langCode   language code looked up through get_by_code
-- @return function(text) returning the text wrapped in the span
-- Raises an error if the language code is unknown or no script code can be
-- determined for the sample text.
local function tag_gen(test_text, langCode)
	local cached = tag_funcs[langCode]
	if cached then
		return cached
	end

	local lang = langs[langCode]
	if not lang then
		lang = get_by_code(langCode) or error('The language code ' .. langCode .. ' is invalid.')
		langs[langCode] = lang
	end

	-- Resolve the script in two steps. Chaining :getCode() directly onto the
	-- findBestScript call would fail with a generic "attempt to index a nil
	-- value" if no script object came back, and the message below could never
	-- be reached.
	local script = lang:findBestScript(test_text)
	local scCode = script and script:getCode()
	if not scCode then
		error('No script could be found for the text ' .. test_text .. ' and the language code ' .. langCode .. '.')
	end

	local before, after = '<span class="' .. scCode .. '" lang="' .. langCode .. '">', '</span>'
	local function func(text)
		return before .. text .. after
	end
	tag_funcs[langCode] = func
	return func
end
-- Options tables (currently only { display = ... }) cached per language code.
local options_cache = {}

--- Run one typing-aids replacement and compare it with the expected text.
--
-- Both the actual and the expected strings are converted to NFD before the
-- comparison, so precomposed and decomposed spellings compare equal.
--
-- @param code             input text passed to m_typing.replace
-- @param expected         text the replacement should produce
-- @param lang             optional language code; when given it is passed as
--                         the first positional argument of m_typing.replace
-- @param transliteration  when truthy, no display function is generated for
--                         this language code
-- @param sc               optional script code forwarded as the sc argument
function tests:check_output(code, expected, lang, transliteration, sc)
	local result
	if lang then
		result = m_typing.replace{ lang, code, sc = sc }
	else
		result = m_typing.replace{ code, sc = sc }
	end
	result = decompose(result)
	expected = decompose(expected)

	local options
	if lang then
		options = options_cache[lang]
		if not options and not transliteration then
			-- Detect the script from the expected text, not from the output
			-- of the module under test. The display function is cached for
			-- the whole language, so one wrong-script result on the first
			-- case would otherwise fix the class for every later case.
			options = { display = tag_gen(expected, lang) }
			options_cache[lang] = options
		end
	end

	self:equals(
		code,
		result,
		expected,
		options
	)
end
--- Feed every row of an examples table to check_output.
--
-- A language code ending in "-tr" selects transliteration mode.
-- Three-element rows outside transliteration mode are
-- { input, alternative input, expected }: the first element, and the second
-- when it differs from the first, must both produce the third.
-- Every other row is checked as { input, expected } using its first two
-- elements.
--
-- @param examples  sequence of example rows
-- @param lang      optional language code
-- @param sc        optional script code forwarded to check_output
function tests:do_tests(examples, lang, sc)
	local transliteration = false
	if lang ~= nil then
		transliteration = lang:find("%-tr$") ~= nil
	end

	for _, row in ipairs(examples) do
		if transliteration or #row ~= 3 then
			self:check_output(row[1], row[2], lang, transliteration, sc)
		else
			local target = row[3]
			self:check_output(row[1], target, lang, nil, sc)
			if row[2] ~= row[1] then
				self:check_output(row[2], target, lang, nil, sc)
			end
		end
	end
end
-- Replacements checked with no language code passed to do_tests.
function tests:test_all()
	local cases = {
		{ "*dye_'ws", "*dyḗws" },
		{ "*n0mr0to's", "*n̥mr̥tós" },
		{ "*tk'e'yti", "*tḱéyti" },
		{ "*h1es-", "*h₁es-" },
		{ "*t_ep-e'h1(ye)-ti", "*tₔp-éh₁(ye)-ti" },
		{ "*h1e'k'wos", "*h₁éḱwos" },
		{ "*bhebho'ydhe", "*bʰebʰóydʰe" },
		{ "*dh3to's", "*dh₃tós" },
		{ "*t'a_ko^`", "*þākǫ̂" },
		{ "*T'eudo_balt'az", "*Þeudōbalþaz" },
		{ "*bo_kijo_`", "*bōkijǭ" },
		{ "*tat^t^o_", "*taťťō" },
		{ "*d^o_'yyon", "*ďṓyyon" },
	}
	self:do_tests(cases)
end
-- Rows shared by test_Avestan and test_Avestan_tr.
-- Each row is { shortcut input, transliteration, native-script text }.
local ae_examples = {
	{ "ap", "ap", "𐬀𐬞" },
	{ "xs.^uuas^", "xṣ̌uuaš", "𐬑𐬴𐬎𐬎𐬀𐬱" },
	{ "v@hrka_na", "vəhrkāna", "𐬬𐬆𐬵𐬭𐬐𐬁𐬥𐬀" },
	{ "nae_za", "naēza", "𐬥𐬀𐬉𐬰𐬀" },
	{ "zaaO", "zā̊", "𐬰𐬃" },
	{ "hizwaO", "hizuuå", "𐬵𐬌𐬰𐬎𐬎𐬂" },
}
-- Runs the shared Avestan rows with the language code "ae".
function tests:test_Avestan()
	self:do_tests(ae_examples, "ae")
end
-- Runs the shared Avestan rows in transliteration mode ("ae-tr").
function tests:test_Avestan_tr()
	self:do_tests(ae_examples, "ae-tr")
end
-- { input, expected } rows checked with the language code "akk".
function tests:test_Akkadian()
	local cases = {
		{ "ša", "𒊭" },
		-- { "transliteration", "result" },
	}
	self:do_tests(cases, "akk")
end
-- Rows shared by test_Armenian and test_Armenian_tr.
-- Each row is { shortcut input, transliteration, native-script text }.
local hy_examples = {
	{ "azgaynac`um", "azgaynacʿum", "ազգայնացում" },
	{ "terew", "terew", "տերև" },
	{ "burz^uazia", "buržuazia", "բուրժուազիա" },
	{ "kol_mnaki", "kołmnaki", "կողմնակի" },
}
-- Runs the shared Armenian rows with the language code "hy".
function tests:test_Armenian()
	self:do_tests(hy_examples, "hy")
end
-- Runs the shared Armenian rows in transliteration mode ("hy-tr").
function tests:test_Armenian_tr()
	self:do_tests(hy_examples, "hy-tr")
end
-- { input, expected } rows checked with the language code "ar".
function tests:test_Arabic()
	local cases = {
		{ "al-Huruuf al-qamariyyat'", "الْحُرُوف الْقَمَرِيَّة" },
		{ "al-Huruuf al-xamsiyyat'", "الْحُرُوف الشَّمْسِيَّة" },
		{ "ealifu WlwaSli", "أَلِفُ ٱلْوَصْلِ" },
		{ "maae", "مَاء" },
		{ "muemin", "مُؤْمِن" },
		{ "eiDaafat'", "إِضَافَة" },
		{ "eaab", "آب" },
		{ "qureaan", "قُرْآن" },
		{ "qiTTat'", "قِطَّة" },
		{ "faEEaal", "فَعَّال" },
		{ "xayeu", "شَيْءُ" },
		{ "xayeaN", "شَيْءً" },
		{ "daaeimaN", "دَائِمًا" },
		{ "mabduueat'", "مَبْدُوءَة" },
		{ "mabduu'at'", "مَبْدُوءَة" },
		{ "badaaeiyyuN", "بَدَائِيٌّ" },
		{ "badaaeat'", "بَدَاءَة" },
		{ "maktuub", "مَكْتُوب" },
		{ "taHriir", "تَحْرِير" },
		{ "EuZmaaa", "عُظْمَى" },
		{ "ean0", "أَنْ" },
		{ "law0", "لَوْ" },
		{ "xay'aN", "شَيْءً" },
		{ "ta7riir", "تَحْرِير" },
		{ "3axarat'", "عَشَرَة" },
	}
	self:do_tests(cases, "ar")
end
-- { input, expected } rows checked with the language code "fa".
function tests:test_Persian()
	local cases = {
		{ "brAdr", "برادر" },
	}
	self:do_tests(cases, "fa")
end
-- { input, expected } rows checked with the language code "ine-pro".
function tests:test_PIE()
	local cases = {
		{ "*dye_'ws", "*dyḗws" },
		{ "*n0mr0to's", "*n̥mr̥tós" },
		{ "*tk'e'yti", "*tḱéyti" },
		{ "*h1es-", "*h₁es-" },
		{ "*t_ep-e'h1(ye)-ti", "*tₔp-éh₁(ye)-ti" },
		{ "*h1e'k'wos", "*h₁éḱwos" },
		{ "*bhebho'ydhe", "*bʰebʰóydʰe" },
		{ "*dh3to's", "*dh₃tós" },
		{ "*dhewg'h-", "*dʰewǵʰ-" },
	}
	self:do_tests(cases, "ine-pro")
end
-- { input, expected } rows checked with the language code "gem-pro".
function tests:test_Germanic()
	local cases = {
		{ "*t'a_ko^`", "*þākǫ̂" },
		{ "*T'eudo_balt'az", "*Þeudōbalþaz" },
		{ "*bo_kijo_`", "*bōkijǭ" },
	}
	self:do_tests(cases, "gem-pro")
end
-- { input, expected } rows checked with the language code "got".
function tests:test_Gothic()
	local cases = {
		{ "ƕaiwa", "𐍈𐌰𐌹𐍅𐌰" },
		{ "anþar", "𐌰𐌽𐌸𐌰𐍂" },
		{ "fidwōr", "𐍆𐌹𐌳𐍅𐍉𐍂" },
		{ "fidwor", "𐍆𐌹𐌳𐍅𐍉𐍂" },
		{ "mikils", "𐌼𐌹𐌺𐌹𐌻𐍃" },
		{ "hēr", "𐌷𐌴𐍂" },
		{ "her", "𐌷𐌴𐍂" },
		{ "vac", "𐍈𐌰𐌸" },
		-- { "", "" },
	}
	self:do_tests(cases, "got")
end
-- { input, expected } rows checked with the language code "grk-pro".
function tests:test_Hellenic()
	local cases = {
		{ "*tat^t^o_", "*taťťō" },
		{ "*d^o_'yyon", "*ďṓyyon" },
		{ "*gw@n'n'o_", "*gʷəňňō" },
		{ "*gw@n^n^o_", "*gʷəňňō" },
		{ "*kwhe_r", "*kʷʰēr" },
		{ "*khwe_r", "*kʷʰēr" },
	}
	self:do_tests(cases, "grk-pro")
end
-- { input, expected } rows checked with the language code "grc".
function tests:test_Greek()
	local cases = {
		{ "a__i", "ᾱͅ" },
		{ "a)lhqh/s", "ἀληθής" },
		{ "a)lhqhs*", "ἀληθησ" },
		{ "a)lhqhs-", "ἀληθησ-" },
		{ "a^)nh/r", "ᾰ̓νήρ" },
		{ "Phlhi+a/dhs", "Πηληϊάδης" },
		{ "Phlhi^+a^/dhs", "Πηληῐ̈ᾰ́δης" },
		{ "Πηληϊ^ά^δης", "Πηληῐ̈ᾰ́δης" },
		{ "e)a_/n", "ἐᾱ́ν" },
		{ "ἐά_ν", "ἐᾱ́ν" },
		{ "pa=sa^", "πᾶσᾰ" },
		{ "u_(mei=s", "ῡ̔μεῖς" },
		{ "a/)^ner", "ᾰ̓́νερ" },
		{ "a/^)ner", "ᾰ̓́νερ" },
		{ "a)/^ner", "ᾰ̓́νερ" },
		{ "a)^/ner", "ᾰ̓́νερ" },
		{ "dai+/frwn", "δαΐφρων" },
		{ "dai/+frwn", "δαΐφρων" },
	}
	self:do_tests(cases, "grc")
end
-- { input, expected } rows checked with the language code "hit".
function tests:test_Hittite()
	local cases = {
		{ "a-ku", "𒀀𒆪" },
		{ "an-tu-wa-ah-ha-as", "𒀭𒌅𒉿𒄴𒄩𒀸" },
		{ "an-tu-wa-aḫ-ḫa-aš", "𒀭𒌅𒉿𒄴𒄩𒀸" },
		{ "<sup>DINGIR</sup>IŠKUR", "𒀭𒅎" }, -- Akkadian actually?
	}
	self:do_tests(cases, "hit")
end
-- { input, expected } rows checked with the language code "kn".
function tests:test_Kannada()
	local cases = {
		{ "yaMtra", "ಯಂತ್ರ" },
		{ "sadāśiva", "ಸದಾಶಿವ" },
		{ "muṣṭi", "ಮುಷ್ಟಿ" },
		{ "dhairya", "ಧೈರ್ಯ" },
		{ "ELu", "ಏಳು" },
		{ "iMguzETiyA", "ಇಂಗುಶೇಟಿಯಾ" },
		{ "upayOga", "ಉಪಯೋಗ" },
	}
	self:do_tests(cases, "kn")
end
-- Rows shared by test_Sanskrit and test_Sanskrit_tr.
-- Each row is { shortcut input, transliteration, native-script text }.
local sa_examples = {
	{ "saMskRta/", "saṃskṛtá", "संस्कृत" },
	{ "kSatri/ya", "kṣatríya", "क्षत्रिय" },
	{ "rAja suprabuddha", "rāja suprabuddha", "राज सुप्रबुद्ध" },
	{ "zAkyamuni", "śākyamuni", "शाक्यमुनि" },
	{ "siMha", "siṃha", "सिंह" },
	{ "nAman", "nāman", "नामन्" },
	{ "anA/", "anā́", "अना" },
	{ "ayuSmAn", "ayuṣmān", "अयुष्मान्" },
	{ "ghatsyati", "ghatsyati", "घत्स्यति" },
	{ "tApa-i", "tāpa-i", "तापइ" },
	{ "tApaï", "tāpaï", "तापइ" },
}
-- Runs the shared Sanskrit rows with the language code "sa".
function tests:test_Sanskrit()
	self:do_tests(sa_examples, "sa")
end
-- Runs the shared Sanskrit rows in transliteration mode ("sa-tr").
function tests:test_Sanskrit_tr()
	self:do_tests(sa_examples, "sa-tr")
end
-- { input, expected } rows checked with the language code "mai".
function tests:test_Maithili()
	local cases = {
		{ "maithilI", "𑒧𑒻𑒟𑒱𑒪𑒲" },
		{ "ghO_r_A", "𑒒𑒼𑒛𑓃𑒰" },
		{ "ga_rh_a", "𑒑𑒜𑓃" },
		{ "mokAma", "𑒧𑒽𑒏𑒰𑒧" },
		{ "pa~cakhaNDI", "𑒣𑒿𑒔𑒐𑒝𑓂𑒛𑒲" },
		{ "heraba", "𑒯𑒺𑒩𑒥" },
	}
	self:do_tests(cases, "mai")
end
-- { input, expected } rows checked with the language code "mwr".
function tests:test_Marwari()
	local cases = {
		{ "mahAjanI", "𑅬𑅱𑅛𑅧𑅑" },
		{ "mukAMm", "𑅬𑅒𑅕𑅧𑅬" },
		{ "AvalA", "𑅐𑅯𑅮" },
		{ "AgarA", "𑅐𑅗𑅭" },
		{ "upama", "𑅒𑅨𑅬" },
		{ "iMdaura", "𑅑𑅧𑅥𑅒𑅭" },
	}
	self:do_tests(cases, "mwr")
end
-- { input, expected } rows checked with the language code "peo".
function tests:test_Old_Persian()
	local cases = {
		{ "aitiiy", "𐎠𐎡𐎫𐎡𐎹" },
		{ "raucah", "𐎼𐎢𐎨𐏃" },
		{ "ham", "𐏃𐎶" },
		{ "jiva", "𐎪𐎺" },
		{ "daraniyakara", "𐎭𐎼𐎴𐎹𐎣𐎼" },
		{ "daragama", "𐎭𐎼𐎥𐎶" },
	}
	self:do_tests(cases, "peo")
end
-- { input, expected } rows checked with the language code "xpr" and the
-- explicit script code "Mani".
function tests:test_Parthian()
	local cases = {
		{ "tšynd", "𐫤𐫢𐫏𐫗𐫅" },
		{ "xʾrtʾg", "𐫟𐫀𐫡𐫤𐫀𐫃" },
		{ "hʾmhyrz", "𐫍𐫀𐫖𐫍𐫏𐫡𐫉" },
		{ "ʿšnwhr", "𐫙𐫢𐫗𐫇𐫍𐫡" },
		{ "hʾwsʾr", "𐫍𐫀𐫇𐫘𐫀𐫡" },
	}
	self:do_tests(cases, "xpr", "Mani")
end
-- { input, expected } rows checked with the language code "ja".
function tests:test_Japanese()
	local cases = {
		{ "iro ha nihoheto", "いろ は にほへと" },
		{ "uwyi no okuyama", "うゐ の おくやま" },
		{ "FAMIRI-MA-TO", "ファミリーマート" },
		{ "altu", "あっ" },
		{ "hi/mi/tu", "ひ・み・つ" },
		{ "han'i", "はんい" },
		{ "hanni", "はんい" },
		{ "konnyou", "こんよう" },
		{ "mannnaka", "まんなか" },
		{ "attiike", "あっちいけ" },
		{ "acchiike", "あっちいけ" },
		{ "upnusi", "うpぬし" },
	}
	self:do_tests(cases, "ja")
end
-- { input, expected } rows checked with the language code "cu".
function tests:test_Old_Church_Slavonic()
	local cases = {
		{ "ljudije", "людиѥ" },
		{ "azuh", "азъ" },
		{ "buky", "боукꙑ" },
		{ "mŭčati", "мъчати" },
		{ "Iosija", "Иосиꙗ" },
	}
	self:do_tests(cases, "cu")
end
-- Rows shared by test_Old_Marathi and test_Old_Marathi_tr.
-- Each row is { shortcut input, transliteration, native-script text }.
local omr_examples = {
	{ "kuhA", "kuhā", "𑘎𑘳𑘮𑘰" },
	{ "nibara", "nibara", "𑘡𑘲𑘤𑘨" },
	{ "nIbara", "nībara", "𑘡𑘲𑘤𑘨" },
	{ "Ai", "āi", "𑘁𑘃" },
	{ "AI", "āī", "𑘁𑘃" },
	{ "suta", "suta", "𑘭𑘳𑘝" },
	{ "sUta", "suta", "𑘭𑘳𑘝" },
	{ "uta", "uta", "𑘄𑘝" },
	{ "Uta", "uta", "𑘄𑘝" },
	{ "na-i", "na-i", "𑘡𑘃" },
	{ "naï", "naï", "𑘡𑘃" },
	{ "a-ila", "a-ila", "𑘀𑘃𑘩" },
	{ "aïla", "aïla", "𑘀𑘃𑘩" },
	{ "bhavai", "bhavai", "𑘥𑘪𑘺" },
	{ "cauka", "cauka", "𑘓𑘼𑘎" },
	{ "ca-utha", "ca-utha", "𑘓𑘄𑘞" },
	{ "caütha", "caütha", "𑘓𑘄𑘞" },
	{ "a-ukSa", "a-ukṣa", "𑘀𑘄𑘎𑘿𑘬" },
	{ "aükSa", "aükṣa", "𑘀𑘄𑘎𑘿𑘬" },
	{ "AThoLI", "āṭhoḷī", "𑘁𑘙𑘻𑘯𑘲" },
	{ "raMbhA", "raṃbhā", "𑘨𑘽𑘥𑘰" },
	{ "hRdA", "hṛdā", "𑘮𑘵𑘟𑘰" },
	{ "Rkha", "ṛkha", "𑘆𑘏" },
	{ "SaDa", "ṣaḍa", "𑘬𑘚" },
	{ "kSeNa", "kṣeṇa", "𑘎𑘿𑘬𑘹𑘜" },
	{ "zobhaNe", "śobhaṇe", "𑘫𑘻𑘥𑘜𑘹" },
	{ "arha", "arha", "𑘀𑘨𑘿𑘮" },
	{ "mar_hATI", "maṟhāṭī", "𑘦𑘨𑘿𑘮𑘰𑘘𑘲" },
}
-- Runs the shared Old Marathi rows with the language code "omr".
function tests:test_Old_Marathi()
	self:do_tests(omr_examples, "omr")
end
-- Runs the shared Old Marathi rows in transliteration mode ("omr-tr").
function tests:test_Old_Marathi_tr()
	self:do_tests(omr_examples, "omr-tr")
end
-- { input, expected } rows checked with the language code "os".
function tests:test_Ossetian()
	local cases = {
		{ "fynʒ", "фындз" },
		{ "æxsæv", "ӕхсӕв" },
		{ "c’æx", "цъӕх" },
		{ "biræǧ", "бирӕгъ" },
		{ "Ræstʒinad", "Рӕстдзинад" },
	}
	self:do_tests(cases, "os")
end
-- { input, expected } rows checked with the language code "arc" and the
-- explicit script code "Armi".
function tests:test_Imperial_Aramaic()
	local cases = {
		{ "'nḥn", "𐡀𐡍𐡇𐡍" },
	}
	self:do_tests(cases, "arc", "Armi")
end
-- { input, expected } rows checked with the language code "xsa".
function tests:test_Old_South_Arabian()
	local cases = {
		{ "s²ms¹", "𐩦𐩣𐩪" },
	}
	self:do_tests(cases, "xsa")
end
-- { input, expected } rows checked with the language code "inc-kam".
function tests:test_Siddham()
	local cases = {
		{ "kanta", "𑖎𑖡𑖿𑖝" },
		{ "purAna", "𑖢𑖲𑖨𑖯𑖡" },
		{ "Na-i", "𑖜𑖂" },
		{ "kaNNa", "𑖎𑖜𑖿𑖜" },
		{ "samAia", "𑖭𑖦𑖯𑖂𑖀" },
		{ "tujjhu", "𑖝𑖲𑖕𑖿𑖖𑖲" },
		{ "kahante", "𑖎𑖮𑖡𑖿𑖝𑖸" },
	}
	self:do_tests(cases, "inc-kam")
end
-- { input, expected } rows checked with the language code "bho".
function tests:test_Kaithi()
	local cases = {
		{ "hanU", "𑂯𑂢𑂴" },
		{ "pa_rh_ahi", "𑂣𑂜𑂯𑂱" },
		{ "siya~", "𑂮𑂱𑂨𑂀" },
		{ "jhara-i", "𑂕𑂩𑂅" },
		{ "jharaï", "𑂕𑂩𑂅" },
		{ "Agi", "𑂄𑂏𑂱" },
		{ "āgi", "𑂄𑂏𑂱" },
	}
	self:do_tests(cases, "bho")
end
-- { input, expected } rows checked with the language code "saz".
function tests:test_Saurashtra()
	local cases = {
		{ "pani", "ꢦꢥꢶ" },
		{ "vAg", "ꢮꢵꢔ꣄" },
		{ "ghoDo", "ꢕꣁꢞꣁ" },
		{ "dukkar", "ꢣꢸꢒ꣄ꢒꢬ꣄" },
		{ "l:ovo", "ꢭꢴꣁꢮꣁ" },
	}
	self:do_tests(cases, "saz")
end
-- { input, expected } rows checked with the language code "sd".
function tests:test_Sindhi()
	local cases = {
		{ "siMdhī", "𑋝𑋡𑋟𑋐𑋢" },
		{ "bhAGo", "𑋖𑋠𑊿𑋧" },
		{ "mAlu", "𑋗𑋠𑋚𑋣" },
		{ "jeko", "𑋂𑋥𑊺𑋧" },
		{ "xabara", "𑊻𑋩𑋔𑋙" },
		{ "muqAmu", "𑋗𑋣𑊺𑋩𑋠𑋗𑋣" },
		{ "meM", "𑋗𑋥𑋟" },
		{ "gunAhu", "𑊼𑋣𑋑𑋠𑋞𑋣" },
		{ "_gh_araza", "𑊼𑋩𑋙𑋂𑋩" },
		{ "_gh_ufA", "𑊼𑋩𑋣𑋓𑋩𑋠" },
		{ "bA_gh_u", "𑋔𑋠𑊼𑋩𑋣" },
		{ "ba_gh_adAdu", "𑋔𑊼𑋩𑋏𑋠𑋏𑋣" },
		{ "ghaTaNu", "𑊾𑋆𑋌𑋣" },
	}
	self:do_tests(cases, "sd")
end
--[[
function tests:test_Old_North_Arabian()
-- We need tests to verify that letters with diacritics or modifiers
-- transliterate correctly.
local examples = {
{ "'lšdy", "𐪑𐪁𐪆𐪕𐪚" },
}
self:do_tests(examples, "sem-tha")
end
--]]
--[[
To add another example, place the following code
within the braces of an "examples" table:
{ "shortcut", "expected result" },
{ "", "" },
or, for a language whose examples table is shared with a "-tr"
(transliteration) test, such as Sanskrit,
{ "Harvard-Kyoto", "IAST", "Devanagari" },
{ "", "", "" },
]]
-- Hand the populated tests table back as this module's value.
return tests