Module:hi-IPA

Wiktionary වෙතින්

Hindi IPA pronunciation module. See {{hi-IPA}}.

Testcases

Module:hi-IPA/testcases:

17 of 59 tests failed. (refresh)

Text | Expected | Actual | Comments
test_all:
Passedअशिष्ट॰ता (aśiṣṭa.tā)ə.ʃɪʂʈ.t̪ɑːə.ʃɪʂʈ.t̪ɑːsyllabification
Passedअशिष्ट-ता (aśiṣṭ-tā)ə.ʃɪʂʈ.t̪ɑːə.ʃɪʂʈ.t̪ɑːsyllabification
Failedअल्प्संख्यक (alpsaṅkhyak)əlp.səŋ.kʰjəkəl.psəŋ.kʰjəksyllabification
Failedअंडकोष (aṇḍkoṣ)əɳɖ.koːʂəɳ.ɖkoːʂsyllabification
Passedअंग्रेज़ (aṅgrez)əŋ.ɡɾeːzəŋ.ɡɾeːzsyllabification
Failedअंटर्क्टिका (aṇṭarkṭikā)əɳ.ʈəɾk.ʈɪ.kɑːəɳ.ʈəɾ.kʈɪ.kɑːsyllabification
Passedमैं (ma͠i)mɛ̃ːmɛ̃ː
Passedदेश (deś)d̪eːʃd̪eːʃ
Passedमेरा (merā)meː.ɾɑːmeː.ɾɑː
Passedखिलौना (khilaunā)kʰɪ.lɔː.nɑːkʰɪ.lɔː.nɑː
Passedनौटंकी (nauṭaṅkī)nɔː.ʈəŋ.kiːnɔː.ʈəŋ.kiː
Passedहौं (ha͠u)ɦɔ̃ːɦɔ̃ː
Failedमुँह (mũh)mũːʱmʊ̃ɦ
Failedमाह (māh)mɑːʱmɑːɦ
Failedबहना (bahnā)bəʱ.nɑːbəɦ.nɑː
Failedविवाह (vivāh)ʋɪ.ʋɑːʱʋɪ.ʋɑːɦ
Passedग़म (ġam)ɣəmɣəm
Passedख़रगोश (xargoś)xəɾ.ɡoːʃxəɾ.ɡoːʃ
Passedइकट्ठा (ikaṭṭhā)ɪ.kəʈ.ʈʰɑːɪ.kəʈ.ʈʰɑː
Passedसंस्थान (sansthān)sən.st̪ʰɑːnsən.st̪ʰɑːn
Passedमधु (madhu)mə.d̪ʱuːmə.d̪ʱuːfinal u is lengthened, aspiration should not be split in syllabification
Failedमियाँ (miyā̃)miː.jɑ̃ːmɪ.jɑ̃ːi + y lengthens i
Failedमुहाफ़ज़ाह (muhāfzāh)mʊ.ɦɑːf.zɑːʱmʊ.ɦɑːf.zɑːɦ
Passedस्त्रीत्व (strītva)st̪ɾiːt̪.ʋᵊst̪ɾiːt̪.ʋᵊ
Passedशास्त्र (śāstra)ʃɑːs.t̪ɾᵊʃɑːs.t̪ɾᵊ
Passedसमाचार (samācār)sə.mɑː.t͡ʃɑːɾsə.mɑː.t͡ʃɑːɾ
Passedश्रावण (śrāvaṇ)ʃɾɑː.ʋəɳʃɾɑː.ʋəɳ
Passedहमें (hamẽ)ɦə.mẽːɦə.mẽː
Passedमें (mẽ)mẽːmẽː
Failedभैया (bhaiyā)bʱə.iː.jɑːbʱə̯i.jɑː
Failedसुलह (sulah)sʊ.ləʱsʊ.ləɦ
Passedदृष्टि (dŕṣṭi)d̪ɾɪʂ.ʈiːd̪ɾɪʂ.ʈiː
Passedसोई (soī)soː.iːsoː.iː
Passedखाइए (khāie)kʰɑː.ɪ.eːkʰɑː.ɪ.eː
Passedशक्ति (śakti)ʃək.t̪iːʃək.t̪iː
Passedउस्ताद (ustād)ʊs.t̪ɑːd̪ʊs.t̪ɑːd̪
Passedपंकज (paṅkaj)pəŋ.kəd͡ʒpəŋ.kəd͡ʒ
Passedमाला (mālā)mɑː.lɑːmɑː.lɑː
Passedदीवार (dīvār)d̪iː.ʋɑːɾd̪iː.ʋɑːɾ
Passedसुरुची (surucī)sʊ.ɾʊ.t͡ʃiːsʊ.ɾʊ.t͡ʃiː
Passedनिरस्त्र (nirastra)nɪ.ɾəs.t̪ɾᵊnɪ.ɾəs.t̪ɾᵊ
Passedनिर्वृत्त (nirvŕtt)nɪɾ.ʋɾɪt̪t̪nɪɾ.ʋɾɪt̪t̪
Passedमृत्युंजय (mŕtyuñjay)mɾɪt̪.jʊn.d͡ʒəjmɾɪt̪.jʊn.d͡ʒəj
Failedपितृओं (pitŕon)pɪt̪.ɾõːpɪt̪.ɾoːn
Passedगर्भ॰पात (garbha.pāt)ɡəɾbʱ.pɑːt̪ɡəɾbʱ.pɑːt̪
Passedगर्भ (garbh)ɡəɾbʱɡəɾbʱ
Passedवस्त्र (vastra)ʋəs.t̪ɾᵊʋəs.t̪ɾᵊ
Passedयक्ष्मा (yakṣmā)jək.ʂmɑːjək.ʂmɑː
Passedउत्प्रेक्षा (utprekṣā)ʊt̪.pɾeːk.ʂɑːʊt̪.pɾeːk.ʂɑː
Passedझुंझलाहट (jhuñjhlāhaṭ)d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈd͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ
Passedसंख्या (saṅkhyā)səŋ.kʰjɑːsəŋ.kʰjɑː
Failedघुँघरू (ghũghrū)ɡʱʊŋ.ɡʱɾuːɡʱʊ̃ɡʱ.ɾuː
Passedसंभ्रांत (sambhrānt)səm.bʱɾɑːnt̪səm.bʱɾɑːnt̪
Passedइन्फ़्लुएंज़ा (influenzā)ɪn.flʊ.eːn.zɑːɪn.flʊ.eːn.zɑː
Failedइंफ़्लुएंज़ा (imfluenzā)ɪn.flʊ.eːn.zɑːɪm.flʊ.eːn.zɑː
Failedहिमाचल प्रदेश (himācal pradeś)/ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ/ɦɪ.mɑː.t͡ʃəl‿pɾə.d̪eːʃ
Failedइंक़लाब (iṅqlāb)ɪɴ.qə.l̪ɑːbɪŋ.qlɑːbanusvara before uvulars
Passedचेरापूंजी (cerāpūñjī)t͡ʃeː.ɾɑː.puːn.d͡ʒiːt͡ʃeː.ɾɑː.puːn.d͡ʒiː
Failedचेरापूंजी (cerāpūñjī)t͡ʃeː.ɾäː.pũːɲ.d͡ʒiːt͡ʃeː.ɾäː.pũːn̪.d͡ʒiːnasal allophone before postalveolar

local export = {}

local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")

local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find

-- Maps one transliteration character to its broad IPA value.
-- export.toIPA applies it one character at a time with
-- gsub(translit, ".", correspondences); characters without an entry are
-- left unchanged by that call.
-- The loop over `identical` further down adds identity entries and, because
-- it runs after this constructor, overwrites the "l" entry below with "l".
-- NOTE(review): keys that consist of more than one code point (a base vowel
-- plus a combining tilde) can only be hit by the "." lookup if the source
-- stores them precomposed -- confirm.
local correspondences = {
	["ṅ"] = "ŋ", ["g"] = "ɡ", 
	["c"] = "t͡ʃ", ["j"] = "d͡ʒ", 
	["ṭ"] = "ʈ", ["ḍ"] = "ɖ", ["ṇ"] = "ɳ",
	["t"] = "t̪", ["d"] = "d̪",
	["y"] = "j", ["r"] = "ɾ", ["v"] = "ʋ", ["l"] = "l̪",
	["ś"] = "ʃ", ["ṣ"] = "ʂ", ["ź"] = "ʒ", ["h"] = "ɦ",
	["ṛ"] = "ɽ", ["ž"] = "ʒ", ["ḻ"] = "l̪", ["ġ"] = "ɣ", ["q"] = "q", ["x"] = "x", ["ṉ"] = "n̪", ["ṟ"] = "ɾ",

	-- oral vowels
	["a"] = "ə", ["ā"] = "ɑː", ["i"] = "ɪ",
	["ī"] = "iː", ["o"] = "oː", ["e"] = "eː",
	["u"] = "ʊ", ["ū"] = "uː", ["ŏ"] = "ɔ", ["ĕ"] = "æ",

	-- nasalized vowels
	["ẽ"] = "ẽː", ["ũ"] = "ʊ̃", ["õ"] = "õː", ["ã"] = "ə̃", ["ā̃"] = "ɑ̃ː",  ["ĩ"] = "ɪ̃", ["ī̃"] = "ĩː",

	-- special signs
	["ॐ"] = "oːm", ["ḥ"] = "(ɦ)", ["'"] = "(ʔ)",
}

-- Substitutions applied to the transliteration when style == "nonpersianized"
-- (see export.toIPA).
local perso_arabic = {
	["x"] = "kh", ["ġ"] = "g", ["q"] = "k", ["ž"] = "z", ["z"] = "j", ["f"] = "ph", ["'"] = "",
}

-- Substitutions applied to the transliteration when style == "desanskritanize"
-- (see export.toIPA).
local urdu = {
	["ṣ"] = "ʃ", ["ṇ"] = "n",
}

-- Substitutions applied to the transliteration when style == "dakhini"
-- (see export.toIPA).
local deccani = {
	["q"] = "x",
}

-- Short-to-long vowel map. Not referenced anywhere else in the visible code.
local lengthen = {
	["a"] = "ā", ["i"] = "ī", ["u"] = "ū",
}

-- Characters treated as vowel material by the syllabifier; used inside a
-- character class, so every code point (including the combining tilde and
-- the length mark) counts individually.
local vowels = "aāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Pattern for one vowel character optionally followed by the length mark.
local vowel = "[aāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?"
-- Consonant + "h" sequences that export.toIPA rewrites with "ʱ".
local weak_h = "([gjdḍbṛnm])h"
-- Consonants that export.toIPA rewrites with "ʰ" when followed by "h".
local aspirate = "([kctṭp])"
-- vowel (+ optional tilde), a run of characters that are neither vowels,
-- "." nor "-", then another vowel (+ optional tilde).
local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.%-]+)([" .. vowels .. "]̃?)"

-- Split a run of characters into consonant units.
-- A unit is either one recognised consonant letter or such a letter followed
-- by "h" (an aspirate written as a digraph). Any other character closes the
-- unit collected so far and starts a new one of its own.
-- Returns the units as an array of strings, in order.
local function find_consonants(text)
	local single_consonant = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉḥ]$"
	local aspirated_digraph = "^[kgcjṭḍṇtdpbṛ]h$"

	local pieces = {}
	local pending = ""
	-- The appended space is a sentinel: it never matches either pattern, so
	-- it forces the last pending unit to be stored. The sentinel itself is
	-- left in `pending` and dropped.
	for codepoint in mw.ustring.gcodepoint(text .. " ") do
		local letter = mw.ustring.char(codepoint)
		local candidate = pending .. letter
		if find(candidate, single_consonant) or find(candidate, aspirated_digraph) then
			pending = candidate
		else
			pieces[#pieces + 1] = pending
			pending = letter
		end
	end
	return pieces
end

-- Insert "." syllable breaks into a transliterated string.
-- For every consonant run between two vowels the break goes after the first
-- consonant unit when the run has more than one unit, otherwise before the
-- single unit. Adjacent vowels are then separated as well.
-- Returns the string with breaks inserted.
local function syllabify(text)
	-- Two passes: syllabify_pattern consumes the vowel on both sides of a
	-- run, and gsub matches never overlap, so a run that starts at the vowel
	-- which ended the previous match is only reached on the second pass.
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(before, cluster, after)
			-- must be local: this callback runs once per match and would
			-- otherwise overwrite a global on every call
			local units = find_consonants(cluster)
			table.insert(units, #units > 1 and 2 or 1, ".")
			return before .. table.concat(units) .. after
		end)
		-- NOTE(review): Lua patterns have no lookahead; "(?=" is read as a
		-- capture beginning with the literal characters "?=", so this
		-- substitution is not expected to match ordinary input. Kept as is;
		-- the loop below performs the vowel-vowel split.
		text = gsub(text, "(" .. vowel .. ")(?=" .. vowel .. ")", "%1.")
	end
	-- Two passes for the same non-overlap reason: in a sequence of three
	-- vowels the middle one is consumed by the first match.
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	return text
end

-- Letters whose IPA symbol is the letter itself: register each one in
-- `correspondences` as mapping to itself (this replaces any entry the table
-- constructor already defined for the same letter).
local identical = "knlsfzθ"
for same in gmatch(identical, ".") do
	correspondences[same] = same
end

-- Transliterate `text` with the module-level language object and return only
-- the first value that lang:transliterate yields.
local function transliterate(text)
	local romanized = lang:transliterate(text)
	return romanized
end

-- Build a link to `term` through Module:links, using the module-level
-- language and script objects.
function export.link(term)
	local m_links = require("Module:links")
	local data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(data)
end

-- Derive a narrow (phonetic) transcription from a broad IPA string as
-- produced by export.toIPA.
-- ipa: broad transcription, syllables separated by ".".
-- Returns the narrow transcription. The substitutions are order-dependent.
function export.narrow_IPA(ipa)
	-- what /ɑ/ really is
	ipa = gsub(ipa, 'ɑ', 'ä')
	-- dentals: the optional diacritic in the pattern absorbs a dental mark
	-- that is already present (the "ḻ"/"ṉ" values of `correspondences`
	-- carry one), so it is never stacked twice
	ipa = gsub(ipa, '([snl])̪?', '%1̪')
	-- nasals induce nasalization
	ipa = gsub(ipa, '([əäɪiʊueɛoɔæ])(ː?)([nɳŋm])', '%1̃%2%3')
	-- cc, jj
	ipa = gsub(ipa, 't͡ʃ(%.?)t͡ʃ', 't̚%1t͡ʃ')
	ipa = gsub(ipa, 'd͡ʒ(%.?)d͡ʒ', 'd̚%1d͡ʒ')
	-- syllable boundary consonants
	-- The voiced velar produced by this module is "ɡ" (U+0261), not ASCII
	-- "g"; both are listed so the rules below apply to it as they do to "k".
	ipa = gsub(ipa, '([kɡg])%.([kɡg])', '%1̚.%2')
	ipa = gsub(ipa, '([ʈɖ])%.([ʈɖ])', '%1̚.%2')
	ipa = gsub(ipa, '([td]̪?)%.([tdn])', '%1̚.%2')
	ipa = gsub(ipa, '([pb])%.([pb])', '%1̚.%2')
	-- aspiration rules
	ipa = gsub(ipa, 'əɦ%.([kɡgŋtdɲʈɖɳnpbmɾlzqfʂʃsʒɭɣɹʋj])', 'ɛɦ.%1')
	ipa = gsub(ipa, 'ʊɦ%.([kɡgŋtdɲʈɖɳnpbmɾlzqfʂʃsʒɭɣɹʋj])', 'ɔɦ.%1')
	ipa = gsub(ipa, 'ə%.ɦə', 'ɛ.ɦɛ')
	ipa = gsub(ipa, 'ʊ%.ɦə', 'ɔ.ɦɔ')
	ipa = gsub(ipa, 'ə%.ɦʊ', 'ɔ.ɦɔ')
	-- v/w
	ipa = gsub(ipa, '([kɡgŋtdɲʈɖɳnpbm]̪?%.?)ʋ', '%1w')
	-- retroflex s rules
	ipa = gsub(ipa, 'ʂ(%.?[^ʈɖ])', 'ʃ%1')
	-- nasal before q: by this point every "n" is followed by the dental
	-- mark added above, so the mark has to be allowed (and dropped) here
	ipa = gsub(ipa, '[ŋn]̪?%.(q)', 'ɴ.%1')
	ipa = gsub(ipa, 'ʂ$', 'ʃ')
	-- short i before a syllable-initial j
	ipa = gsub(ipa, "ɪ%.j", "i.j")
	return ipa
end

-- Convert a term to a broad IPA transcription.
-- text:  the term; it is transliterated first when its detected script
--        reports isTransliterated(), otherwise it is used as given.
-- style: optional variant selector. "nonpersianized", "dakhini" and
--        "desanskritanize" each enable extra substitutions; any other value
--        (the callers in this file pass "persianized") enables none.
-- Returns the transcription as a string, syllables separated by "." and
-- words joined by "‿". Raises an error if transliteration yields nothing.
-- The substitutions below depend on the order in which they run.
function export.toIPA(text, style)
	-- "॰" is accepted as an alternative spelling of the "-" break marker
	text = gsub(text, '॰', '-')
	local translit = text
	if lang:findBestScript(text):isTransliterated() then
		translit = transliterate(text)
	end
	if not translit then
		error('The term "' .. text .. '" could not be transliterated.')
	end
	
	-- replace the listed consonants through the perso_arabic table
	if style == "nonpersianized" then
		translit = gsub(translit, "[xġqžzf']", perso_arabic)
	end

	-- replace q through the deccani table
	if style == "dakhini" then
		translit = gsub(translit, "[q]", deccani)
	end
	
	-- force final schwa for Hindi
	-- (a trailing "a~" becomes "ə"; "ə" has no entry in `correspondences`,
	-- so the character mapping further down leaves it untouched)
	translit = gsub(translit, "a~$", "ə")

	if style == "desanskritanize" then
		-- a forced final schwa preceded by at least three characters
		-- becomes "ɑ(ː)"
		translit = gsub(translit, "(...)ə$", "%1ɑ(ː)")
		translit = gsub(translit, "[ṣṇ]", urdu)
	end
	
	-- vowels
	-- normalise the tilde that spans two letters (as in "a͠i") to the
	-- ordinary combining tilde
	translit = gsub(translit, "͠", "̃")
	-- ai / au (optionally nasalized) become the long vowels ɛː / ɔː
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	-- drop a hyphen at the very end or very start
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	-- ŕ: plain r at the end or before a vowel, "ri" everywhere else
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ(" .. vowel .. ")", "r%1")
	translit = gsub(translit, "ŕ", "ri")
	translit = gsub(translit, ",", "")
	-- a space becomes "..", which is turned into "‿" further down
	translit = gsub(translit, " ", "..")
	translit = syllabify(translit)
	-- repair breaks that syllabify placed before a length mark or a tilde
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")
	
	-- gy
	translit = gsub(translit, 'jñ', 'gy')

	-- consonant + h digraphs become aspirated / breathy-voiced consonants
	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')
	
	-- map every character through `correspondences`; only the first return
	-- value of gsub (the string) is kept
	local result = gsub(translit, ".", correspondences)
	
	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	-- (only when at least three characters precede the schwa)
	result = gsub(result, "(...)ə$", "%1ᵊ")
	-- NOTE(review): every space was already replaced by ".." above, so this
	-- pattern does not look able to match any more -- confirm.
	result = gsub(result, "(...)ə ", "%1ᵊ ")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")
	-- a remaining hyphen (with an optional break before it) is a syllable break
	result = gsub(result, "%.?%-", ".")

	-- word boundary
	result = gsub(result, "%.%.", "‿")
	
	-- formatting
	-- put the tilde before the length mark, also across a break
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	-- drop a trailing break
	result = gsub(result, "%.$", "")
       -- ñ has no entry in `correspondences`; render it as n
        result = gsub(result, "ñ", "n")

	-- i and u lengthening
	-- (string-final ʊ / ɪ, optionally nasalized and optionally followed by ʱ)
	result = gsub(result, "ʊ(̃?)(ʱ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ʱ?)$", "i%1ː%2")
	
	-- ɛː before j (optionally across a break) becomes ə̯i; the break, if
	-- any, is preserved
	result = gsub(result, "ɛː(%.?)j", function(a)
		local res = "ə̯i"
		res = res .. a .. "j"
		return res
	end)
	-- likewise ɔː before ʋ becomes ə̯u
	result = gsub(result, "ɔː(%.?)ʋ", function(a)
		local res = "ə̯u"
		res = res .. a .. "ʋ"
		return res
	end)
	
	return result
end

-- Template entry point for the Hindi pronunciation line.
-- Reads the positional arguments of the calling template (empty ones are
-- skipped); when the first argument is absent the current page title is
-- used as the only term. For each term it lists the broad transcription,
-- the narrow one when it differs, and then the same pair for the
-- "nonpersianized" style when that differs from the "persianized" one.
-- Returns the accent qualifier followed by the formatted IPA.
function export.make(frame)
	local args = frame:getParent().args
	local pagetitle = mw.title.getCurrentTitle().text

	local terms = {}
	if not args[1] then
		terms = { pagetitle }
	else
		for _, value in ipairs(args) do
			if value ~= "" then
				terms[#terms + 1] = value
			end
		end
	end

	local results = {}

	-- Append /broad/ and, if it is different, [narrow].
	local function add_pronunciation(broad)
		results[#results + 1] = { pron = "/" .. broad .. "/" }
		local narrow = export.narrow_IPA(broad)
		if narrow ~= broad then
			results[#results + 1] = { pron = "[" .. narrow .. "]" }
		end
	end

	for _, term in ipairs(terms) do
		local persianized = export.toIPA(term, "persianized")
		local nonpersianized = export.toIPA(term, "nonpersianized")
		add_pronunciation(persianized)
		if nonpersianized ~= persianized then
			add_pronunciation(nonpersianized)
		end
	end

	return m_a.show({'Delhi'}) .. ' ' .. m_IPA.format_IPA_full(lang, results)
end

-- Template entry point for the Urdu pronunciation line.
-- Every non-empty positional argument of the calling template is converted
-- with export.toIPA in the "desanskritanize" style and listed as a broad
-- transcription.
-- Raises "No transliterations given." when the first argument is absent.
-- Returns the accent qualifier followed by the formatted IPA.
function export.make_ur(frame)
	local args = frame:getParent().args
	-- deliberately shadows the module-level `lang`: the result is formatted
	-- with the language object for code "ur"
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local results = {}
	for _, item in ipairs(args) do
		if item ~= "" then
			local desanskritanize = export.toIPA(item, "desanskritanize")
			table.insert(results, { pron = "/" .. desanskritanize .. "/" })
		end
	end

	return m_a.show({'urd'}) .. ' ' .. m_IPA.format_IPA_full(lang, results)
end

-- Template entry point for the Deccani pronunciation line.
-- Every non-empty positional argument of the calling template is converted
-- with export.toIPA in the "dakhini" style and listed as a broad
-- transcription.
-- Raises "No transliterations given." when the first argument is absent.
-- Returns the accent qualifier followed by the formatted IPA.
function export.make_deccani(frame)
	local args = frame:getParent().args
	-- deliberately shadows the module-level `lang`: the result is formatted
	-- with the language object for code "ur"
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local results = {}
	for _, item in ipairs(args) do
		if item ~= "" then
			local dakhini = export.toIPA(item, "dakhini")
			table.insert(results, { pron = "/" .. dakhini .. "/" })
		end
	end

	return m_a.show({'Deccani'}) .. ' ' .. m_IPA.format_IPA_full(lang, results)
end

return export
"https://si.wiktionary.org/w/index.php?title=Module:hi-IPA&oldid=41989" වෙතින් සම්ප්‍රවේශනය කෙරිණි