Jump to content

Module:Jpan-sortkey

Wiktionary වෙතින්

This module sorts text written in the Japanese script (ජපන් අක්ෂරක්‍රමය). It is used to sort Southern Amami-Oshima, Japanese (ජපන්), Hachijō, Kikai, Translingual (සර්ව භාෂාමය), Miyako, Old Japanese, Oki-No-Erabu, Northern Amami-Oshima, Yaeyama, Okinawan, Toku-No-Shima, Kunigami, Yonaguni, and Yoron. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:Jpan-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

-- Table of public functions; returned at the end of the module.
local export = {}

-- Local aliases for the library functions used by makeSortKey.
-- find/gsub/match/sub are the mw.ustring versions, not the byte-oriented
-- string library ones.
local concat = table.concat
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local insert = table.insert
local match = mw.ustring.match
local sub = mw.ustring.sub
local toNFC = mw.ustring.toNFC

-- Character-range data loaded from Module:ja/data/range. makeSortKey splices
-- each of these values inside a "[...]" character class, so they are used as
-- character-class fragments rather than as complete patterns.
local range = mw.loadData("Module:ja/data/range")
local kanji_pattern = range.kanji
local ideograph_pattern = range.ideograph
local kana_graph_pattern = range.kana_graph
local latin_pattern = range.latin

-- Functions borrowed from other modules: language lookup by code, the Hani
-- sortkey generator (applied as the last step of makeSortKey), and the
-- tracking helper.
local get_by_code = require("Module:languages").getByCode
local Hani_sort = require("Module:Hani-sortkey").makeSortKey
local track = require("Module:debug/track")

-- Removes the characters " ", "-", ".", "^" and "%" from a reading taken from
-- a headword template.
local function strip_reading_markup(reading)
	-- The parentheses drop gsub's second return value (the substitution count).
	return (reading:gsub("[ %-%.^%%]", ""))
end

-- Returns the level-2 section named `langname` of the page called `pagename`,
-- or nil when the title is invalid, the page has no content, or
-- Module:utilities' get_section returns nothing.
local function get_language_section(pagename, langname)
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a page that does not exist, so both results are checked before use.
	local title = mw.title.new(toNFC(pagename))
	local content = title and title:getContent()
	if not content then
		return nil
	end
	return require("Module:utilities").get_section(content, langname, 2)
end

-- Scans the templates in `content` for a reading of the entry.
--
-- Returns the reading from the first template that supplies one:
--   * {{<lang>-head}} / {{<lang>-pos}}: parameter 2, markup stripped;
--   * {{head}} / {{head-lite}} whose parameter 1 is `lang`: the positional
--     parameter that follows a positional parameter equal to "kana";
--   * {{<lang>-noun}}, {{<lang>-verb}}, ...: parameter 1, markup stripped.
-- When no template supplies a reading, returns nil followed by the arguments
-- of the first {{<lang>-kanjitab}} on the page (nil if there is none).
local function reading_from_templates(content, lang)
	local findTemplates = require("Module:template parser").findTemplates
	-- The lookup tables depend only on `lang`, so they are built once per page
	-- instead of once per template.
	local reading_in_arg2 = {
		[lang .. "-head"] = true,
		[lang .. "-pos"] = true,
	}
	local reading_in_arg1 = {
		[lang .. "-noun"] = true,
		[lang .. "-verb"] = true,
		[lang .. "-adj"] = true,
		[lang .. "-phrase"] = true,
		[lang .. "-verb form"] = true,
		[lang .. "-verb-suru"] = true,
		[lang .. "-see"] = true,
		[lang .. "-see-kango"] = true,
		[lang .. "-gv"] = true,
	}
	local kanjitab_name = lang .. "-kanjitab"
	local kanjitab
	for template, args in findTemplates(content) do
		if reading_in_arg2[template] and args[2] then
			return strip_reading_markup(args[2])
		elseif (template == "head" or template == "head-lite") and args[1] == lang then
			for i, arg in ipairs(args) do
				local kana = args[i + 1]
				if arg == "kana" and kana then
					-- Returning here ends the whole template scan. A bare
					-- `break` would only leave this inner loop and let a later
					-- template overwrite the reading.
					return kana
				end
			end
		end
		if reading_in_arg1[template] and args[1] then
			return strip_reading_markup(args[1])
		elseif template == kanjitab_name then
			-- Only the first kanjitab on the page is kept.
			kanjitab = kanjitab or args
		end
	end
	return nil, kanjitab
end

-- Builds a reading for `text` from the arguments of a kanjitab template.
--
-- Returns kanjitab.sortkey when that parameter is set. Otherwise the result is
-- assembled from the non-kanji stretches of `text` interleaved with the
-- readings given by the positional parameters; nil is returned when that
-- assembly yields an empty string.
local function reading_from_kanjitab(text, kanjitab)
	if kanjitab.sortkey then
		return kanjitab.sortkey
	end
	-- Split `text` around every kanji (or 々): non_kanji[1] is the text before
	-- the first one, non_kanji[n + 1] the text after the n-th one. gsub is used
	-- only to visit the matches; its result is discarded.
	local non_kanji = {}
	local kanji_border = 1
	gsub(text, "()([" .. kanji_pattern .. "々])()", function(p1, _, p2)
		insert(non_kanji, sub(text, kanji_border, p1 - 1))
		kanji_border = p2
	end)
	insert(non_kanji, sub(text, kanji_border))

	local parts = {non_kanji[1]}
	local id = 1
	for i, spec in ipairs(kanjitab) do
		-- A positional parameter is a reading optionally followed by digits;
		-- the digits say how far to advance through the non-kanji segments
		-- (presumably the number of kanji the reading spans).
		local kana, length = match(spec, "^([^0-9]*)([0-9]*)$")
		local reading = ""
		if kana and kana ~= "" then
			length = tonumber(length) or 1
			-- k<i> overrides the reading and o<i> is appended to it.
			reading = (kanjitab["k" .. i] or kana) .. (kanjitab["o" .. i] or "")
			-- A combined reading of exactly "-" contributes nothing.
			if reading == "-" then
				reading = ""
			end
		else
			length = 1
		end
		id = id + length
		insert(parts, reading .. (non_kanji[id] or ""))
	end
	local sortkey = concat(parts)
	if sortkey ~= "" then
		return sortkey
	end
	return nil
end

-- Generates a sortkey for `text`.
--
-- For any language other than "mul", and while `text` still contains a digit
-- or a character from the kanji/ideograph/kana_graph/latin ranges, the page
-- named `text` is read and a reading is taken from the templates in its
-- section for the language (see reading_from_templates), falling back to its
-- kanjitab (see reading_from_kanjitab). The reading replaces `text` and the
-- lookup repeats; each page name is looked up at most once.
--
-- The resolved text is then passed through Module:Hira-sortkey and finally
-- through Module:Hani-sortkey.
--
-- Parameters:
--   text: the text to sort.
--   lang: language code (a string; "mul" skips the page lookups).
--   sc:   script argument, passed through unchanged to the Hira and Hani
--         sortkey functions.
-- Returns: the value returned by Module:Hani-sortkey's makeSortKey.
function export.makeSortKey(text, lang, sc)
	-- Determine reading.
	if lang ~= "mul" then
		local seen_pages, langname = {}
		local needs_reading = "[0-9" .. kanji_pattern .. ideograph_pattern .. kana_graph_pattern .. latin_pattern .. "]"
		while not seen_pages[text] and find(text, needs_reading) do
			langname = langname or get_by_code(lang):getCanonicalName()
			seen_pages[text] = true
			local section = get_language_section(text, langname)
			if not section then
				break
			end
			local reading, kanjitab = reading_from_templates(section, lang)
			if not reading and kanjitab then
				track{"Jpan-sortkey/kanjitab", "Jpan-sortkey/kanjitab/" .. lang}
				reading = reading_from_kanjitab(text, kanjitab)
			end
			if not reading then
				-- Nothing on this page changes the text, so stop looking.
				break
			end
			text = reading
		end
	end

	-- Use hiragana sort.
	text = require("Module:Hira-sortkey").makeSortKey(text, lang, sc)

	-- Run through Hani sort, to catch any stray kanji. This shouldn't happen but often does, and we still want to handle them sensibly in the time before the entry is fixed.
	local ret = Hani_sort(text, lang, sc)

	-- Track the cases where the Hani pass changed the text.
	if not (lang == "mul" or ret == text) then
		track{"Jpan-sortkey/fallback", "Jpan-sortkey/fallback/" .. lang}
	end

	return ret
end

-- Return the table of public functions (makeSortKey) as the module's value.
return export
"https://si.wiktionary.org/w/index.php?title=Module:Jpan-sortkey&oldid=164972" වෙතින් සම්ප්‍රවේශනය කෙරිණි