Jump to content

Module:cop-sortkey

Wiktionary වෙතින්

This module generates sort keys for Coptic (කොප්ටික් භාෂාව) text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:cop-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sort key for a given piece of text written in the script specified by the code sc, and language specified by the code lang. If sc is given and is not "Copt", the text is only converted to upper case.
Returns nil when no text is supplied.
  • ⲁⲗⲁⲕ (ΑΛΚ1Α )
  • ⲁⲗⲟⲕ (ΑΛΚ1Ο )
  • ⲁⲗⲕⲉ (ΑΛΚ2Ε )
  • ⲁⲗⲓⲕⲓ (ΑΛΚ2ΙΙ )
  • ⲁⲗⲕⲟⲩ (ΑΛΚ2Υ )
  • ⲟⲩⲣ (ΥΡ1 )
  • ⲟⲩⲏⲣ (ΥΡ1Η )
  • ⲟⲩⲱⲣ (ΥΡ1Ω )
  • ⲟⲩⲉⲓⲣⲉ (ΥΡ2ΙΕ )
  • ⲟⲩⲟⲣⲉ (ΥΡ2ΟΕ )
  • ⲟⲩⲣⲱ (ΥΡ2Ω )
  • ⲁⲗⲟⲩ (ΑΛ2Υ )
  • ⲁⲗⲱ (ΑΛ2Ω )
  • ⲁⲗⲧⲏⲁⲥ (ΑΛΤΣ1ΗΑ )
  • ⲁⲗⲁⲩ (ΑΛΥ1Α )
  • ⲁⲗⲏⲟⲩ (ΑΛΥ1Η )
  • ⲁⲗⲓⲟⲩⲓ (ΑΛΥ2ΙΙ )
  • ⲁⲗⲱⲟⲩⲉ (ΑΛΥ2ΩΕ )
  • ⲁⲗϣⲱⲟⲩ (ΑΛϢΥ1Ω )
  • ⲙⲁⲧⲟⲩ (ΜΤ2ΑΥ )
  • ⲙⲁⲩ (ΜΥ1Α )
  • ⲙⲁⲁⲩ (ΜΥ1ΑΑ )
  • ⲙⲟⲟⲩ (ΜΥ1Ο )
  • ⲙⲁⲩⲁⲁ- (ΜΥ2ΑΑΑ )
  • ⲙⲉⲉⲩⲉ (ΜΥ2ΕΕΕ )
  • ⲙⲁⲟⲩⲥⲉ (ΜΥΣ2ΑΕ )
  • ⲙⲟⲩⲟⲩⲧ (ΜΥΤ1Υ )
  • ⲙⲫⲏ (ΜΦ2Η )
  • ⲧⲁⲗ (ΤΛ1Α )
  • ⲧⲏⲗ⸗ (ΤΛ1Η )
  • ⲧⲁⲗⲟ (ΤΛ2ΑΟ )
  • ϯⲗⲓ (ΤΛ2ΙΙ )
  • ⲧⲱⲓⲗⲓ (ΤΛ2ΩΙΙ )
  • ⲕⲱ (Κ2Ω )
  • ⲕⲱ ⲉⲃⲟⲗ (Κ2Ω ΕΒΛ1Ο )
  • ˋϣⲗⲏⲗ (ϢΛΛ1Η )

-- Table of functions returned by this module. Declared `local` so that loading
-- the module does not create (or overwrite) a global variable named `export`.
local export = {}

-- Shorthand for the Unicode-aware pattern matcher of the Scribunto `mw` library.
local match = mw.ustring.match
-- Wrapper around mw.ustring.gsub that yields only the substituted string.
-- The surrounding parentheses truncate the call to its first result, so the
-- return value can be passed straight into table.insert or another call.
local function ugsub(str, pattern, repl)
	return (mw.ustring.gsub(str, pattern, repl))
end

-- Every letter the sorter knows about. The trailing Latin "w" is the
-- placeholder that makeSortKey substitutes for a ⲩ standing next to a vowel;
-- because it is not listed in `vowels`, it ends up among the consonants.
local alphabet = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱϣϥⳉϧϩϫϭw"
local vowels = "ⲁⲉⲏⲓⲟⲩⲱ"

-- Wraps a run of letters in brackets, giving a pattern character class.
local function charset(letters)
	return ("[%s]"):format(letters)
end

local vowel = charset(vowels)
-- The consonants are whatever remains of the alphabet once vowels are removed.
local consonants = ugsub(alphabet, vowel, "")
local consonant = charset(consonants)

-- Spelling normalisations that makeSortKey applies to the lower-cased text.
-- Each key is passed to string.gsub as the pattern and its value as the
-- replacement string.
local replacements = {
	["ⲟⲩ"] = "ⲩ", -- the digraph ⲟⲩ is reduced to the single letter ⲩ
	["ⳤ"] = "ⲕⲉ", -- ⳤ is expanded to ⲕⲉ
	["ⲉⲓ"] = "ⲓ", -- the digraph ⲉⲓ is reduced to ⲓ
	["ϯ"] = "ⲧⲓ", -- ϯ is expanded to ⲧⲓ
	["-"] = "", -- this and the two entries below delete the character
	["⸗"] = "",
	["ˋ"] = "",
}

-- Lookup from each Greek-derived Coptic letter to the corresponding lower-case
-- Greek letter. makeSortKey hands this table to string.gsub as the replacement,
-- so characters without an entry (such as ϣ ϥ ⳉ ϧ ϩ ϫ ϭ) are left unchanged.
local CopticToGreek = {
	["ⲁ"] = "α", ["ⲃ"] = "β", ["ⲅ"] = "γ", ["ⲇ"] = "δ",
	["ⲉ"] = "ε", ["ⲍ"] = "ζ", ["ⲏ"] = "η", ["ⲑ"] = "θ",
	["ⲓ"] = "ι", ["ⲕ"] = "κ", ["ⲗ"] = "λ", ["ⲙ"] = "μ",
	["ⲛ"] = "ν", ["ⲝ"] = "ξ", ["ⲟ"] = "ο", ["ⲡ"] = "π",
	["ⲣ"] = "ρ", ["ⲥ"] = "σ", ["ⲧ"] = "τ", ["ⲩ"] = "υ",
	["ⲫ"] = "φ", ["ⲭ"] = "χ", ["ⲯ"] = "ψ", ["ⲱ"] = "ω",
}

-- Keys of `replacements` in bytewise-sorted order, computed once at load time.
-- pairs() visits keys in an unspecified order and the substitutions do not all
-- commute (for instance "ⳤ" expands to "ⲕⲉ", which next to a following "ⲓ"
-- forms the digraph "ⲉⲓ"), so a fixed order keeps the sort key reproducible.
local replacement_order = {}
for letter in pairs(replacements) do
	replacement_order[#replacement_order + 1] = letter
end
table.sort(replacement_order)

--[[
	Builds the sort key for a piece of Coptic text.

	For every whitespace-separated word the key consists of: the initial vowel
	(if any), the consonants in order, "1" or "2" depending on whether the word
	ends in a consonant or a vowel, the non-initial vowels in order, and a
	trailing space. Example: "ⲁⲗⲁⲕ" gives "ΑΛΚ1Α ".

	text: the string to sort; when nil (or false) the function returns nil.
	lang: accepted for interface compatibility; not used here.
	sc:   optional script code. If given and different from "Copt", the text
	      is returned upper-cased with no further processing.

	Returns the upper-cased sort key string, or nil when no text is given.
]]
function export.makeSortKey(text, lang, sc)
	if not text then
		return nil
	elseif sc and sc ~= "Copt" then
		return mw.ustring.upper(text)
	end

	local str_gsub = string.gsub

	text = mw.ustring.lower(text)

	-- Normalise spellings (digraphs, ligatures, markers) in a fixed order.
	for _, letter in ipairs(replacement_order) do
		text = str_gsub(text, letter, replacements[letter])
	end

	-- A ⲩ directly before or after a vowel is marked with the placeholder "w",
	-- which belongs to the consonant class; it is turned back into ⲩ below.
	text = ugsub(text, "ⲩ(" .. vowel .. ")", "w%1")
	text = ugsub(text, "(" .. vowel .. ")ⲩ", "%1w")

	local parts = {}

	for word in mw.ustring.gmatch(text, "%S+") do
		-- Add initial vowel (if any).
		local initial = match(word, "^" .. vowel)
		if initial then
			parts[#parts + 1] = initial
		end

		-- Add consonants (in order).
		parts[#parts + 1] = ugsub(word, vowel .. "+", "")

		--[[
			Add the number "1" if the word ends in a consonant, "2" if it ends
			in a vowel. "1" sorts before the Greek–Coptic and Coptic Unicode
			blocks. Words ending in any other character get no number.
		]]
		if match(word, consonant .. "$") then
			parts[#parts + 1] = "1"
		elseif match(word, vowel .. "$") then
			parts[#parts + 1] = "2"
		end

		-- Get non-initial vowels (in order) by removing initial vowel and all consonants.
		parts[#parts + 1] = ugsub(ugsub(word, "^" .. vowel, ""), consonant, "")

		parts[#parts + 1] = " "
	end

	local key = table.concat(parts)

	-- Restore the consonantal placeholder to ⲩ.
	key = str_gsub(key, "w", "ⲩ")

	--[[
		Convert Greek-derived Coptic characters to Greek ones.
		Otherwise, the uniquely Coptic letters would sort first, because
		they were added to Unicode earlier.
		ϣϥⳉϧϩϫϭ		ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ

		αβγδεζηθικλμνξοπρστυφχψω	ϣϥⳉϧϩϫϭ

		The byte pattern picks out each multi-byte UTF-8 sequence; sequences
		with no entry in CopticToGreek are kept as they are.
	]]
	key = str_gsub(key, "[\194-\244][\128-\191]+", CopticToGreek)

	return mw.ustring.upper(key)
end

-- Language and script objects for Coptic, fetched once when the module loads.
local lang = require("Module:languages").getByCode("cop")
local sc = require("Module:scripts").getByCode("Copt")

-- Passes `text` to tag_text of Module:script utilities together with the
-- Coptic language and script objects, and returns whatever that call returns.
-- The utilities module is required on use rather than at load time.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc)
end

--[[
	Sorts the positional arguments of `frame` by their Coptic sort keys and
	returns them as one string, one "\n* " bullet per term, each showing the
	tagged term followed by its sort key inside <code> tags.

	frame: object whose `args` table holds the terms at indices 1..n.
]]
function export.showSorting(frame)
	local terms = {}
	-- Sort key per term, computed once instead of on every comparison and
	-- again when the output is built.
	local keys = {}

	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
		keys[term] = export.makeSortKey(term)
	end

	table.sort(terms, function(term1, term2)
		return keys[term1] < keys[term2]
	end)

	local lines = {}
	for i, term in ipairs(terms) do
		lines[i] = "\n* " .. tag(term) .. " (<code>" .. keys[term] .. "</code>)"
	end

	return table.concat(lines)
end

return export
"https://si.wiktionary.org/w/index.php?title=Module:cop-sortkey&oldid=168091" වෙතින් සම්ප්‍රවේශනය කෙරිණි