Module:category tree/poscatboiler/data/affixes and compounds
Appearance
- පහත දැක්වෙන උපදෙස්, Template:poscatboiler data submodule documentation මගින් ජනනය කොට ඇත. [සංස්කරණය කරන්න]
- ප්රයෝජනවත් සබැඳි: root page • root page’s subpages • සබැඳි • transclusions • testcases • sandbox
This data submodule defines part of Wiktionary's category structure.
For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.
local labels = {}
local raw_categories = {}
local handlers = {}
local m_sinhala = require("Module:sinhala")
-----------------------------------------------------------------------------
-- --
-- LABELS --
-- --
-----------------------------------------------------------------------------
-- Compounds whose stems alliterate; filed under both the general compound
-- label and "alliterative phrases".
labels["alliterative compounds"] = {
    parents = {"සංයුක්ත යෙදුම්", "alliterative phrases"},
    description = "{{{langname}}} noun phrases composed of two or more stems that alliterate.",
}
-- Compounds whose parts are antonyms of each other.
labels["antonymous compounds"] = {
    description = "{{{langname}}} compounds in which one part is an antonym of the other.",
    -- The parent is written in spec form ({name = ..., sort = ...}) so that the
    -- sort key is attached to the parent entry. A bare `sort` field placed on
    -- the parents list itself is not part of any parent entry.
    parents = {{name = "dvandva compounds", sort = "antonym"}},
}
-- Compounds of the "having a B that is A" type; filed under both the general
-- compound label and "exocentric compounds".
labels["bahuvrihi compounds"] = {
    parents = {"සංයුක්ත යෙදුම්", "exocentric compounds"},
    description = "{{{langname}}} compounds in which the first part (A) modifies the second (B), and whose meaning follows a [[metonymic]] pattern: “<person> having a B that is A.”",
}
-- Parts of speech for which a "සංයුක්ත <POS>" label is generated below.
local compound_poses = {
    "adjectives",
    "adverbs",
    "conjunctions",
    "determiners",
    "interjections",
    "නාම පද",
    "numerals",
    "particles",
    "postpositions",
    "උපසර්ග",
    "prepositions",
    "pronouns",
    "proper nouns",
    "suffixes",
    "verbs",
}

-- Register one label per listed part of speech. Each label has two parents:
-- the general compound label (with a single-space sort key) and the part of
-- speech itself.
for index = 1, #compound_poses do
    local pos_name = compound_poses[index]
    labels["සංයුක්ත " .. pos_name] = {
        parents = {{sort = " ", name = "සංයුක්ත යෙදුම්"}, pos_name},
        description = ("{{{langname}}} %s composed of two or more stems."):format(pos_name),
    }
end
-- Compound determinatives: parented by the general compound label and by
-- "determiners".
labels["compound determinatives"] = {
    parents = {"සංයුක්ත යෙදුම්", "determiners"},
    description = "{{{langname}}} determinatives composed of two or more stems.",
}

-- The general compound label. Most other labels in this section list it as a
-- parent. It carries its own umbrella_parents value, so the defaulting loop
-- later in this file leaves it alone.
labels["සංයුක්ත යෙදුම්"] = {
    parents = {"යෙදුම්, නිරුක්තිය අනුව"},
    umbrella_parents = "භාෂාව අනුව යෙදුම්, නිරුක්ති උප ප්රවර්ග අනුව",
    description = "{{{langname}}} terms composed of two or more stems.",
}

labels["dvandva compounds"] = {
    parents = {"සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
}

labels["dvigu compounds"] = {
    parents = {"tatpurusa compounds"},
    description = "{{{langname}}} [[tatpuruṣa]] compounds where the modifying member is a number",
}

labels["endocentric compounds"] = {
    parents = {"සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} terms composed of two or more stems, one of which is the [[w:head (linguistics)|head]] of that compound.",
}

-- NOTE(review): this description is word-for-word the same as the one for
-- "endocentric compounds" -- confirm whether a more specific text is wanted.
labels["endocentric noun-noun compounds"] = {
    breadcrumb = "noun-noun",
    parents = {"endocentric compounds", "සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} terms composed of two or more stems, one of which is the [[w:head (linguistics)|head]] of that compound.",
}

labels["endocentric verb-noun compounds"] = {
    breadcrumb = "verb-noun",
    parents = {"endocentric compounds", "verb-noun compounds"},
    description = "{{{langname}}} compounds in which the first element is a verbal stem, the second a nominal stem and the head of the compound.",
}

labels["exocentric compounds"] = {
    parents = {"සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} terms composed of two or more stems, none of which is the [[w:head (linguistics)|head]] of that compound.",
}

labels["exocentric verb-noun compounds"] = {
    breadcrumb = "verb-noun",
    parents = {"exocentric compounds", "verb-noun compounds"},
    description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action.",
}

labels["karmadharaya compounds"] = {
    parents = {"tatpurusa compounds"},
    description = "{{{langname}}} terms composed of two or more stems in which the main stem determines the case endings.",
}

labels["itaretara dvandva compounds"] = {
    breadcrumb = "itaretara",
    parents = {"dvandva compounds"},
    description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
}

labels["rhyming compounds"] = {
    parents = {"සංයුක්ත යෙදුම්", "rhyming phrases"},
    description = "{{{langname}}} noun phrases composed of two or more stems that rhyme.",
}

labels["samahara dvandva compounds"] = {
    breadcrumb = "samahara",
    parents = {"dvandva compounds"},
    description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
}

labels["shitgibbons"] = {
    parents = {"endocentric compounds"},
    description = "{{{langname}}} terms that consist of a single-syllable [[expletive]] followed by a two-syllable [[trochee]] that serves as a [[nominalizer]] or [[intensifier]].",
}
-- Compounds whose parts are synonyms of each other.
labels["synonymous compounds"] = {
    description = "{{{langname}}} compounds in which one part is a synonym of the other.",
    -- The parent is written in spec form ({name = ..., sort = ...}) so that the
    -- sort key is attached to the parent entry. A bare `sort` field placed on
    -- the parents list itself is not part of any parent entry.
    parents = {{name = "dvandva compounds", sort = "synonym"}},
}
-- Parent label of the dvigu, karmadharaya and vyadhikarana compound labels.
labels["tatpurusa compounds"] = {
    parents = {"සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} terms composed of two or more stems",
}

labels["verb-noun compounds"] = {
    parents = {"verb-object compounds"},
    description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
}

labels["verb-object compounds"] = {
    parents = {"සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a term (usually but not always a noun) functioning as its (normally direct) object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
    additional = "Examples in English are {{m|en|pickpocket|lit=someone who picks pockets}} and {{m|en|catch-all|lit=something that catches everything}}.",
}

labels["verb-verb compounds"] = {
    parents = {"සංයුක්ත යෙදුම්"},
    description = "{{{langname}}} compounds composed of two or more verbs in apposition, often either synonyms or antonyms, and whose referent refers to the result of performing those actions.",
}

-- The two vrddhi labels hang off the etymology label rather than the general
-- compound label.
labels["vrddhi derivatives"] = {
    parents = {"යෙදුම්, නිරුක්තිය අනුව"},
    description = "{{{langname}}} terms derived from a Proto-Indo-European root by the process of [[w:vṛddhi|vṛddhi]] derivation.",
}

labels["vrddhi gerundives"] = {
    parents = {"vrddhi derivatives"},
    description = "{{{langname}}} [[gerundive]]s derived from a Proto-Indo-European root by the process of [[w:vṛddhi|vṛddhi]] derivation.",
}

labels["vyadhikarana compounds"] = {
    parents = {"tatpurusa compounds"},
    description = "{{{langname}}} terms composed of two or more stems in which the non-main stem determines the case endings.",
}
-- Register one "යෙදුම්, <affix type> අනුව" label per supported affix type.
for _, fixtype in ipairs({"circumfix", "infix", "interfix", "prefix", "suffix"}) do
    -- Look the plural affix-type name up once instead of three times.
    -- Assigning to a local also pins the result to exactly one value: a call
    -- written as the last item of a table constructor (as in the parents list
    -- below) would otherwise append every value the function returns.
    local affix_plural = m_sinhala.sinhala(fixtype .. "es")
    labels["යෙදුම්, " .. affix_plural .. " අනුව"] = {
        description = "{{{langname}}} යෙදුම්, ඒවායේ " .. affix_plural .. " වලට අනුව කාණ්ඩ වලට වෙන්කොට ඇති.",
        umbrella_parents = "භාෂාව අනුව යෙදුම්, නිරුක්ති උප ප්රවර්ග අනුව",
        parents = {{name = "යෙදුම්, නිරුක්තිය අනුව", sort = fixtype}, affix_plural},
    }
end
-- Give every label that has no 'umbrella_parents' value the shared default.
-- Only existing entries are modified, so updating during pairs() is safe.
-- NOTE: umbrella.parents overrides umbrella_parents if both are given.
for _, label_spec in pairs(labels) do
    label_spec.umbrella_parents = label_spec.umbrella_parents
        or "Types of compound terms by language"
end
-----------------------------------------------------------------------------
-- --
-- RAW CATEGORIES --
-- --
-----------------------------------------------------------------------------
-- Umbrella raw category; its name is the default 'umbrella_parents' value
-- assigned to labels earlier in this file.
raw_categories["Types of compound terms by language"] = {
    additional = "{{{umbrella_meta_msg}}}",
    description = "Umbrella categories covering topics related to types of compound terms.",
    parents = {
        "ඡත්ර මෙටා ප්රවර්ග",
        {is_label = true, name = "සංයුක්ත යෙදුම්", sort = " "},
        {sort = " ", name = "භාෂාව අනුව යෙදුම්, නිරුක්ති උප ප්රවර්ග අනුව"},
    },
}
-----------------------------------------------------------------------------
-- --
-- HANDLERS --
-- --
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
------------------------------ Affix handlers -------------------------------
-----------------------------------------------------------------------------
-- Handler for per-affix categories.
--
-- Recognised label shape (see the pattern below):
--     "<pos>, <term> <affix type> සහිත<tail>"
-- where <tail> is whatever follows "සහිත". The term and the tail are joined and
-- a trailing " (<id>)" is then split off as the sense ID, if present.
--
-- Returns nil when the label does not match, when the affix type is not one of
-- the supported ones, or when the part of speech is the retired name "words".
-- Otherwise returns the category spec table followed by `true` (meaning that
-- this handler has processed the category arguments).
table.insert(handlers, function(data)
    local _, pos, label_term, affixtype, label_tail =
        data.label:match("^((.*), (.+) (.*) සහිත)(.*)$")
    if affixtype == nil then
        return nil
    end

    -- Escape "%" so that text taken from the label can be embedded verbatim in
    -- a string.format template.
    local function format_escape(text)
        return (text:gsub("%%", "%%%%"))
    end

    -- Template for the category name with the term swapped for "%s". It is
    -- rebuilt from the captures rather than by gsub-ing the term out of the
    -- label: the term is arbitrary text (e.g. "un-"), and using it as a Lua
    -- pattern would treat characters such as "-", ".", "(" and "%" as magic.
    local labelpref = format_escape(pos) .. ", %s " .. format_escape(affixtype) .. " සහිත"
    local term_and_id = label_term .. label_tail

    -- Map the Sinhala names used in category titles to the internal keys.
    if pos == "යෙදුම්" then
        pos = "terms"
    end
    local internal_affixtype = {
        ["ප්රත්ය"] = "suffix",
        ["උපසර්ග"] = "prefix",
    }
    affixtype = internal_affixtype[affixtype] or affixtype

    local term, id = term_and_id:match("^(.+) %(([^()]+)%)$")
    term = term or term_and_id

    -- Description templates; %s is replaced by the linked affix. The circumfix,
    -- infix and interfix entries previously had no %s, which silently dropped
    -- the affix from the generated description.
    local desc = {
        ["prefix"] = ", %s උපසර්ගයෙන් ආරම්භ වන",
        ["suffix"] = ", %s ප්රත්යයෙන් අවසන් වන",
        ["circumfix"] = "bookended with the circumfix %s",
        ["infix"] = "spliced with the infix %s",
        ["interfix"] = "joined with the interfix %s",
        -- Transfixes not supported currently.
        -- ["transfix"] = "patterned with the transfix %s",
    }
    if not desc[affixtype] then
        return nil
    end

    -- Here, {LANG} is replaced with the actual language, {TERM_AND_ID} with the actual term (or with 'TERM<id:ID>'
    -- if there is an ID), {BASE} with '<var>base</var>', {BASE2} with '<var>base2</var>', {BASE_EXPL} with an
    -- explanation of what "base" means, {BASE_BASE2_EXPL} with an explanation of what "base" and "base2" mean, and
    -- {POS} with '|pos=POS' if there is a `pos` other than "terms", otherwise a blank string.
    local what_categorizes = {
        ["prefix"] = "{{tl|af|{LANG}|{TERM_AND_ID}|{BASE}{POS}}} or {{tl|affix|{LANG}|{TERM_AND_ID}|{BASE}{POS}}} (හෝ වැඩි-කැමැත්තක්-නොදක්වන ආකාර වන {{tl|pre}} හෝ {{tl|prefix}}) මගින් සිදු කරයි. මෙහි {BASE_EXPL}",
        ["suffix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} (හෝ වැඩි-කැමැත්තක්-නොදක්වන ආකාර වන {{tl|suf}} හෝ {{tl|suffix}}) මගින් සිදු කරයි. මෙහි {BASE_EXPL}",
        ["circumfix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}}, where {BASE_EXPL}",
        ["infix"] = "{{tl|infix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}}, where {BASE_EXPL}",
        ["interfix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}|{BASE2}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}|{BASE2}{POS}}}, where {BASE_BASE2_EXPL}",
    }

    local args = require("Module:parameters").process(data.args, {
        ["alt"] = true,
        ["sc"] = true,
        ["sort"] = true,
        ["tr"] = true,
        ["ts"] = true,
    })
    local m_scripts = require("Module:scripts")
    local sc = data.sc or args.sc and m_scripts.getByCode(args.sc, "sc") or nil
    local m_affix = require("Module:affix")
    -- Transliteration and transcription are always processed as Latin script.
    local latn = m_scripts.getByCode("Latn")
    -- Call make_affix to add display hyphens if they're not already present.
    local _, display_term, lookup_term = m_affix.make_affix(term, data.lang, sc, affixtype, nil, true)
    local _, display_alt = m_affix.make_affix(args.alt, data.lang, sc, affixtype)
    local _, display_tr = m_affix.make_affix(args.tr, data.lang, latn, affixtype)
    local _, display_ts = m_affix.make_affix(args.ts, data.lang, latn, affixtype)
    local m_script_utilities = require("Module:script utilities")
    local id_text = id and " (" .. id .. ")" or ""

    if pos == "words" then
        -- don't allow formerly-named categories with "words"
        return nil
    end

    -- Compute parents.
    local affix_plural = m_sinhala.sinhala(affixtype .. "es")
    local parents = {}
    if pos ~= "terms" then
        -- POS-specific categories are filed under the all-terms category for
        -- the same affix. Without an ID, term_and_id is identical to term.
        -- NOTE(review): with an ID this name places "(id)" before the affix
        -- type, whereas the label parsed above may carry it after "සහිත" --
        -- confirm which naming the wiki's categories actually use.
        table.insert(parents, {
            name = ("යෙදුම්, %s %s සහිත"):format(term_and_id, affix_plural),
            sort = id and (id .. ", " .. pos) or pos,
            args = args,
        })
    end
    if id then
        -- ID-specific categories are also filed under the ID-less category.
        table.insert(parents, {name = labelpref:format(term), sort = id, args = args})
    end
    -- NOTE(review): data.lang is dereferenced unconditionally here, although
    -- the code below guards on `data.lang` -- confirm that this handler is
    -- never invoked without a language.
    table.insert(parents, {
        name = "යෙදුම්, " .. affix_plural .. " අනුව",
        sort = (data.lang:makeSortKey((data.lang:makeEntryName(args.sort or term)))),
    })

    -- If other affixes are mapped to this one, show them.
    local additional
    if data.lang then
        local langcode = data.lang:getCode()
        if m_affix.langs_with_lang_specific_data[langcode] then
            local langdata = mw.loadData(m_affix.affix_lang_data_module_prefix .. langcode)
            local variants = {}
            if langdata.affix_mappings then
                for variant, canonical in pairs(langdata.affix_mappings) do
                    -- Above, we converted the stripped link term as we received it to the lookup form, so we
                    -- can look up the variants that are mapped to this term. Once we find them, map them to
                    -- display form.
                    local is_variant = false
                    if type(canonical) == "table" then
                        for _, canonical_v in pairs(canonical) do
                            if canonical_v == lookup_term then
                                is_variant = true
                                break
                            end
                        end
                    else
                        is_variant = canonical == lookup_term
                    end
                    if is_variant then
                        local _, display_variant = m_affix.make_affix(variant, data.lang, sc, affixtype)
                        table.insert(variants, "{{m|" .. langcode .. "|" .. display_variant .. "}}")
                    end
                end
                if #variants > 0 then
                    table.sort(variants)
                    additional = ("This category also includes terms %sed with %s."):format(affixtype,
                        require("Module:table").serialCommaJoin(variants))
                end
            end
        end
    end

    if data.lang then
        local what_categorizes_msg = what_categorizes[affixtype]
        if not what_categorizes_msg then
            error(("Internal error: No what_categorizes value for affixtype '%s' for label '%s', lang '%s'"):
                format(affixtype, data.label, data.lang:getCode()))
        end
        local replacement_escape = require("Module:string utilities").replacement_escape
        -- The outer parentheses discard the match count returned by the last gsub.
        what_categorizes_msg = "මෙම ප්රවර්ගය තුළට යෙදුම් එක් කිරීම " .. (what_categorizes_msg
            :gsub("{LANG}", data.lang:getCode())
            :gsub("{TERM_AND_ID}", replacement_escape(
                id and ("%s<id:%s>"):format(term, id) or term))
            :gsub("{POS}", replacement_escape(
                pos == "terms" and "" or ("|pos=%s"):format(pos)))
            :gsub("{BASE}", "<var>base</var>")
            :gsub("{BASE2}", "<var>base2</var>")
            :gsub("{BASE_EXPL}", "<code><var>base</var></code> යනු යෙදුම ව්යුත්පන්න වී ඇති මූලික ලෙමාව වෙයි")
            :gsub("{BASE_BASE2_EXPL}", "<code><var>base</var></code> and <code><var>base2</var></code> are the " ..
                "යෙදුම ව්යුත්පන්න වී ඇති මූලික ලෙමා")
        ) .. "."
        if additional then
            additional = additional .. "\n\n" .. what_categorizes_msg
        else
            additional = what_categorizes_msg
        end
    end

    return {
        description = "{{{langname}}} " .. m_sinhala.sinhala(pos) .. " " .. (desc[affixtype]):format(require("Module:links").full_link({
            lang = data.lang, term = display_term, alt = display_alt, sc = sc, id = id, tr = display_tr, ts = display_ts}, "term")) .. ".",
        additional = additional,
        breadcrumb = pos == "terms" and m_script_utilities.tag_text(display_alt or display_term, data.lang, sc, "term") .. id_text or pos,
        displaytitle = "{{{langname}}} " .. (labelpref):format(m_script_utilities.tag_text(term, data.lang, sc, "term")) .. id_text,
        parents = parents,
        umbrella = false,
    }, true -- true = args handled
end)
-- Export the label, raw-category and handler tables defined above.
return {
    LABELS = labels,
    RAW_CATEGORIES = raw_categories,
    HANDLERS = handlers,
}