Module:etymon/categories
පෙනුම
- මෙම module සතුව උපදෙස් උප පිටුවක් නොපවතියි. Please නිර්මාණය කරන්න.
- ප්රයෝජනවත් සබැඳි: root page • root page’s subpages • සබැඳි • transclusions • testcases • sandbox
local export = {}
local data = mw.loadData("Module:etymon/data")
local STATUS = data.STATUS
local TRANSITIVE = data.TRANSITIVE
local keywords = data.keywords
-- Decide whether a keyword is transitive for a given term.
-- `transitive_mode` is compared against the TRANSITIVE constants loaded from
-- the data module; `page_lang` and `term_lang` must provide a getCode() method.
-- Returns a boolean, and raises an error for any mode not handled below.
local function is_transitive(transitive_mode, page_lang, term_lang)
	if transitive_mode == TRANSITIVE.ALWAYS then
		return true
	end
	if transitive_mode == TRANSITIVE.NEVER then
		return false
	end
	-- Both cross-language modes answer this question the same way:
	-- transitive only when the term is in a different language from the page.
	if transitive_mode == TRANSITIVE.CROSS_LANG
		or transitive_mode == TRANSITIVE.CROSS_LANG_NO_INTERNAL_SOURCE then
		return page_lang:getCode() ~= term_lang:getCode()
	end
	error("Unknown transitive mode: " .. tostring(transitive_mode))
end
-- Look up the configuration for `keyword`, applying language-specific overrides.
-- Returns nil when the keyword has no entry in `keywords`, the base entry
-- itself when `lang_exc.keyword_overrides[keyword]` is absent, and otherwise a
-- new table holding the base fields with the override fields layered on top.
local function get_keyword_config(keyword, lang_exc)
	local base = keywords[keyword]
	if not base then
		return nil -- Invalid keyword
	end

	local per_keyword = lang_exc and lang_exc.keyword_overrides
	local overrides = per_keyword and per_keyword[keyword]
	if not overrides then
		return base
	end

	-- Copy base first, then overrides, so override values win on clashes
	local merged = {}
	for _, layer in ipairs({ base, overrides }) do
		for field, value in pairs(layer) do
			merged[field] = value
		end
	end
	return merged
end
-- Return the second value produced by Module:etymology's
-- get_display_and_cat_name(source, true), i.e. the category name for `source`.
function export.get_cat_name(source)
	local get_names = require("Module:etymology").get_display_and_cat_name
	-- The outer parentheses truncate the result to exactly one value
	return (select(2, get_names(source, true)))
end
-- Short affix-type codes mapped to the canonical affix type names.
-- Values not listed here are used as given by the callers in this file.
local aftype_aliases = {
	pre = "prefix",
	suf = "suffix",
	["in"] = "infix", -- `in` is a reserved word, so it needs bracket syntax
	inter = "interfix",
	circum = "circumfix",
	naf = "non-affix",
	root = "non-affix",
}
-- Return true when `wanted` appears in `target_ids`. Each entry is either a
-- bare ID or a table whose `id` field holds the ID.
local function etymon_id_listed(target_ids, wanted)
	for _, id_data in ipairs(target_ids) do
		local stored_id = type(id_data) == "table" and id_data.id or id_data
		if stored_id == wanted then
			return true
		end
	end
	return false
end

-- Return true when more than one key of `senseid_parent_etymon` begins with
-- "<target_key>:". Scanning stops as soon as the second match is found.
local function target_has_multiple_senseids(senseid_parent_etymon, target_key)
	local prefix = target_key .. ":"
	local matches = 0
	for key in pairs(senseid_parent_etymon) do
		if key:sub(1, #prefix) == prefix then
			matches = matches + 1
			if matches > 1 then
				return true
			end
		end
	end
	return false
end

-- Pick the ID that disambiguates an affix part, or nil when the term's `id`
-- does not disambiguate anything.
--   * `id` is a known senseid ("<target_key>:<id>" is a key of
--     `senseid_parent_etymon`): use it when the target has several senseids;
--     otherwise, when the target has several etymon IDs, use
--     `term.etymon_id` (falling back to `id`).
--   * otherwise, when the target has several etymon IDs: use `id` if it is
--     one of them, else `term.etymon_id` if that is one of them.
local function resolve_affix_part_id(term, available_etymon_ids, senseid_parent_etymon)
	local user_id = term.id
	if not user_id then
		-- Nothing to validate, so neither the ID list nor the senseid table is consulted
		return nil
	end

	local target_ids = available_etymon_ids[term.target_key]
	local has_multiple_ids = target_ids and #target_ids > 1

	if senseid_parent_etymon and senseid_parent_etymon[term.target_key .. ":" .. user_id] then
		-- The senseid table is only scanned here, where the count is actually needed
		if target_has_multiple_senseids(senseid_parent_etymon, term.target_key) then
			-- Ambiguous senseid: use senseid
			return user_id
		elseif has_multiple_ids then
			-- Unique senseid but ambiguous etymon: use etymon ID
			return term.etymon_id or user_id
		end
		return nil
	end

	if not has_multiple_ids then
		return nil
	end
	if etymon_id_listed(target_ids, user_id) then
		return user_id
	end
	-- Fallback: the etymon ID resolved for this term, if one is recorded
	if term.etymon_id and etymon_id_listed(target_ids, term.etymon_id) then
		return term.etymon_id
	end
	return nil
end

-- Collect affix categories from the group containers directly under `node`.
-- Only containers whose `keyword_info.affix_categories` is set contribute, and
-- terms flagged `unknown_term` are skipped. Each remaining term becomes one
-- part (numbered in order of appearance) handed to Module:affix's
-- get_affix_categories_only; the function returns the resulting category
-- names as a list of strings, or an empty list when there are no parts.
-- `lang_exc` is accepted for signature compatibility but is not read here.
local function collect_affix_categories(node, page_lang, available_etymon_ids, senseid_parent_etymon, lang_exc)
	local affix_module = require("Module:affix")
	local parts = {}

	for _, container in ipairs(node.children or {}) do
		local config = container.keyword_info
		if config and config.affix_categories then
			for _, term in ipairs(container.terms or {}) do
				if not term.unknown_term then
					local position = #parts + 1
					local part_data = {
						term = term.title,
						tr = term.tr,
						ts = term.ts,
						alt = term.alt,
						itemno = position,
						orig_index = position,
					}

					-- Affix type: explicit aftype > pos=root > left unset
					if term.aftype then
						part_data.type = aftype_aliases[term.aftype] or term.aftype
					elseif term.args and term.args.pos == "root" then
						part_data.type = "non-affix"
					end

					-- Only record the language when it differs from the page language
					if term.lang:getCode() ~= page_lang:getCode() then
						part_data.lang = term.lang
					end

					-- An explicit override always keeps the user's ID; otherwise
					-- an ID is attached only when it disambiguates the target
					local disambiguating_id = resolve_affix_part_id(term, available_etymon_ids, senseid_parent_etymon)
					if term.override or disambiguating_id then
						part_data.id = disambiguating_id or term.id
					end

					parts[position] = part_data
				end
			end
		end
	end

	if #parts == 0 then
		return {}
	end

	local affix_categories = affix_module.get_affix_categories_only({
		lang = page_lang,
		parts = parts,
		pos = "term",
		sort_key = nil,
	})

	-- Entries may be plain names or tables carrying the name in `cat`
	local result = {}
	for _, cat in ipairs(affix_categories) do
		if type(cat) == "table" then
			result[#result + 1] = cat.cat
		else
			result[#result + 1] = cat
		end
	end
	return result
end
-- Add borrowing-related categories for one term (called for top-level terms).
-- `categories` is a set (name -> true) that is updated in place.
--   * config.borrowing_type == "borrowed": adds whatever Module:etymology's
--     insert_borrowed_cat appends for (page_lang, term.lang).
--   * config.specialized_borrowing set: calls Module:etymology/specialized's
--     specialized_borrowing and adds every category name found in the
--     returned text as a "[[Category:...]]" link.
local function collect_borrowing_categories(categories, page_lang, term, config)
	local etymology_module = require("Module:etymology")

	if config.borrowing_type == "borrowed" then
		local temp_categories = {}
		etymology_module.insert_borrowed_cat(temp_categories, page_lang, term.lang)
		for _, cat in ipairs(temp_categories) do
			categories[cat] = true
		end
	end

	if config.specialized_borrowing then
		local etymology_specialized_module = require("Module:etymology/specialized")
		local result = etymology_specialized_module.specialized_borrowing {
			bortype = config.specialized_borrowing,
			lang = page_lang,
			sources = { term.lang },
			terms = { { lang = term.lang, term = "-" } },
			notext = true,
			nocat = false,
		}
		-- The name stops at "|" as well as "]", so a sort key in the link
		-- ("[[Category:Name|sortkey]]") never becomes part of the set key.
		for cat_name in result:gmatch("%[%[Category:([^%]|]+)[^%]]*%]%]") do
			categories[cat_name] = true
		end
	end
end
-- Add source-based derivation categories for one term (called for top-level terms).
-- Does nothing unless config.source_category_type is set. Otherwise it asks
-- Module:etymology's insert_source_cat_get_display for the categories of
-- (page_lang, term.lang), passing that type as `borrowing_type`, and records
-- each returned name in the `categories` set.
local function collect_source_derivation_categories(categories, page_lang, term, config)
	local derivation_type = config.source_category_type
	if derivation_type then
		local found = {}
		require("Module:etymology").insert_source_cat_get_display({
			lang = page_lang,
			source = term.lang,
			categories = found,
			borrowing_type = derivation_type,
			nocat = false,
		})
		for _, name in ipairs(found) do
			categories[name] = true
		end
	end
end
-- Add source-language categories for one term.
-- The term's language is first passed through `get_norm_lang_func`; when the
-- result has the same code as the page language nothing is added. Otherwise
-- the categories produced by Module:etymology's insert_source_cat_get_display
-- are recorded in the `categories` set, plus a second batch requested with
-- borrowing_type "terms inherited" when chain.inherited is set.
local function collect_source_categories(categories, page_lang, term, chain, get_norm_lang_func)
	local etymology_module = require("Module:etymology")
	local source_lang = get_norm_lang_func(term.lang)
	if page_lang:getCode() == source_lang:getCode() then
		return
	end

	-- Run one lookup and copy its results into the shared set
	local function add_batch(borrowing_type)
		local found = {}
		etymology_module.insert_source_cat_get_display({
			lang = page_lang,
			source = source_lang,
			categories = found,
			borrowing_type = borrowing_type,
			nocat = false,
		})
		for _, name in ipairs(found) do
			categories[name] = true
		end
	end

	add_batch(nil)
	if chain.inherited then
		add_batch("terms inherited")
	end
end
-- Add the root/word category for one term, when it qualifies.
-- The part of speech is taken, in priority order, from term.postype, from the
-- keyword configuration's pos_override, then from term.args.pos; only "root"
-- and "word" produce a category. Nothing is added for terms flagged
-- unknown_term, for chains flagged inside_affix, or when the term has the
-- page's own root title in the same (normalised) language.
-- The category name is stored as a key of the `categories` set.
local function collect_pos_categories(categories, page_lang, root_title, term, available_etymon_ids, chain,
	get_norm_lang_func, lang_exc, keyword)
	local config = get_keyword_config(keyword, lang_exc)

	local pos = term.postype -- term-level modifier has the highest priority
	if not pos then
		if config and config.pos_override then
			pos = config.pos_override
		elseif type(term.args) == "table" and term.args.pos then
			pos = term.args.pos
		end
	end

	if pos ~= "root" and pos ~= "word" then
		return
	end
	-- Unknown terms and descendants of affix groups get no root/word category
	if term.unknown_term or chain.inside_affix then
		return
	end

	-- Skip self-references
	local page_code = get_norm_lang_func(page_lang):getFullCode()
	local term_code = get_norm_lang_func(term.lang):getFullCode()
	if page_code == term_code and root_title == term.title then
		return
	end

	-- The entry name (rather than the raw title) is what goes into the category
	local entry_name = term.lang:makeEntryName(term.title)
	local lang_name = page_lang:getCanonicalName()
	local cat_name
	if chain.passed_through then
		local etymon_lang_name = export.get_cat_name(term.lang)
		cat_name = lang_name .. " terms derived from the " .. etymon_lang_name .. " " .. pos .. " " .. entry_name
	else
		cat_name = lang_name .. " terms belonging to the " .. pos .. " " .. entry_name
	end

	-- Append the ID when the target lists more than one etymon with this pos.
	-- etymon_id is preferred over the user-supplied id when both are present.
	local effective_id = term.etymon_id or term.id
	local target_ids = available_etymon_ids[term.target_key]
	if target_ids and effective_id then
		local same_pos = 0
		for _, id_data in ipairs(target_ids) do
			if type(id_data) == "table" and id_data.pos == pos then
				same_pos = same_pos + 1
			end
		end
		if same_pos > 1 then
			cat_name = cat_name .. " (" .. effective_id .. ")"
		end
	end

	categories[cat_name] = true
end
-- Compute chain state for a term based on parent chain and keyword config
-- Hyphen characters that mark an affix when they start or end a title.
-- They are stored as whole UTF-8 strings and compared exactly: Lua patterns
-- work on bytes, so putting multi-byte characters inside a "[...]" class
-- would match any title that merely shares one byte with them.
local AFFIX_HYPHENS = {
	"-", -- U+002D hyphen-minus
	"\214\190", -- U+05BE Hebrew maqaf
	"\217\128", -- U+0640 Arabic tatweel
	"\225\160\138", -- U+180A Mongolian nirugu
}

-- Check if a term is an actual affix (not a non-affix member of an affix group).
-- Decision order:
--   1. explicit term.aftype (after alias normalisation): anything but "non-affix";
--   2. term.args.pos == "root": not an affix;
--   3. term.title, ignoring one leading "*": an affix when it starts or ends
--      with one of AFFIX_HYPHENS;
--   4. otherwise not an affix.
-- Returns a boolean.
local function is_actual_affix(term)
	if term.aftype then
		local normalized = aftype_aliases[term.aftype] or term.aftype
		return normalized ~= "non-affix"
	end

	if type(term.args) == "table" and term.args.pos == "root" then
		return false
	end

	local title = term.title
	if not title then
		return false
	end

	-- Strip the leading * of reconstructed terms before looking at the edges
	title = title:gsub("^%*", "")
	for _, hyphen in ipairs(AFFIX_HYPHENS) do
		local width = #hyphen
		if title:sub(1, width) == hyphen or title:sub(-width) == hyphen then
			return true
		end
	end

	return false
end
-- Compute the chain state for a term from its parent's chain and the keyword config.
-- Returns a new table with the fields passed_through, inherited, source, pos,
-- recurse and inside_affix (plus internal_lang unless no_child_categories is set).
--   * inside_affix is inherited from the parent and switched on when the
--     keyword has affix_categories and `term` is an actual affix.
--   * passed_through is inherited, or set once the term's normalised language
--     code differs from the page language code.
--   * with config.no_child_categories, inherited/source/pos/recurse are all false.
--   * otherwise source survives only if the parent had it and the keyword is
--     transitive for this term; for CROSS_LANG_NO_INTERNAL_SOURCE a term in the
--     same language as its context switches source off and records that
--     language in internal_lang, while a term in another language clears internal_lang.
local function compute_category_chain(parent_chain, config, page_lang, term_lang, get_norm_lang_func, parent_term_lang, term)
	local inside_affix = parent_chain.inside_affix
	if config.affix_categories and term and is_actual_affix(term) then
		inside_affix = true
	end

	-- Evaluated lazily so it runs at the same point in either return path
	local function passed_through()
		return parent_chain.passed_through
			or page_lang:getCode() ~= get_norm_lang_func(term_lang):getCode()
	end

	-- If no_child_categories is set, disable everything
	if config.no_child_categories then
		return {
			passed_through = passed_through(),
			inherited = false,
			source = false,
			pos = false,
			recurse = false,
			inside_affix = inside_affix,
		}
	end

	local term_is_transitive = is_transitive(config.transitive, page_lang, term_lang)
	local source = parent_chain.source and term_is_transitive
	local internal_lang = parent_chain.internal_lang

	if config.transitive == TRANSITIVE.CROSS_LANG_NO_INTERNAL_SOURCE then
		local term_code = get_norm_lang_func(term_lang):getCode()
		-- Context language: the parent term's language, or the page language at the top
		local context_code = get_norm_lang_func(parent_term_lang or page_lang):getCode()
		-- Inside an internal derivation the comparison is against that
		-- derivation's language rather than the immediate context
		if term_code == (internal_lang or context_code) then
			-- Internal derivation
			source = false
			internal_lang = term_code
		else
			-- Cross-language: source keeps the value computed above
			internal_lang = nil
		end
	end

	local pos = parent_chain.pos
	return {
		passed_through = passed_through(),
		inherited = parent_chain.inherited and config.inherited_chain,
		source = source,
		pos = pos,
		internal_lang = internal_lang,
		recurse = source or pos,
		inside_affix = inside_affix,
	}
end
-- Walk an etymology tree and return the list of category names it implies.
-- `opts` fields read here: data_tree (root node), page_lang,
-- available_etymon_ids, senseid_parent_etymon, get_norm_lang_func, lang_exc.
-- Categories are gathered in a set and returned as a list; the list order is
-- whatever pairs() yields and is therefore unspecified.
function export.render(opts)
	opts = opts or {}
	local data_tree = opts.data_tree
	local page_lang = opts.page_lang
	local available_etymon_ids = opts.available_etymon_ids
	local senseid_parent_etymon = opts.senseid_parent_etymon
	local get_norm_lang_func = opts.get_norm_lang_func
	local lang_exc = opts.lang_exc

	local categories = {}
	local visited = {}
	local lang_name = page_lang:getCanonicalName()
	local root_title = data_tree.title

	local function add_source_categories(term, term_chain)
		collect_source_categories(categories, page_lang, term, term_chain, get_norm_lang_func)
	end

	local function add_pos_categories(term, term_chain, keyword)
		collect_pos_categories(categories, page_lang, root_title, term, available_etymon_ids, term_chain,
			get_norm_lang_func, lang_exc, keyword)
	end

	-- Everything that applies only to terms directly under the root node
	local function collect_toplevel_term(term, keyword, config, term_chain, term_is_transitive)
		-- Missing/ambiguous etymon tracking
		if not term.unknown_term then
			local status = term.status
			if status == STATUS.MISSING or status == STATUS.REDLINK then
				categories[lang_name .. " entries referencing missing etymons"] = true
			end
			if status == STATUS.AMBIGUOUS then
				categories[lang_name .. " entries referencing ambiguous etymons"] = true
			end
		end

		-- Keyword-specific top-level category
		if config.toplevel_category then
			categories[lang_name .. " " .. config.toplevel_category] = true
		end

		-- Borrowing categories requested by the keyword itself
		if config.borrowing_type or config.specialized_borrowing then
			collect_borrowing_categories(categories, page_lang, term, config)
		end

		-- Borrowing categories from the bor modifier on "af" terms
		if keyword == "af" and term.bor then
			collect_borrowing_categories(categories, page_lang, term, { borrowing_type = "borrowed" })
		end

		-- Source-based derivation categories
		if config.source_category_type then
			collect_source_derivation_categories(categories, page_lang, term, config)
		end

		-- no_child_categories suppresses the source and pos categories below
		if config.no_child_categories == true then
			return
		end
		if term_is_transitive then
			add_source_categories(term, term_chain)
		end
		add_pos_categories(term, term_chain, keyword)
	end

	local collect -- forward declaration: collect and collect_container call each other

	-- Process every term of one container; containers with an invalid keyword are skipped
	local function collect_container(container, node, parent_chain, is_toplevel)
		local keyword = container.keyword
		local config = get_keyword_config(keyword, lang_exc)
		if not config then
			return
		end

		for _, term in ipairs(container.terms or {}) do
			local term_chain = compute_category_chain(parent_chain, config, page_lang, term.lang, get_norm_lang_func, node.lang, term)
			local term_is_transitive = is_transitive(config.transitive, page_lang, term.lang)

			if is_toplevel then
				collect_toplevel_term(term, keyword, config, term_chain, term_is_transitive)
			else
				-- Below top level, respect the parent chain
				if parent_chain.source then
					add_source_categories(term, term_chain)
				end
				if parent_chain.pos then
					add_pos_categories(term, term_chain, keyword)
				end
			end

			-- Recurse into the term's children if needed and status allows
			if term_chain.recurse and (term.status == STATUS.OK or term.status == STATUS.INLINE) then
				collect(term, term_chain, false)
			end
		end
	end

	collect = function(node, parent_chain, is_toplevel)
		-- Avoid processing the same (language, title, id) node twice
		if not node.unknown_term and node.title then
			local key = node.lang:getFullCode() .. ":" .. node.title .. ":" .. (node.id or "")
			if visited[key] then
				return
			end
			visited[key] = true
		end

		-- Affix categories are collected at top level only
		if is_toplevel then
			local affix_cats = collect_affix_categories(node, page_lang, available_etymon_ids, senseid_parent_etymon, lang_exc)
			for _, cat in ipairs(affix_cats) do
				categories[lang_name .. " " .. cat] = true
			end
		end

		for _, container in ipairs(node.children or {}) do
			collect_container(container, node, parent_chain, is_toplevel)
		end
	end

	-- Initial chain state: everything enabled, nothing passed through yet
	collect(data_tree, {
		passed_through = false,
		inherited = true,
		source = true,
		pos = true,
		internal_lang = nil,
		recurse = true,
		inside_affix = false,
	}, true)

	local cat_list = {}
	for cat in pairs(categories) do
		cat_list[#cat_list + 1] = cat
	end
	return cat_list
end
return export