Jump to content

Module:etymon/data

Wiktionary වෙතින්


local export = {}

-- Status string constants exported for users of this module.
export.STATUS = {}
export.STATUS.OK = "ok"
export.STATUS.INLINE = "inline"
export.STATUS.MISSING = "missing"
export.STATUS.REDLINK = "redlink"
export.STATUS.AMBIGUOUS = "ambiguous"

-- Transitivity modes that a keyword can declare.
export.TRANSITIVE = {}
-- always recurse into children
export.TRANSITIVE.ALWAYS = "always"
-- never recurse into children
export.TRANSITIVE.NEVER = "never"
-- only recurse when source lang differs from target lang (but pos chain continues)
export.TRANSITIVE.CROSS_LANG = "cross_lang"
-- like CROSS_LANG, but source breaks for internal derivations in the same language context
export.TRANSITIVE.CROSS_LANG_NO_INTERNAL_SOURCE = "cross_lang_no_internal_source"

--- Deep-merge any number of plain data tables into a newly created table.
-- Arguments are processed left to right; for the same key, a later value
-- overrides an earlier one, except that when both values are tables they are
-- merged recursively.
--
-- Nested tables are always copied into the result rather than stored by
-- reference, so modifying the returned table never changes any of the inputs
-- (for example a shared preset table).
--
-- Limitations: processing stops at the first nil argument (ipairs), metatables
-- are not copied, and cyclic tables are not supported.
-- @param ... tables to merge
-- @return a new table containing the merged contents
local function deep_merge(...)
	local result = {}
	for _, t in ipairs({ ... }) do
		for k, v in pairs(t) do
			if type(v) == "table" then
				local existing = result[k]
				if type(existing) == "table" then
					-- Both sides are tables: merge them key by key.
					result[k] = deep_merge(existing, v)
				else
					-- Copy instead of aliasing the caller's nested table.
					result[k] = deep_merge(v)
				end
			else
				result[k] = v
			end
		end
	end
	return result
end

--- Build a wikilink pointing at an anchor on Appendix:Glossary.
-- @param term anchor name; spaces are replaced with underscores. When falsy, no link is built.
-- @param display_text text to show for the link
-- @return the wikilink string, or display_text unchanged when term is falsy
local function make_glossary_link(term, display_text)
	if term then
		-- Assigning to a single local keeps only gsub's first result (the string).
		local anchor = term:gsub(" ", "_")
		return "[[Appendix:Glossary#" .. anchor .. "|" .. display_text .. "]]"
	end
	return display_text
end

--- Split a trailing connector word ("of" or "from", any letter case) off a text.
-- For example "Borrowed from" yields "Borrowed" and " from".
-- @param text the text to split
-- @return the base text, followed by the connector including its leading
--         whitespace; when no connector is found, the whole text and ""
local function split_glossary_text(text)
	-- "of" is tried first, then "from".
	local base, connector = text:match("^(.-)(%s+[Oo][Ff])$")
	if not base then
		base, connector = text:match("^(.-)(%s+[Ff][Rr][Oo][Mm])$")
	end
	if base then
		return base, connector
	end
	return text, ""
end

local TRANSITIVE = export.TRANSITIVE

--- Build a keyword entry from an options table.
-- Boolean-like options default to false and `transitive` defaults to
-- TRANSITIVE.ALWAYS; the remaining options are copied as given. `phrase` and
-- `text` are only set when the keyword is not invisible. When a `glossary`
-- anchor is given, the base word of `text` is turned into a glossary link and
-- the trailing connector ("of"/"from") is kept outside the link.
-- @param opts table of keyword options
-- @return the new keyword entry table
local function create_keyword(opts)
	local keyword = {}

	keyword.is_group = opts.is_group or false
	keyword.abbrev = opts.abbrev
	keyword.glossary = opts.glossary

	keyword.transitive = opts.transitive or TRANSITIVE.ALWAYS -- default "always"
	keyword.inherited_chain = opts.inherited_chain or false
	keyword.affix_categories = opts.affix_categories or false
	keyword.borrowing_type = opts.borrowing_type
	keyword.specialized_borrowing = opts.specialized_borrowing
	keyword.toplevel_category = opts.toplevel_category
	keyword.no_child_categories = opts.no_child_categories or false
	keyword.source_category_type = opts.source_category_type
	keyword.invisible = opts.invisible or false
	keyword.pos_override = opts.pos_override
	keyword.new_sentence = opts.new_sentence or false
	keyword.separate_clause = opts.separate_clause or false
	keyword.aliases = opts.aliases

	-- Invisible keywords get neither phrase nor text.
	if opts.invisible then
		return keyword
	end

	keyword.phrase = opts.phrase

	local text = opts.text
	if text then
		local glossary = opts.glossary
		if glossary then
			local base_word, rest = split_glossary_text(text)
			keyword.text = make_glossary_link(glossary, base_word) .. rest
		else
			keyword.text = text
		end
	end

	return keyword
end

-- Option presets shared by groups of keywords; each keyword definition merges
-- one of these with its own options.
local DEFAULTS = {}

-- Keywords that pass through inheritance chain
DEFAULTS.inheritance = {
	inherited_chain = true,
	transitive = TRANSITIVE.ALWAYS,
}

-- Standard transitive derivation
DEFAULTS.transitive = { transitive = TRANSITIVE.ALWAYS }

-- Standard for internal derivations: transitive across languages, but not within them
DEFAULTS.internal_derivation = { transitive = TRANSITIVE.CROSS_LANG }

-- Borrowing keywords
DEFAULTS.borrowing = { transitive = TRANSITIVE.ALWAYS }

-- Affix group keywords (compound words, blends, etc.)
DEFAULTS.affix_group = {
	affix_categories = true,
	is_group = true,
	transitive = TRANSITIVE.CROSS_LANG,
}

-- Calque-like keywords (calque, partial calque, semantic loan)
DEFAULTS.calque_like = {
	new_sentence = true,
	no_child_categories = true,
	transitive = TRANSITIVE.NEVER,
}

-- Non-transitive influence
DEFAULTS.influence_like = {
	no_child_categories = true,
	transitive = TRANSITIVE.NEVER,
}

-- Keyword definitions, keyed by canonical keyword name. Each entry is built by
-- create_keyword, in most cases from a DEFAULTS preset deep-merged with the
-- keyword-specific options. Any `aliases` listed on an entry are registered as
-- additional keys of this table (and removed from the entry itself) by the
-- alias registration loop that follows this table.
export.keywords = {
	--
	-- Inheritance keywords
	--
	["from"] = create_keyword(deep_merge(DEFAULTS.internal_derivation, {
		text = "From", phrase = "from",
	})),
	["inherited"] = create_keyword(deep_merge(DEFAULTS.inheritance, {
		text = "Inherited from",
		phrase = "from",
		glossary = "inherited",
		aliases = { "inh" },
	})),

	--
	-- Basic derivation keywords
	--
	["uder"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "From",
		phrase = "from",
		toplevel_category = "undefined derivations",
	})),
	["derived"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Derived from",
		phrase = "from",
		abbrev = "der.",
		glossary = "derived terms",
		aliases = { "der" },
	})),

	--
	-- Affix/compound group keywords
	--
	["affix"] = create_keyword(deep_merge(DEFAULTS.affix_group, {
		text = "From",
		phrase = "from",
		aliases = { "af" },
	})),
	["blend"] = create_keyword(deep_merge(DEFAULTS.affix_group, {
		text = "Blend of",
		phrase = "a blend of",
		abbrev = "blend",
		glossary = "blend",
		toplevel_category = "blends",
	})),
	["univerbation"] = create_keyword(deep_merge(DEFAULTS.affix_group, {
		text = "Univerbation of",
		phrase = "univerbation of",
		abbrev = "univ.",
		glossary = "univerbation",
		toplevel_category = "univerbations",
		aliases = { "univ" },
	})),

	--
	-- Borrowing keywords
	--
	["bor"] = create_keyword(deep_merge(DEFAULTS.borrowing, {
		text = "Borrowed from",
		phrase = "borrowed from",
		abbrev = "bor.",
		glossary = "loanword",
		borrowing_type = "borrowed",
		aliases = { "borrowed" },
	})),
	["lbor"] = create_keyword(deep_merge(DEFAULTS.borrowing, {
		text = "Learned borrowing from",
		phrase = "a learned borrowing from",
		abbrev = "lbor.",
		glossary = "learned borrowing",
		specialized_borrowing = "learned",
	})),
	["obor"] = create_keyword(deep_merge(DEFAULTS.borrowing, {
		text = "Orthographic borrowing from",
		phrase = "an orthographic borrowing from",
		abbrev = "obor.",
		glossary = "orthographic borrowing",
		specialized_borrowing = "orthographic",
	})),
	["slbor"] = create_keyword(deep_merge(DEFAULTS.borrowing, {
		text = "Semi-learned borrowing from",
		phrase = "a semi-learned borrowing from",
		abbrev = "slbor.",
		glossary = "semi-learned borrowing",
		specialized_borrowing = "semi-learned",
	})),
	["ubor"] = create_keyword(deep_merge(DEFAULTS.borrowing, {
		text = "Unadapted borrowing from",
		phrase = "an unadapted borrowing from",
		abbrev = "ubor.",
		glossary = "unadapted borrowing",
		specialized_borrowing = "unadapted",
	})),

	--
	-- Calque-like keywords (non-transitive, start new sentence)
	--
	["calque"] = create_keyword(deep_merge(DEFAULTS.calque_like, {
		text = "Calque of",
		phrase = "a calque of",
		abbrev = "calq.",
		glossary = "calque",
		specialized_borrowing = "calque",
		aliases = { "cal", "clq" },
	})),
	["partial calque"] = create_keyword(deep_merge(DEFAULTS.calque_like, {
		text = "Partial calque of",
		phrase = "a partial calque of",
		abbrev = "pcalq.",
		glossary = "partial calque",
		specialized_borrowing = "partial-calque",
		aliases = { "pcal" },
	})),
	["semantic loan"] = create_keyword(deep_merge(DEFAULTS.calque_like, {
		text = "Semantic loan of",
		phrase = "a semantic loan of",
		abbrev = "sl.",
		glossary = "semantic loan",
		specialized_borrowing = "semantic-loan",
		aliases = { "sl" },
	})),

	--
	-- Influence keywords (non-transitive, separate clause)
	--
	["influence"] = create_keyword(deep_merge(DEFAULTS.influence_like, {
		text = "Influenced by",
		phrase = "influenced by",
		abbrev = "influ.",
		glossary = "contamination",
		separate_clause = true,
	})),

	--
	-- Morphological derivation keywords
	--
	["clipping"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Clipping of",
		phrase = "clipping of",
		abbrev = "clip.",
		glossary = "clipping",
		toplevel_category = "clippings",
		aliases = { "clip" },
	})),
	["ellipsis"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Ellipsis of",
		phrase = "ellipsis of",
		abbrev = "ellip.",
		glossary = "ellipsis",
		toplevel_category = "ellipses",
		aliases = { "ellip" },
	})),
	["back-formation"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Back-formation from",
		phrase = "a back-formation from",
		abbrev = "bf.",
		glossary = "back-formation",
		toplevel_category = "back-formations",
		aliases = { "bf" },
	})),
	-- NOTE(review): the phrase here is "borrowed from" although the text is
	-- "Transliteration of" — confirm this wording is intended.
	["transliteration"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Transliteration of",
		phrase = "borrowed from",
		abbrev = "translit.",
		glossary = "transliteration",
		aliases = { "translit" },
	})),
	["vrd"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Vṛddhi derivative of",
		phrase = "a vṛddhi derivative of",
		abbrev = "vṛd.",
		glossary = "vṛddhi derivative",
	})),
	["apheretic"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Apheretic form of",
		phrase = "an apheretic form of",
		abbrev = "aph.",
		glossary = "apheresis",
		aliases = { "apheresis", "aphetic" },
	})),
	["denominal"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Denominal verb from",
		phrase = "denominal verb from",
		abbrev = "denom.",
		glossary = "denominal",
		toplevel_category = "denominal verbs",
		aliases = { "denom" },
	})),
	["deverbal"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Deverbal from",
		phrase = "deverbal from",
		abbrev = "deverb.",
		glossary = "deverbal",
		toplevel_category = "deverbals",
	})),
	["reduplication"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Reduplication of",
		phrase = "reduplication of",
		abbrev = "redup.",
		glossary = "reduplication",
		toplevel_category = "reduplications",
		aliases = { "redup" },
	})),
	["abbreviation"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Abbreviation of",
		phrase = "abbreviation of",
		abbrev = "abbr.",
		glossary = "abbreviation",
		aliases = { "abbr" },
	})),
	["acronym"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Acronym of",
		phrase = "acronym of",
		abbrev = "acronym",
		glossary = "acronym",
		aliases = { "acro" },
	})),
	["initialism"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Initialism of",
		phrase = "initialism of",
		abbrev = "init.",
		glossary = "initialism",
		aliases = { "init" },
	})),
	["metathesis"] = create_keyword(deep_merge(DEFAULTS.transitive, {
		text = "Metathesis of",
		phrase = "metathesis of",
		abbrev = "meta.",
		glossary = "metathesis",
		toplevel_category = "words derived through metathesis",
		aliases = { "meta" },
	})),

	--
	-- Invisible keywords (no text output)
	--
	-- "root" is built directly from its options, without a DEFAULTS preset.
	["root"] = create_keyword {
		transitive = TRANSITIVE.ALWAYS,
		invisible = true,
		pos_override = "root",
	},
	-- text/phrase are supplied here, but create_keyword does not store them
	-- on the entry because invisible = true.
	["afeq"] = create_keyword(deep_merge(DEFAULTS.affix_group, {
		text = "From",
		phrase = "from",
		transitive = TRANSITIVE.NEVER,
		invisible = true,
	})),
}

-- Alias registration.
-- aliases_to_register maps alias name -> keyword entry; canonical_aliases maps
-- canonical keyword name -> its list of aliases. Aliases are collected first
-- and added to export.keywords afterwards, so that export.keywords gets no new
-- keys while it is being traversed with pairs().
local aliases_to_register = {}
local canonical_aliases = {}

for name, keyword_data in pairs(export.keywords) do
	local alias_list = keyword_data.aliases
	if alias_list then
		canonical_aliases[name] = alias_list
		for _, alias in ipairs(alias_list) do
			-- An alias may neither shadow a canonical keyword ...
			if export.keywords[alias] then
				local message = "Alias '" .. alias .. "' defined in keyword '" .. name
					.. "' collides with existing keyword '" .. alias .. "'."
				error(message)
			end
			-- ... nor be claimed by two different keywords.
			if aliases_to_register[alias] then
				local message = "Alias '" .. alias .. "' defined in keyword '" .. name
					.. "' is already claimed by another keyword."
				error(message)
			end
			aliases_to_register[alias] = keyword_data
		end
		-- The alias list is not kept on the keyword entry itself.
		keyword_data.aliases = nil
	end
end

-- Each alias key refers to the same entry table as its canonical keyword.
for alias, keyword_data in pairs(aliases_to_register) do
	export.keywords[alias] = keyword_data
end

--
-- Language exception presets
--
-- Named presets that create_exception can use as the base of a language exception.
local EXCEPTION_PRESETS = {}

-- Fully disallowed: no tree, no text, no categories
EXCEPTION_PRESETS.disallowed = {
	suppress_categories = true,
	disallow = { text = true, tree = true },
}

-- Suppress transliteration only
EXCEPTION_PRESETS.no_translit = { suppress_tr = true }

-- Suppress all categories only
EXCEPTION_PRESETS.no_categories = { suppress_categories = true }

--[=[
Available exception options:
	disallow = {               Related options for disallowing output:
		tree                   Disallow etymology trees for this language
		text                   Disallow etymology text generation for this language
		ref                    Reference link shown when tree/text is disallowed
	}
	suppress_tr                Suppress transliteration in links
	suppress_categories        Suppress all category generation
	normalize_to               Normalize language code to a different code
	normalize_from_families    Apply normalization to languages in these families
	normalize_exclude_families Exclude these families from normalization
	keyword_overrides          Per-keyword categorisation overrides (e.g. { ["af"] = { transitive = TRANSITIVE.NEVER } })
]=]
--- Build a language exception from an optional preset plus overrides.
-- @param preset name of an EXCEPTION_PRESETS entry, or nil/false for none;
--               an unknown name is treated like no preset
-- @param overrides optional table of options deep-merged over the preset
-- @return the merged exception table
local function create_exception(preset, overrides)
	local base = {}
	if preset then
		base = EXCEPTION_PRESETS[preset] or base
	end
	return deep_merge(base, overrides or {})
end

-- Module configuration; lang_exceptions maps a language code to its exception options.
export.config = { lang_exceptions = {} }

-- "zh": the "disallowed" preset plus a reference link, transliteration
-- suppression and language-code normalization settings.
export.config.lang_exceptions["zh"] = create_exception("disallowed", {
	normalize_exclude_families = { "qfa-cnt" },
	normalize_from_families = { "zhx" },
	normalize_to = "zh",
	suppress_tr = true,
	disallow = { ref = "[[Wiktionary:Beer parlour/2025/May#Template:etymon for Chinese]]" },
})

--
-- Propagate keyword overrides to aliases
--
do
	-- Copy the override of each canonical keyword onto those of its aliases
	-- that do not already have an override of their own. The alias shares the
	-- same override table as the canonical keyword.
	local function propagate_to_aliases(overrides)
		for canonical, alias_list in pairs(canonical_aliases) do
			local override_data = overrides[canonical]
			if override_data then
				for _, alias in ipairs(alias_list) do
					if not overrides[alias] then
						overrides[alias] = override_data
					end
				end
			end
		end
	end

	local lang_exceptions = export.config.lang_exceptions
	if lang_exceptions then
		for _, exception in pairs(lang_exceptions) do
			if exception.keyword_overrides then
				propagate_to_aliases(exception.keyword_overrides)
			end
		end
	end
end

return export
"https://si.wiktionary.org/w/index.php?title=Module:etymon/data&oldid=226941" වෙතින් සම්ප්‍රවේශනය කෙරිණි