-- Module:etymon/text
-- Source: Wiktionary


local export = {}

local data = mw.loadData("Module:etymon/data")
local STATUS = data.STATUS

local references_module = "Module:references"

function export.render(opts)
	opts = opts or {}
	local data_tree = opts.data_tree
	local format_term_func = opts.format_term_func
	local max_depth = opts.max_depth
	local stop_at_blue_link = opts.stop_at_blue_link
	local curr_page = opts.curr_page
	local nodot = opts.nodot
	local stop_at_lang = opts.stop_at_lang

	local children = data_tree.children
	if not children or #children == 0 then
		return ""
	end

	local top_l2 = data_tree.lang:getFullCode() .. ":" .. curr_page

	-- Return the formatted references attached to `term`, or "" when none should be shown.
	-- References are rendered only for terms at depth 1, or for terms whose
	-- language key (full language code .. ":" .. curr_page) equals `top_l2`.
	local function get_term_refs(term, term_lang, depth)
		-- Built unconditionally, in the same shape as `top_l2`.
		local lang_key = term_lang:getFullCode() .. ":" .. curr_page

		local parsed = term.parsed_ref
		if not parsed then
			return ""
		end
		if depth ~= 1 and lang_key ~= top_l2 then
			return ""
		end

		return require(references_module).format_references(parsed)
	end

	-- Build the "term" part for one term.
	-- `current_lang` is the language in effect before this term; when the term's
	-- canonical language name differs, the rendered text is prefixed with a
	-- Wikipedia link for the term's language and that language becomes the part's `lang`.
	-- Returns a table with `type`, `text`, `refs`, `lang` and `is_uncertain`.
	local function build_term_part(term, current_lang, depth)
		local switches_lang = term.lang:getCanonicalName() ~= current_lang:getCanonicalName()

		-- format_term_func may return nil (e.g. for a suppressed term).
		local formatted = format_term_func(term)

		local part_lang, rendered
		if not switches_lang then
			part_lang = current_lang
			rendered = formatted or ""
		else
			part_lang = term.lang
			rendered = part_lang:makeWikipediaLink()
			if formatted then
				-- Language link followed by the term itself.
				rendered = rendered .. " " .. formatted
			end
			-- Otherwise (suppressed term with a language change) only the language is shown.
		end

		local part = {
			type = "term",
			text = rendered,
			lang = part_lang,
		}
		part.refs = get_term_refs(term, part_lang, depth)
		part.is_uncertain = term.is_uncertain or false
		return part
	end

	-- Build the "container" part for one container (a keyword plus its terms).
	--
	-- Parameters:
	--   container           table with `keyword_info`, optional `keyword_modifiers`
	--                       and optional `terms`
	--   node                parent node; its `lang` is the language in effect
	--                       before the first term
	--   depth               depth of this container (the top-level call uses 1)
	--   allow_continuation  whether the chain may continue through the single term
	--
	-- Returns nil when the container has no keyword info, is invisible, or has no
	-- terms; otherwise a table describing the container (intro text, phrase,
	-- uncertainty flags, term parts, continuation info and keyword references).
	local function build_container_part(container, node, depth, allow_continuation)
		local keyword_info = container.keyword_info
		local keyword_modifiers = container.keyword_modifiers or {}
		local terms = container.terms or {}

		if not keyword_info or keyword_info.invisible or #terms == 0 then
			return nil
		end

		local is_group = keyword_info.is_group
		local keyword_uncertain = keyword_modifiers.unc or false

		-- Determine text and phrase (allowing for overrides)
		local intro_text = keyword_info.text
		local phrase = keyword_info.phrase

		if keyword_modifiers.text then
			-- User-provided override: assumed to be lowercase
			phrase = keyword_modifiers.text
			-- Auto-capitalize for intro text (e.g., "derived from" -> "Derived from").
			-- Split on the first code point rather than the first byte: a byte-based
			-- string.sub would cut a multi-byte UTF-8 character (e.g. "é") in half,
			-- so it could never be upper-cased.
			local ustring = mw.ustring
			intro_text = ustring.upper(ustring.sub(phrase, 1, 1)) .. ustring.sub(phrase, 2)
		end

		-- Get keyword references
		local keyword_refs = ""
		if keyword_modifiers.ref then
			local references = require(references_module)
			local parsed_keyword_refs = references.parse_references(keyword_modifiers.ref)
			if parsed_keyword_refs and parsed_keyword_refs ~= "" then
				keyword_refs = references.format_references(parsed_keyword_refs)
			end
		end

		-- Build term parts; terms that render to an empty string are dropped and
		-- do not change the language in effect.
		local term_parts = {}
		local current_lang = node.lang

		for _, term in ipairs(terms) do
			local term_part = build_term_part(term, current_lang, depth)
			if term_part.text ~= "" then
				table.insert(term_parts, term_part)
				current_lang = term_part.lang
			end
		end

		-- Check uncertainty distribution
		local uncertain_count = 0
		for _, term_part in ipairs(term_parts) do
			if term_part.is_uncertain then
				uncertain_count = uncertain_count + 1
			end
		end

		-- If keyword itself is uncertain, treat all terms as uncertain
		local all_uncertain = keyword_uncertain or (uncertain_count == #term_parts and #term_parts > 0)
		local has_mixed_uncertainty = not keyword_uncertain and uncertain_count > 0 and uncertain_count < #term_parts

		-- Check if there are more steps (only if continuation is allowed)
		local has_more_steps = false
		local next_node = nil
		local first_term = terms[1]

		-- Check if we should stop at this language
		local reached_stop_lang = false
		if stop_at_lang then
			for _, term in ipairs(terms) do
				if term.lang and term.lang:getCode() == stop_at_lang then
					reached_stop_lang = true
					break
				end
			end
		end

		-- The chain continues only through a single, non-group term.
		if allow_continuation and not is_group and #terms == 1 and not reached_stop_lang then
			local first_term_children = first_term.children
			if first_term_children and #first_term_children > 0 and (not max_depth or depth < max_depth) then
				local next_container = first_term_children[1]
				local next_keyword_info = next_container and next_container.keyword_info
				if not (next_keyword_info and next_keyword_info.invisible) then
					-- With stop_at_blue_link, continue only when the term's status
					-- is not STATUS.OK.
					if not stop_at_blue_link or first_term.status ~= STATUS.OK then
						has_more_steps = true
						next_node = first_term
					end
				end
			end
		end

		return {
			type = "container",
			intro_text = intro_text,
			phrase = phrase,
			is_uncertain = all_uncertain,
			has_mixed_uncertainty = has_mixed_uncertainty,
			term_parts = term_parts,
			is_group = is_group,
			has_more_steps = has_more_steps,
			next_node = next_node,
			new_sentence = keyword_info.new_sentence or false,
			separate_clause = keyword_info.separate_clause or false,
			conj = keyword_modifiers.conj, -- custom conjunction: "and", "or", "and/or", etc.
			lit = keyword_modifiers.lit,
			keyword_refs = keyword_refs,
		}
	end

	-- Recursively turn `node.children` (containers) into a tree of text parts.
	-- Returns nil when the node has no containers or none of them yields a part;
	-- otherwise a table { type = "tree", container_parts = ..., depth = ... }.
	local function build_text_tree(node, depth, allow_continuation)
		local containers = node.children
		if not containers or #containers == 0 then
			return nil
		end

		-- A container counts as visible unless its keyword info marks it invisible.
		local visible = 0
		for _, container in ipairs(containers) do
			local info = container.keyword_info
			if not info or not info.invisible then
				visible = visible + 1
			end
		end

		-- With several visible containers at this level, none may continue the chain.
		local may_continue = allow_continuation and visible <= 1

		local built = {}
		for _, container in ipairs(containers) do
			local part = build_container_part(container, node, depth, may_continue)
			if part then
				if part.has_more_steps and part.next_node then
					-- Descend into the next node one level deeper.
					part.continuation = build_text_tree(part.next_node, depth + 1, true)
				end
				built[#built + 1] = part
			end
		end

		if #built == 0 then
			return nil
		end

		return {
			type = "tree",
			container_parts = built,
			depth = depth,
		}
	end

	-- Validate that the containers at each level of `tree` are joined in a single
	-- style. Every part after the first is classified as starting a new sentence,
	-- being a separate clause, or being an "or"-joined alternative; parts of type
	-- "etydate" are ignored. Raises an error if one level mixes more than one style.
	-- Always returns nil.
	--
	-- Continuations are checked for every part, not only when the level has a
	-- single part: an extra part at the same level (such as an appended etydate
	-- part) must not cause a continuation to go unchecked.
	local function check_complexity(tree)
		if not tree then return nil end

		local has_or_join = false
		local has_new_sentence = false
		local has_separate_clause = false

		for i, part in ipairs(tree.container_parts) do
			if part.continuation then
				check_complexity(part.continuation)
			end

			-- The first part has no join before it; etydate parts are ignored.
			if i > 1 and part.type ~= "etydate" then
				if part.new_sentence then
					has_new_sentence = true
				elseif part.separate_clause then
					has_separate_clause = true
				else
					has_or_join = true
				end
			end
		end

		local join_type_count = 0
		if has_or_join then join_type_count = join_type_count + 1 end
		if has_new_sentence then join_type_count = join_type_count + 1 end
		if has_separate_clause then join_type_count = join_type_count + 1 end

		if join_type_count > 1 then
			error(
				"Cannot generate etymology text: mixed joining styles (e.g., alternatives joined with 'or' cannot be combined with calques or influences in the same list).")
		end

		return nil
	end

	-- Walk an assembled text tree and annotate it in place with punctuation.
	--
	-- For every term part it sets `joiner`; for every container part it sets
	-- `punctuation`, `joiner`, `intro_capitalized` and `use_full_intro`.
	--
	-- Parameters:
	--   tree         tree table with `container_parts` (nil is accepted and ignored)
	--   is_toplevel  true for the outermost tree; controls capitalisation of the
	--                first container
	--   is_text_end  optional; true when the last part of this tree is the end of
	--                the whole text. Defaults to `is_toplevel`. It is propagated
	--                into the continuation of the last part so that `nodot` also
	--                suppresses the final period when the text ends inside a chain
	--                ("From A, from B").
	local function analyze_punctuation(tree, is_toplevel, is_text_end)
		if not tree then return end

		if is_text_end == nil then
			is_text_end = is_toplevel
		end

		local parts = tree.container_parts
		local num_parts = #parts

		for i, part in ipairs(parts) do
			local is_first = (i == 1)
			local is_last = (i == num_parts)
			local next_part = parts[i + 1]

			-- Analyze term punctuation within container
			-- Terms use Oxford comma style: "A, B, or C"
			-- Custom conjunction can be specified via conj modifier (e.g., "and/or", "and")
			local num_terms = #part.term_parts
			local term_conj = part.conj or "or" -- default to "or"
			for j, term_part in ipairs(part.term_parts) do
				local is_last_term = (j == num_terms)

				if part.is_group then
					-- Group: terms joined with " + "
					term_part.joiner = is_last_term and "" or " + "
				elseif num_terms > 1 then
					-- Multiple terms not in a group: Oxford comma style
					if is_last_term then
						term_part.joiner = ""
					elseif j == num_terms - 1 then
						-- Second to last term: no comma when there are only two terms
						if num_terms == 2 then
							term_part.joiner = " " .. term_conj .. " "
						else
							term_part.joiner = ", " .. term_conj .. " "
						end
					else
						term_part.joiner = ", "
					end
				else
					-- Single term
					term_part.joiner = ""
				end
			end

			-- Determine container punctuation based on what comes next
			if part.continuation then
				-- Has continuation
				part.punctuation = ","
				-- The continuation ends the whole text only if this part is the
				-- last one of a tree that itself ends the text.
				local continuation_ends_text = (is_last and is_text_end) and true or false
				analyze_punctuation(part.continuation, false, continuation_ends_text)
			elseif is_last then
				-- Last container: the final period is dropped only at the very end
				-- of the text, and only when nodot is set.
				part.punctuation = (is_text_end and nodot) and "" or "."
			elseif next_part and next_part.new_sentence then
				-- Next container starts a new sentence
				part.punctuation = "."
			elseif next_part and next_part.separate_clause then
				-- Next container is a separate clause
				part.punctuation = ","
			else
				-- Not last, next is joined with "or"
				-- Containers use repeated "or" style: "A, or B, or C"
				part.punctuation = ","
			end

			-- Determine joiner to next part
			-- Containers use repeated "or" style: ", or" between each
			-- Custom conjunction can be specified via conj modifier
			local container_conj = part.conj or "or" -- default to "or"
			if is_last then
				part.joiner = ""
			elseif next_part and (next_part.new_sentence or next_part.separate_clause) then
				-- New sentence or separate clause: only a space
				part.joiner = " "
			else
				-- Same sentence: use custom conjunction or default "or"
				part.joiner = " " .. container_conj .. " "
			end

			-- Determine intro formatting
			-- Capitalize if first at top level, OR if this container starts a new sentence
			local starts_sentence = ((is_first and is_toplevel) or part.new_sentence) and true or false
			part.intro_capitalized = starts_sentence
			part.use_full_intro = starts_sentence
		end
	end

	-- Render an analyzed tree (one whose parts already carry punctuation and
	-- joiners) into the final text. Returns "" for a nil tree.
	local function assemble_text(tree)
		if not tree then return "" end

		local out = ""

		-- Append unconditionally (a nil piece raises, as plain concatenation would).
		local function emit(piece)
			out = out .. piece
		end

		-- Append unless the piece is the empty string.
		local function emit_unless_empty(piece)
			if piece ~= "" then
				emit(piece)
			end
		end

		-- Append only when the piece is neither nil/false nor the empty string.
		local function emit_if_present(piece)
			if piece and piece ~= "" then
				emit(piece)
			end
		end

		for _, part in ipairs(tree.container_parts) do
			if part.type == "etydate" then
				-- Etydate: text, punctuation, references, then the joiner.
				emit(part.etydate_text)
				emit_if_present(part.punctuation)
				if part.etydate_refs and part.etydate_refs ~= "" then
					emit(require(references_module).format_references(part.etydate_refs))
				end
				emit_if_present(part.joiner)
			else
				-- Intro: full (capitalised) form at a sentence start, plain phrase
				-- otherwise; uncertain containers are prefixed with "possibly".
				local lead = part.phrase
				if part.is_uncertain then
					lead = (part.use_full_intro and "Possibly " or "possibly ") .. part.phrase
				elseif part.use_full_intro then
					lead = part.intro_text
				end
				emit(lead)

				local terms = part.term_parts
				local term_count = #terms

				if term_count == 0 then
					-- No terms: just punctuation and keyword references.
					emit_unless_empty(part.punctuation)
					emit_if_present(part.keyword_refs)
				else
					emit(" ")

					for _, term_part in ipairs(terms) do
						-- With mixed uncertainty, each uncertain term is marked individually.
						if part.has_mixed_uncertainty and term_part.is_uncertain then
							emit("possibly ")
						end

						emit(term_part.text)

						local joiner = term_part.joiner
						if joiner ~= "" then
							local comma_at = joiner:find(",")
							if comma_at then
								-- References go right after the comma, before the rest of the joiner.
								emit(joiner:sub(1, comma_at))
								emit_unless_empty(term_part.refs)
								emit(joiner:sub(comma_at + 1))
							else
								-- No comma: references go before the joiner.
								emit_unless_empty(term_part.refs)
								emit(joiner)
							end
						end
					end

					-- The final term has an empty joiner, so its references were not
					-- written in the loop above.
					local final_term = terms[term_count]
					if final_term and final_term.joiner == "" then
						local has_punctuation = part.punctuation ~= ""

						-- Without literal text the punctuation precedes the references;
						-- with literal text it comes after that text instead.
						if has_punctuation and not part.lit then
							emit(part.punctuation)
						end

						emit_unless_empty(final_term.refs)
						emit_if_present(part.keyword_refs)

						if part.lit then
							emit(", literally “" .. part.lit .. "”")
							if has_punctuation then
								emit(part.punctuation)
							end
						end
					end
				end

				-- Continuation of the chain, separated by a space.
				if part.continuation then
					emit(" " .. assemble_text(part.continuation))
				end

				-- Joiner to the next container.
				emit_unless_empty(part.joiner)
			end
		end

		return out
	end

	local text_tree = build_text_tree(data_tree, 1, true)
	if not text_tree then
		return ""
	end

	-- Add etydate container
	if data_tree.etydate and data_tree.etydate ~= "" then
		table.insert(text_tree.container_parts, {
			type = "etydate",
			etydate_text = data_tree.etydate,
			etydate_refs = data_tree.etydate_refs,
			term_parts = {},
			new_sentence = true,
		})
	end

	check_complexity(text_tree)
	analyze_punctuation(text_tree, true)
	return assemble_text(text_tree)
end

return export
"https://si.wiktionary.org/w/index.php?title=Module:etymon/text&oldid=226943" වෙතින් සම්ප්‍රවේශනය කෙරිණි