-- (Page navigation: "Jump to content")
--
-- Module:string/pattern simplifier
--
-- From Wiktionary


local concat = table.concat
local find = string.find
local gsub = string.gsub
local match = string.match
local sub = string.sub

local memo = {}

local function memoize(input, result)
	memo[input] = result
	return result
end

--- Rewrite a pattern so that the multibyte UTF-8 characters and `.` wildcards
-- in it are expressed in terms of individual bytes.
--
-- NOTE(review): the intent appears to be letting a pattern written for
-- character-aware matching run on the byte-based `string` library; confirm
-- against the callers of this module.
--
-- Every result, including `false`, is stored in `memo` keyed by `input`.
--
-- @param input  the pattern string to convert
-- @return `input` itself when nothing had to be rewritten, a new pattern
--   string when parts were rewritten, or `false` when the pattern contains a
--   construct this function does not convert
return function(input)
	local memoized = memo[input]
	-- Compare against nil instead of testing truthiness: `false` is a valid
	-- cached result and has to be served from the cache as well, otherwise
	-- every unconvertible pattern is re-scanned on each call.
	if memoized ~= nil then
		return memoized
	end
	-- pos:      current scan position in the pattern
	-- captures: number of "(" seen so far
	-- start:    first byte of the pattern not yet copied into `output`
	-- n:        number of pieces stored in `output`
	-- output:   list of rewritten pieces, created lazily (nil until needed)
	local pattern, pos, captures, start, n, output = input, 1, 0, 1, 0
	while true do
		local char, nxt_pos
		-- Find the next item of interest: "%", "(", ".", "[", or a byte in
		-- \194-\244 together with any bytes in \128-\191 that follow it
		-- (a UTF-8 lead byte plus its continuation bytes).
		pos, char, nxt_pos = match(pattern, "()([%%(.%[[\194-\244][\128-\191]*)()", pos)
		if not char then
			break
		end
		-- The single byte immediately after the matched item.
		local nxt = sub(pattern, nxt_pos, nxt_pos)
		if char == "%" then
			if nxt == "b" then
				-- "%bxy": give up if either delimiter starts with a byte
				-- in \194-\244; otherwise skip all four bytes.
				if match(sub(pattern, pos + 2, pos + 3), "[\194-\244]") then
					return memoize(input, false)
				end
				pos = pos + 4
			elseif find("acdlpsuwxACDLPSUWXZ", nxt, 1, true) then
				-- Character classes are not converted.
				return memoize(input, false)
			else
				-- Any other escape: skip "%" and the escaped byte.
				pos = pos + 2
			end
		elseif char == "(" then
			-- Give up on position captures "()" and once 32 captures have
			-- already been counted.
			-- NOTE(review): 32 presumably mirrors the capture limit of the
			-- string library; confirm.
			if nxt == ")" or captures == 32 then
				return memoize(input, false)
			end
			captures = captures + 1
			pos = pos + 1
		elseif char == "." then
			if nxt == "?" then
				-- ".?" becomes an optional first byte followed by any number
				-- of continuation bytes; the original "?" is consumed.
				output = output or {}
				n = n + 1
				output[n] = sub(pattern, start, pos - 1) .. "[%z\1-\127\194-\244]?[\128-\191]*"
				pos = pos + 2
				start = pos
			elseif nxt == "*" or nxt == "+" or nxt == "-" then
				-- ".*", ".+" and ".-" are left exactly as written.
				pos = pos + 2
			else
				-- A bare "." becomes one first byte followed by any number
				-- of continuation bytes.
				output = output or {}
				n = n + 1
				output[n] = sub(pattern, start, pos - 1) .. "[%z\1-\127\194-\244][\128-\191]*"
				pos = pos + 1
				start = pos
			end
		elseif char == "[" then
			-- Negated sets are not converted.
			if nxt == "^" then
				return memoize(input, false)
			end
			pos = pos + 1
			-- Second byte shared by every two-byte character in this set
			-- (nil while none has been seen).
			local trail
			while true do
				-- Inside the set, look for "%", "]" or a multibyte character.
				pos, char, nxt_pos = match(pattern, "()([%%%]\194-\244][\128-\191]*)()", pos)
				if not char then
					-- No closing "]" was found.
					return memoize(input, false)
				elseif char == "%" then
					if find("acdlpsuwxACDLPSUWXZ", sub(pattern, nxt_pos, nxt_pos), 1, true) then
						return memoize(input, false)
					end
					pos = pos + 2
				elseif char == "]" then
					if trail then
						local nxt = sub(pattern, nxt_pos, nxt_pos)
						if nxt == "?" then
							-- Step onto the "?" so it is copied along with
							-- the closing bracket below.
							pos = pos + 1
						elseif nxt == "*" or nxt == "+" or nxt == "-" then
							-- Repeated sets containing multibyte characters
							-- are not converted.
							return memoize(input, false)
						end
						-- Emit the set (plus "?" if present), followed by
						-- the shared second byte made optional.
						output = output or {}
						n = n + 1
						output[n] = sub(pattern, start, pos) .. trail .. "?"
						pos = pos + 1
						start = pos
					else
						-- No multibyte characters in this set: leave it as is.
						pos = pos + 1
					end
					break
				elseif #char == 2 then
					local char_trail = sub(char, 2, 2)
					if not trail then
						trail = char_trail
					elseif trail ~= char_trail then
						-- Two-byte characters with different second bytes
						-- cannot share a single trailing byte.
						return memoize(input, false)
					end
					-- Copy up to and including the first byte of the
					-- character; advancing `start` by two drops its second
					-- byte from the set.
					output = output or {}
					n = n + 1
					output[n] = sub(pattern, start, pos)
					pos = pos + 2
					start = pos
				else
					-- Characters that are not exactly two bytes long are not
					-- converted inside sets.
					return memoize(input, false)
				end
			end
		elseif nxt == "?" then
			-- A multibyte character followed by "?": make every one of its
			-- bytes individually optional and consume the original "?".
			output = output or {}
			n = n + 1
			output[n] = sub(pattern, start, pos - 1) .. gsub(char, ".", "%0?")
			pos = nxt_pos + 1
			start = pos
		elseif nxt == "*" or nxt == "+" or nxt == "-" then
			-- A multibyte character followed by a repetition: only two-byte
			-- characters are handled, by wrapping both bytes in a set. The
			-- quantifier itself stays in the pattern and is copied later.
			if #char ~= 2 then
				return memoize(input, false)
			end
			output = output or {}
			n = n + 1
			output[n] = sub(pattern, start, pos - 1) .. "[" .. char .. "]"
			pos = nxt_pos
			start = pos
		else
			-- A multibyte character with no quantifier needs no rewriting.
			pos = nxt_pos
		end
	end
	-- `start` only moves when something was rewritten, so 1 means the pattern
	-- can be returned untouched.
	if start == 1 then
		return memoize(input, pattern)
	end
	-- Append whatever follows the last rewritten piece.
	n = n + 1
	output[n] = sub(pattern, start)
	return memoize(input, concat(output))
end
"https://si.wiktionary.org/w/index.php?title=Module:string/pattern_simplifier&oldid=164317" වෙතින් සම්ප්‍රවේශනය කෙරිණි