-- Module:string/pattern simplifier
-- Appearance
-- - මෙම module සතුව උපදෙස් උප පිටුවක් නොපවතියි. Please නිර්මාණය කරන්න.
-- - ප්රයෝජනවත් සබැඳි: root page • root page’s subpages • සබැඳි • transclusions • testcases • sandbox
local concat = table.concat
local find = string.find
local gsub = string.gsub
local match = string.match
local sub = string.sub
-- Cache of already-processed patterns, keyed by the input pattern string.
-- Values are either the resulting pattern string or `false`.
local memo = {}

--- Record `result` for `input` in the cache and hand the same value back,
-- so callers can write `return memoize(input, value)` in one step.
-- @param input   key under which the result is stored
-- @param result  value to cache (a string or `false`)
-- @return `result`, unchanged
local function memoize(input, result)
	memo[input] = result
	return result
end
--- Scan a pattern and rewrite the parts that involve multi-byte sequences
-- (lead byte \194-\244 followed by continuation bytes \128-\191) so that the
-- result can be used as a plain byte-oriented Lua pattern.
-- NOTE(review): presumably this lets callers use the `string` library instead
-- of a Unicode-aware matcher when the pattern allows it -- confirm with callers.
--
-- The scan walks the pattern left to right. Unchanged stretches are copied
-- verbatim; rewritten pieces are collected in `output` and joined at the end.
-- Whenever a construct is met that this scanner does not convert, the
-- function gives up and returns `false`.
--
-- @param input  the pattern string to process
-- @return the pattern (identical to `input` if nothing needed rewriting, or
--         the rewritten form), or `false` if the pattern is declined.
--         Every outcome is stored in `memo`.
return function(input)
	local memoized = memo[input]
	-- Compare against nil instead of testing truthiness: declined patterns
	-- are cached as `false`, and a bare `if memoized` would treat those cache
	-- hits as misses and re-scan the pattern on every call.
	if memoized ~= nil then
		return memoized
	end
	-- pattern:  alias of the input being scanned
	-- pos:      current byte offset of the scan
	-- captures: number of "(" seen so far
	-- start:    first byte not yet copied into `output`
	-- n/output: length and contents of the list of emitted pieces
	--           (`output` stays nil until the first rewrite)
	local pattern, pos, captures, start, n, output = input, 1, 0, 1, 0
	while true do
		local char, nxt_pos
		-- Find the next item of interest: "%", "(", ".", "[", or a multi-byte
		-- sequence. `pos` is its offset, `nxt_pos` the offset just after it.
		pos, char, nxt_pos = match(pattern, "()([%%(.%[[\194-\244][\128-\191]*)()", pos)
		if not char then
			break
		end
		-- Byte that follows the item (empty string at end of pattern).
		local nxt = sub(pattern, nxt_pos, nxt_pos)
		if char == "%" then
			if nxt == "b" then
				-- "%bxy": decline if either of the two bytes after "%b" is a
				-- multi-byte lead byte; otherwise skip all four bytes.
				if match(sub(pattern, pos + 2, pos + 3), "[\194-\244]") then
					return memoize(input, false)
				end
				pos = pos + 4
			elseif find("acdlpsuwxACDLPSUWXZ", nxt, 1, true) then
				-- Character-class escapes listed here are never converted.
				return memoize(input, false)
			else
				-- Any other escape: step over "%" and the escaped byte.
				pos = pos + 2
			end
		elseif char == "(" then
			-- Decline "()" and any pattern that already has 32 captures.
			-- NOTE(review): "()" is Lua's position capture, and 32 presumably
			-- mirrors the string library's capture limit -- confirm.
			if nxt == ")" or captures == 32 then
				return memoize(input, false)
			end
			captures = captures + 1
			pos = pos + 1
		elseif char == "." then
			if nxt == "?" then
				-- ".?" becomes: optional first byte plus any continuation bytes.
				output = output or {}
				n = n + 1
				output[n] = sub(pattern, start, pos - 1) .. "[%z\1-\127\194-\244]?[\128-\191]*"
				pos = pos + 2
				start = pos
			elseif nxt == "*" or nxt == "+" or nxt == "-" then
				-- ".*", ".+" and ".-" are left exactly as written.
				pos = pos + 2
			else
				-- Bare ".": one first byte plus any continuation bytes.
				output = output or {}
				n = n + 1
				output[n] = sub(pattern, start, pos - 1) .. "[%z\1-\127\194-\244][\128-\191]*"
				pos = pos + 1
				start = pos
			end
		elseif char == "[" then
			-- Negated sets are never converted.
			if nxt == "^" then
				return memoize(input, false)
			end
			pos = pos + 1
			-- Second byte shared by every two-byte sequence seen in this set;
			-- nil while the set has contained none.
			local trail
			while true do
				-- Inside the set only "%", "]" and multi-byte sequences matter.
				pos, char, nxt_pos = match(pattern, "()([%%%]\194-\244][\128-\191]*)()", pos)
				if not char then
					-- No closing "]" found: decline.
					return memoize(input, false)
				elseif char == "%" then
					if find("acdlpsuwxACDLPSUWXZ", sub(pattern, nxt_pos, nxt_pos), 1, true) then
						return memoize(input, false)
					end
					pos = pos + 2
				elseif char == "]" then
					if trail then
						-- The set held two-byte sequences: their lead bytes stay
						-- in the set and the shared second byte is appended
						-- after it as an optional byte.
						local suffix = sub(pattern, nxt_pos, nxt_pos)
						if suffix == "?" then
							-- Keep the "?" attached to the set itself.
							pos = pos + 1
						elseif suffix == "*" or suffix == "+" or suffix == "-" then
							return memoize(input, false)
						end
						output = output or {}
						n = n + 1
						output[n] = sub(pattern, start, pos) .. trail .. "?"
						pos = pos + 1
						start = pos
					else
						pos = pos + 1
					end
					break
				elseif #char == 2 then
					-- Two-byte sequence: all such sequences in one set must
					-- share the same second byte, otherwise decline.
					local char_trail = sub(char, 2, 2)
					if not trail then
						trail = char_trail
					elseif trail ~= char_trail then
						return memoize(input, false)
					end
					-- Emit everything up to and including the lead byte, then
					-- resume copying after the (dropped) second byte.
					output = output or {}
					n = n + 1
					output[n] = sub(pattern, start, pos)
					pos = pos + 2
					start = pos
				else
					-- Sequences longer than two bytes inside a set: decline.
					return memoize(input, false)
				end
			end
		elseif nxt == "?" then
			-- Multi-byte sequence followed by "?": give every byte its own "?".
			-- (gsub's second return value is discarded by the concatenation.)
			output = output or {}
			n = n + 1
			output[n] = sub(pattern, start, pos - 1) .. gsub(char, ".", "%0?")
			pos = nxt_pos + 1
			start = pos
		elseif nxt == "*" or nxt == "+" or nxt == "-" then
			-- Multi-byte sequence followed by a repetition: only two-byte
			-- sequences are handled, by wrapping their bytes in a set so the
			-- quantifier that follows applies to that set.
			if #char ~= 2 then
				return memoize(input, false)
			end
			output = output or {}
			n = n + 1
			output[n] = sub(pattern, start, pos - 1) .. "[" .. char .. "]"
			pos = nxt_pos
			start = pos
		else
			-- Multi-byte sequence with no quantifier: leave it untouched.
			pos = nxt_pos
		end
	end
	-- `start` only moves when a piece has been emitted, so 1 means the
	-- pattern needed no rewriting and can be returned as is.
	if start == 1 then
		return memoize(input, pattern)
	end
	-- Append the unprocessed tail and join all pieces.
	n = n + 1
	output[n] = sub(pattern, start)
	return memoize(input, concat(output))
end