Module:string/validatePattern
පෙනුම
- මෙම module සතුව උපදෙස් උප පිටුවක් නොපවතියි. කරුණාකර එය නිර්මාණය කරන්න.
- ප්‍රයෝජනවත් සබැඳි: root page • root page’s subpages • සබැඳි • transclusions • testcases • sandbox
local byte = string.byte
local error = error
local find = string.find
local match = string.match
-- Scans the body of a charset and locates its closing "]".
--
-- `pattern` is the whole pattern string; `pos` is the byte position of the
-- first character after the opening "[".
--
-- Returns the byte position immediately after the closing "]" on success, or
-- `false` plus an error message if the pattern ends before the charset does.
local function parse_charset(pattern, pos)
	local first = byte(pattern, pos)
	-- A leading "^" negates the charset; step over it and look at what follows.
	if first == 0x5E then -- ^
		pos = pos + 1
		first = byte(pattern, pos)
	end
	-- A "]" in first position (also directly after "^") is a literal member of
	-- the set rather than its terminator, so step over it too.
	if first == 0x5D then -- ]
		pos = pos + 1
	end
	while true do
		-- Jump to the next "%" or "]"; `after` is the position following it.
		local found, after = match(pattern, "([%%%]])()", pos)
		if found == "]" then
			-- Terminator reached.
			return after
		elseif found == nil then
			-- Ran off the end of the pattern without seeing the terminator.
			return false, "unclosed charset: must be closed with ']'"
		end
		-- Otherwise `found` is "%": the character it escapes is skipped so
		-- that an escaped "]" is not mistaken for the terminator.
		pos = after + 1
	end
end
-- Statically checks whether `pattern` is a well-formed Lua pattern.
--
-- `pattern` is the pattern string to check. `str_lib` selects library-specific
-- limits: "string" forbids the null character and more than 32 capture
-- groups; "ustring" forbids patterns longer than 10,000 bytes.
--
-- Returns `true` if no problem was found, otherwise `false` plus a message
-- describing the first problem encountered. This function never raises for a
-- string input: every malformed pattern is reported through the return values.
local function validate_pattern(pattern, str_lib)
	if pattern == "" then
		return true
	-- "\000" can be used in ustring patterns, or with string.find iff the
	-- `plain` flag is set.
	elseif str_lib == "string" and find(pattern, "\000", nil, true) then
		return false, "string library pattern cannot contain the null character '\\000'"
	elseif str_lib == "ustring" and #pattern > 10000 then
		return false, "ustring library pattern cannot be longer than 10,000 bytes"
	end
	-- Capture groups are numbered in the order their "(" appears.
	--   cap_total: number of groups started so far (= index of the newest).
	--   open:      stack holding the indices of groups that are still open;
	--              ")" always closes the most recently opened one.
	--   n_open:    height of the `open` stack.
	--   closed:    closed[n] is true once group n has been closed.
	local pos, cap_total, n_open, open, closed, ch = 1, 0, 0, {}, {}
	repeat
		-- Jump to the next character with structural meaning; `pos` becomes
		-- the position following it.
		ch, pos = match(pattern, "([%%()[])()", pos)
		-- Escaping "%".
		if ch == "%" then
			local nxt = byte(pattern, pos)
			-- End of string: the escape sequence is incomplete. This must be
			-- tested before any numeric comparison on `nxt`, since comparing
			-- nil with a number raises an error.
			if nxt == nil then
				return false, "incomplete escape sequence: final '%' must be followed by a character"
			-- Balanced string "%bxy".
			elseif nxt == 0x62 then -- b
				-- Must be followed by two characters, which are always treated
				-- as literals (even "%").
				if byte(pattern, pos + 2) == nil then
					return false, "incomplete balanced string: '%b' must be followed by two characters"
				end
				pos = pos + 3
			-- Frontier pattern %f[abc].
			elseif nxt == 0x66 then -- f
				if byte(pattern, pos + 1) ~= 0x5B then -- [
					return false, "incomplete frontier pattern: '%f' must be followed by a charset"
				end
				-- Charset after "%f".
				local result, err_msg = parse_charset(pattern, pos + 2)
				if not result then
					return false, err_msg
				end
				pos = result
			-- "%0" is a reference to the full match, which can never be valid
			-- in patterns since it will always be incomplete; only valid in
			-- replacement strings.
			elseif nxt == 0x30 then -- 0
				return false, "invalid capture index '%0'"
			-- Back-reference "%1" to "%9".
			elseif nxt >= 0x31 and nxt <= 0x39 then -- 1-9
				-- The referenced group itself must already be closed: a
				-- reference to a group that has not started yet (e.g.
				-- "%1(foo)") or that is still open (e.g. "((a)%1)") is
				-- invalid, whereas "((a)%2)" is fine because group 2 is
				-- complete even though group 1 is not.
				local n = nxt - 0x30
				if not closed[n] then
					return false, "invalid capture index '%" .. n .. "'"
				end
				pos = pos + 1
			else
				-- Any other escaped character: skip it.
				pos = pos + 1
			end
		-- New capture group (abc).
		elseif ch == "(" then
			-- String library patterns cannot have more than 32 capture groups.
			if str_lib == "string" and cap_total >= 32 then
				return false, "string library pattern cannot contain more than 32 capture groups"
			end
			-- Number the new group and push it onto the stack of open groups.
			cap_total = cap_total + 1
			n_open = n_open + 1
			open[n_open] = cap_total
		-- End of capture group.
		elseif ch == ")" then
			-- There must be at least one open group.
			if n_open < 1 then
				return false, "cannot close a capture group with ')' when none are open"
			end
			-- Pop the innermost open group and mark it as complete.
			closed[open[n_open]] = true
			open[n_open] = nil
			n_open = n_open - 1
		-- Charset [abc].
		elseif ch == "[" then
			local result, err_msg = parse_charset(pattern, pos)
			if not result then
				return false, err_msg
			end
			pos = result
		end
	until not ch
	-- End of string throws an error if any capture groups are open, as they are
	-- incomplete.
	if n_open > 0 then
		return false, "unclosed capture group: must be closed with ')'"
	end
	return true
end
-- Module export: validates `pattern` against the rules of `str_lib`
-- (defaulting to "string" when it is nil or false).
--
-- Returns `true` for a valid pattern. For an invalid pattern, returns `false`
-- plus an error message when `safe` is truthy, and otherwise raises that
-- message as an error.
return function(pattern, str_lib, safe)
	local valid, problem = validate_pattern(pattern, str_lib or "string")
	if not valid then
		-- Only callers that opted into safe mode get the failure as values.
		if not safe then
			error(problem)
		end
		return valid, problem
	end
	return valid
end