More actions
No edit summary |
m 1 revision imported |
(No difference)
|
Latest revision as of 07:23, 7 June 2024
-- Module table; the public functions are attached to it further down.
local p = {}

-- Byte codes of the wikitext delimiter characters the parser compares
-- against (comparing bytes avoids creating one-character substrings).
local CHAR_EQUAL = ("="):byte()
local CHAR_OPEN_BRACE = ("{"):byte()
local CHAR_PIPE = ("|"):byte()
local CHAR_CLOSE_BRACE = ("}"):byte()
local CHAR_OPEN_BRACKET = ("["):byte()
local CHAR_CLOSE_BRACKET = ("]"):byte()
local CHAR_GREATER_THAN = (">"):byte()
local CHAR_LOWER_THAN = ("<"):byte()
local CHAR_SLASH = ("/"):byte()

-- Method table for parser instances, plus the metatable that routes
-- method lookups on an instance to that table.
local FormatParser = {}
local FormatParserMetatable = { __index = FormatParser }
--- Collect the templates (and their arguments) used in a page or in a raw
-- wikitext string.
--
-- @param page     Page title to load, or the wikitext itself when
--                 `options.custom` is set.
-- @param options  Optional table of switches:
--                 * custom      - treat `page` as wikitext, do not load a page
--                 * namespace   - namespace handed to mw.title.makeTitle
--                                 (defaults to '')
--                 * unstrip     - run mw.text.unstrip on the text before parsing
--                 * only        - return just the entry recorded for this
--                                 template name
--                 * pageContent - also store the parsed text under the
--                                 PAGECONTENT key of the result
-- @return table   Result of p._parseFormat (or the `only` entry of it).
--                 An empty table is returned when there is nothing to parse:
--                 missing `page`, invalid title, or missing/empty page content.
function p.getTemplateArgs(page, options)
	options = options or {}

	-- Without this guard a nil/false `page` in custom mode makes
	-- p._parseFormat return nil, and the indexing further down
	-- (templates[options.only], templates.PAGECONTENT) raises an error.
	if not page then
		return {}
	end

	local rawContent
	if options.custom then
		rawContent = page
	else
		local title = mw.title.makeTitle(options.namespace or '', page)
		if title == nil then
			return {}
		end
		rawContent = title:getContent()
		if rawContent == nil or rawContent == '' then
			return {}
		end
		-- Drop HTML comments, then append a '<wbr>' sentinel.
		-- NOTE(review): the sentinel presumably guarantees that a template
		-- closing at the very end of the page is still followed by at least
		-- one character -- confirm before removing it.
		rawContent = (rawContent:gsub('<!%-%-.-%-%->', '')) .. '<wbr>'
	end

	-- remove strip markers (e.g., <ref></ref> )
	if options.unstrip then
		rawContent = mw.text.unstrip(rawContent)
	end

	local templates = p._parseFormat(rawContent, options.only)

	-- return only specific template to simplify usage
	if options.only then
		templates = templates[options.only] or {}
	end

	-- return raw page content too for extra parsing
	if options.pageContent then
		templates.PAGECONTENT = rawContent
	end

	-- return parsed content
	return templates
end
--- Create a parser over `str` and run it.
--
-- @param str   Wikitext to scan. A nil/false value short-circuits to nil.
-- @param only  Optional template name, forwarded to FormatParser:parse.
-- @return      Whatever FormatParser:parse returns, or nil when `str` is
--              missing.
function p._parseFormat(str, only)
	if str then
		-- TODO consider supporting DPL-style escape characters
		local state = setmetatable({
			str = str,        -- text being scanned
			strLen = #str,    -- cached length of that text
			i = 1,            -- cursor (1-based byte index)
			tokenStart = 1,   -- first byte of the token being collected
			template = nil,   -- name of the template currently being read
		}, FormatParserMetatable)
		return state:parse(only)
	end
	return nil
end
--- Byte at the cursor, or at `offset` bytes away from it.
-- @param offset  Optional signed distance from the cursor (defaults to 0).
-- @return        The byte code at that position; nothing when the position
--                lies outside the text.
function FormatParser:peekByte(offset)
	local position = self.i
	if offset then
		position = position + offset
	end
	return self.str:byte(position)
end
--- Scan the whole text and collect every top-level "{{ ... }}" transclusion.
--
-- @param only  Optional template name; when given, only transclusions whose
--              name equals it are recorded.
-- @return table keyed by template name. A template seen once maps to its
--         argument table (as built by parseTransclude); a template seen
--         several times maps to a list of such argument tables.
function FormatParser:parse(only)
	local output = {}
	while self.i <= self.strLen do
		if self:peekByte() == CHAR_OPEN_BRACE and self:peekByte(1) == CHAR_OPEN_BRACE then
			local args = self:parseTransclude()
			local name = self.template
			-- Clear the name right away so the next transclusion starts
			-- from a clean state.
			self.template = nil

			-- Record the result immediately. Deferring this to the next
			-- loop iteration loses a template that ends exactly at the end
			-- of the text, and mixes up two templates that follow each
			-- other without any character in between.
			if name ~= nil and (only == nil or name == only) then
				local existing = output[name]
				if existing == nil then
					output[name] = args
				elseif existing[1] ~= nil and type(existing[1]) == 'table' then
					-- already a list of argument tables
					table.insert(existing, args)
				else
					-- second occurrence: turn the single entry into a list
					output[name] = {existing, args}
				end
			end
		end
		self.i = self.i + 1
	end
	return output
end
--- Canonical form of a raw template-name token: surrounding whitespace
-- removed and underscores replaced by spaces.
local function normalizeTemplateName(raw)
	return (mw.text.trim(raw):gsub('_', ' '))
end

--- Record one finished segment of a transclusion in `output`.
-- Segments without any collected value are ignored. A segment with a
-- non-empty name becomes a named argument; otherwise it becomes the next
-- positional argument, unless it is the segment the template name was just
-- read from.
-- @param output         argument table being built
-- @param part           {name = list-of-strings|nil, value = list-of-strings}
-- @param isNameSegment  true when this segment supplied the template name
local function storeTranscludePart(output, part, isNameSegment)
	if not part.value or #part.value == 0 then
		return
	end
	local value = mw.text.unstrip(mw.text.trim(table.concat(part.value)))
	if part.name and #part.name > 0 then
		output[mw.text.trim(table.concat(part.name))] = value
	elseif not isNameSegment then
		table.insert(output, value)
	end
end

-- note: assumes all parser functions accept named args in all positions
-- (but in the actual wikitext parser, some (e.g., "#if") don't accept any,
-- and some don't accept them in certain positions (e.g., first arg of "#invoke"))

--- Parse the transclusion whose opening "{{" is at the cursor.
--
-- Sets self.template to the template name and leaves the cursor on the
-- second "}" of the closing "}}".
--
-- @return table of arguments: named arguments under their (trimmed) name,
--         positional arguments appended in order.
-- Raises an error when the end of the text is reached without a closing "}}".
function FormatParser:parseTransclude()
	local start = self.i
	self.i = self.i + 2 -- advance past {{
	self.tokenStart = self.i
	-- A name left over from a previous transclusion would stop this one from
	-- reading its own name (the name would be filed as a positional argument).
	self.template = nil
	local currPart = {value = {}}
	local output = {}

	-- Shared handling for a segment boundary ("|" or "}}"): read the template
	-- name if it is still unknown, flush the pending text into the current
	-- part and store the part.
	local function finishSegment()
		local isNameSegment = false
		if self.template == nil then
			local name = self:parseString()
			if name ~= nil and name ~= '' then
				-- Same normalisation on both boundaries, so "{{Foo_bar}}" and
				-- "{{Foo_bar|x}}" are recorded under the same name.
				self.template = normalizeTemplateName(name)
				isNameSegment = true
			end
		end
		self:parseStringInto(currPart.value)
		-- The name segment is excluded through an explicit flag instead of a
		-- string comparison with the template name: the comparison let names
		-- containing "_" leak into the positional arguments and dropped real
		-- arguments whose text equals the template name.
		storeTranscludePart(output, currPart, isNameSegment)
	end

	-- note: always adds to currPart.value. when = is reached, moves currPart.value to currPart.name.
	while self.i <= self.strLen do
		if not self:tryParsingOpen(currPart.value) then
			local currChar = self:peekByte()
			if currChar == CHAR_PIPE then
				finishSegment()
				self.tokenStart = self.i + 1
				currPart = {value = {}}
			elseif currChar == CHAR_EQUAL then
				-- Only the first "=" of a segment separates name from value;
				-- later ones stay part of the value text.
				if not currPart.name then
					self:parseStringInto(currPart.value)
					self.tokenStart = self.i + 1
					currPart.name = currPart.value
					currPart.value = {}
				end
			elseif currChar == CHAR_CLOSE_BRACE and self:peekByte(1) == CHAR_CLOSE_BRACE then
				finishSegment()
				self.i = self.i + 1 -- rest on the second "}"
				return output
			end
		end
		self.i = self.i + 1
	end

	-- Ran off the end of the text: log what was collected, then fail.
	local issue = self:parseString()
	mw.logObject(issue, 'issue in: ')
	mw.logObject(self.str, 'page: ')
	mw.logObject(output, 'up to now: ')
	error("Unmatched {{ at position " .. start)
end
--- Text of the pending token.
-- The token runs from self.tokenStart up to the cursor plus `offset`; with
-- the default offset of -1 it ends on the character before the cursor.
-- @param offset  Optional end position relative to the cursor (default -1).
-- @return        The token text; nothing when the token starts at the cursor.
function FormatParser:parseString(offset)
	if self.tokenStart == self.i then
		return
	end
	local lastIndex = self.i + (offset or -1)
	return self.str:sub(self.tokenStart, lastIndex)
end
--- Append the pending token (see parseString) to the list `node`.
-- Nothing is appended when parseString yields no text.
-- @param node    list of text pieces to append to
-- @param offset  forwarded to parseString
function FormatParser:parseStringInto(node, offset)
	local text = self:parseString(offset)
	if not text then
		return
	end
	table.insert(node, text)
end
-- close tokens and | = are handled in transclude/replace modes

--- Try to consume a nested construct that opens at the cursor.
--
-- Recognised openers: "{|" (table), "{{" (nested transclusion, only once the
-- enclosing template name is known), "[[" (link) and the tags <gallery ...>
-- and <nowiki ...>. When one is found, the text pending before it and the
-- construct itself are appended to `node`, and a new token is started right
-- after the construct.
--
-- @param node  list of text pieces to append to
-- @return true when a construct was consumed, false otherwise
function FormatParser:tryParsingOpen(node)
	local parseMode
	local currChar = self:peekByte()
	local nextChar = self:peekByte(1)
	if currChar == CHAR_OPEN_BRACE then
		if nextChar == CHAR_PIPE then
			parseMode = self.parseTable
		elseif nextChar == CHAR_OPEN_BRACE and self.template ~= nil and self.template ~= '' then
			parseMode = self.parseTranscludeBasic
		end
	elseif currChar == CHAR_OPEN_BRACKET then
		if nextChar == CHAR_OPEN_BRACKET then
			parseMode = self.parseLink
		end
	elseif currChar == CHAR_LOWER_THAN then
		-- Look at the text at the cursor itself. Matching against the pending
		-- token (which starts at tokenStart, not at the cursor) missed tags
		-- sitting right at the token start or more than one character into
		-- it, and could mistake an unrelated "<" for a tag when the token
		-- merely began with the word "gallery" or "nowiki".
		local text, at = self.str, self.i
		local isKnownTag = text:find('^<gallery', at) or text:find('^<nowiki', at)
		-- A self-closing tag (e.g. <nowiki/>) has no closing counterpart to
		-- wait for, so it is left in place as plain text.
		if isKnownTag and not text:find('^<[^<>]*/>', at) then
			parseMode = self.parseTag
		end
	end
	if parseMode then
		self:parseStringInto(node)
		parseMode(self, node)
		self.tokenStart = self.i + 1
	end
	return parseMode ~= nil
end

--- Consume a nested "{{ ... }}" that opens at the cursor.
-- Its text, braces included, is appended to `output` (in several pieces when
-- further nested constructs are consumed on the way). The cursor is left on
-- the second "}" of the closing "}}".
-- @param output  list of text pieces to append to
-- @return `output`
-- Raises an error when no closing "}}" is found.
function FormatParser:parseTranscludeBasic(output)
	local start = self.i
	self.tokenStart = self.i
	self.i = self.i + 2
	while self.i <= self.strLen do
		if not self:tryParsingOpen(output) then
			if self:peekByte() == CHAR_CLOSE_BRACE and self:peekByte(1) == CHAR_CLOSE_BRACE then
				self.i = self.i + 1
				self:parseStringInto(output, 0)
				return output
			end
		end
		self.i = self.i + 1
	end
	local issue = self:parseString()
	mw.logObject(issue, 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched {{ at position " .. start)
end

--- Consume a tag element that opens at the cursor, up to and including the
-- ">" of its matching closing tag.
-- Every opening tag met on the way increases the nesting depth and every
-- closing tag ("</...") decreases it; self-closing tags ("<.../>") leave it
-- unchanged. The element ends when the depth is back at zero. The element's
-- text is appended to `output` and the cursor is left on the final ">".
-- @param output  list of text pieces to append to
-- @return `output`
-- Raises an error when the end of the text is reached first.
function FormatParser:parseTag(output)
	local start = self.i
	self.tokenStart = self.i
	local depth = 0

	-- Moves the cursor onto the next ">" (or past the end of the text).
	-- Kept local: a plain `function name()` here would create a global.
	local function skipToTagEnd()
		while self.i <= self.strLen and self:peekByte() ~= CHAR_GREATER_THAN do
			self.i = self.i + 1
		end
	end

	while self.i <= self.strLen do
		if self:peekByte() == CHAR_LOWER_THAN then
			local isClosing = self:peekByte(1) == CHAR_SLASH
			skipToTagEnd()
			if isClosing then
				depth = depth - 1
			elseif self:peekByte(-1) ~= CHAR_SLASH then
				-- ordinary opening tag ("/>" would be self-closing)
				depth = depth + 1
			end
		end
		if depth == 0 then
			-- The cursor already rests on the closing ">". Stepping one
			-- further would swallow the next character, which may be the
			-- "|" or "}" the enclosing transclusion is waiting for.
			self:parseStringInto(output, 0)
			return output
		end
		self.i = self.i + 1
	end
	local issue = self:parseString()
	mw.logObject(issue, 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched tag at position " .. start)
end
--- Consume a "[[ ... ]]" link that opens at the cursor.
-- The link text, brackets included, is appended to `output` (in several
-- pieces when nested constructs are consumed on the way). The cursor is left
-- on the second "]" of the closing "]]".
-- @param output  list of text pieces to append to
-- @return `output`
-- Raises an error when no closing "]]" is found.
function FormatParser:parseLink(output)
	local openedAt = self.i
	self.tokenStart = openedAt
	self.i = openedAt + 2 -- step over the opening [[
	while self.i <= self.strLen do
		local consumedNested = self:tryParsingOpen(output)
		if not consumedNested
			and self:peekByte() == CHAR_CLOSE_BRACKET
			and self:peekByte(1) == CHAR_CLOSE_BRACKET
		then
			self.i = self.i + 1
			self:parseStringInto(output, 0)
			return output
		end
		self.i = self.i + 1
	end
	-- End of text reached without a closing ]]: log context, then fail.
	mw.logObject(self:parseString(), 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched [[ at position " .. openedAt)
end
--- Consume a "{| ... |}" table that opens at the cursor.
-- The table text, delimiters included, is appended to `output` (in several
-- pieces when nested constructs are consumed on the way). The cursor is left
-- on the "}" of the closing "|}".
-- @param output  list of text pieces to append to
-- @return `output`
-- Raises an error when no closing "|}" is found.
function FormatParser:parseTable(output)
	local openedAt = self.i
	self.tokenStart = openedAt
	self.i = openedAt + 2 -- step over the opening {|
	while self.i <= self.strLen do
		local consumedNested = self:tryParsingOpen(output)
		if not consumedNested
			and self:peekByte() == CHAR_PIPE
			and self:peekByte(1) == CHAR_CLOSE_BRACE
		then
			self.i = self.i + 1
			self:parseStringInto(output, 0)
			return output
		end
		self.i = self.i + 1
	end
	-- End of text reached without a closing |}: log context, then fail.
	mw.logObject(self:parseString(), 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched {| at position " .. openedAt)
end
--- Report whether `tab` is a table holding at least one entry.
-- Despite its name, this function removes nothing.
-- @param tab  value to inspect
-- @return true for a table with at least one key, false for an empty table
--         or any non-table value
function p.removeBlank(tab)
	local hasEntry = false
	if type(tab) == 'table' then
		-- one step of the iteration is enough to know the table is not empty
		for _ in pairs(tab) do
			hasEntry = true
			break
		end
	end
	return hasEntry
end
return p