Toggle menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.
Revision as of 22:44, 31 December 2023 by genshin-impact>Mikevoir
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
-- Module table returned to callers at the end of the file.
local p = {}

-- Byte values of the wikitext delimiters the parser compares against.
local CHAR_EQUAL = ("="):byte()
local CHAR_OPEN_BRACE = ("{"):byte()
local CHAR_PIPE = ("|"):byte()
local CHAR_CLOSE_BRACE = ("}"):byte()
local CHAR_OPEN_BRACKET = ("["):byte()
local CHAR_CLOSE_BRACKET = ("]"):byte()
local CHAR_GREATER_THAN = (">"):byte()
local CHAR_LOWER_THAN = ("<"):byte()
local CHAR_SLASH = ("/"):byte()

-- Method table for parser objects; instances reach it through __index.
local FormatParser = {}
local FormatParserMetatable = { __index = FormatParser }

--- Collects the template calls found on a wiki page or in raw wikitext.
-- @param page     Page title to load; when `options.custom` is set, the
--                 wikitext to parse instead.
-- @param options  Optional table of switches:
--   custom      - treat `page` as wikitext rather than as a title
--   namespace   - namespace handed to mw.title.makeTitle (defaults to '')
--   unstrip     - run mw.text.unstrip over the text before parsing
--   only        - return just the entry recorded under this template name
--   pageContent - also expose the parsed text as the `PAGECONTENT` field
-- @return table of parsed templates; an empty table when there is no title,
--         no content, or no custom text to parse.
function p.getTemplateArgs(page, options)
	options = options or {}
	local rawContent
	if options.custom then
		-- Without text there is nothing to parse. Returning early keeps this
		-- path consistent with the missing-page path below and avoids
		-- indexing the nil that _parseFormat returns for a missing string.
		if not page then
			return {}
		end
		rawContent = page
	else
		local title = mw.title.makeTitle(options.namespace or '', page)
		if title == nil then
			return {}
		end
		rawContent = title:getContent()
		if rawContent == nil or rawContent == '' then
			return {}
		end
		-- Drop HTML comments and append a trailing '<wbr>' so the text never
		-- ends directly on a closing '}}'.
		rawContent = (rawContent:gsub('<!%-%-.-%-%->', '')) .. '<wbr>'
	end

	-- remove strip markers (e.g., <ref></ref> )
	if options.unstrip then
		rawContent = mw.text.unstrip(rawContent)
	end

	local templates = p._parseFormat(rawContent, options.only)

	-- return only specific template to simplify usage
	if options.only then
		templates = templates[options.only] or {}
	end

	-- return raw page content too for extra parsing
	if options.pageContent then
		templates.PAGECONTENT = rawContent
	end

	-- return parsed content
	return templates
end

--- Builds a parser over `str` and runs it.
-- @param str   wikitext to scan; a nil/false value yields nil
-- @param only  optional template name forwarded to FormatParser:parse
-- @return whatever FormatParser:parse returns, or nil when `str` is missing
function p._parseFormat(str, only)
	if not str then
		return nil
	end

	-- TODO consider supporting DPL-style escape characters

	-- Parser state: the cursor `i` and the start of the pending token both
	-- begin at the first byte; `template` (current template name) starts unset.
	local state = setmetatable({
		str = str,
		strLen = #str,
		i = 1,
		tokenStart = 1,
	}, FormatParserMetatable)

	return state:parse(only)
end
--- Returns the byte at the cursor, or at cursor + `offset` when given.
-- Yields no value when that position lies outside the string.
function FormatParser:peekByte(offset)
	local position = self.i
	if offset then
		position = position + offset
	end
	return string.byte(self.str, position)
end
--- Scans the whole input and gathers the arguments of every top-level
-- transclusion, grouped by template name.
--
-- Each template is recorded as soon as its closing braces have been consumed.
-- This means a template that ends exactly at the end of the input is kept,
-- and back-to-back templates such as "{{A}}{{B}}" are recorded separately
-- under their own names.
--
-- @param only  optional template name; when given, other templates are
--              parsed but not recorded
-- @return table mapping a template name to its argument table, or to a list
--         of argument tables when the template occurs more than once
function FormatParser:parse(only)
	local output = {}

	-- Files `args` under `name`; a repeated name turns the entry into a list.
	local function record(name, args)
		if only ~= nil and name ~= only then
			return
		end
		local existing = output[name]
		if existing == nil then
			output[name] = args
		elseif type(existing[1]) == 'table' then
			-- already a list of argument tables
			table.insert(existing, args)
		else
			-- second occurrence: wrap the first entry together with this one
			output[name] = {existing, args}
		end
	end

	while self.i <= self.strLen do
		if self:peekByte() == CHAR_OPEN_BRACE and self:peekByte(1) == CHAR_OPEN_BRACE then
			local args = self:parseTransclude()
			-- parseTransclude leaves the name in self.template; it stays nil
			-- when no usable name was found, in which case nothing is stored.
			if self.template ~= nil then
				record(self.template, args)
				self.template = nil
			end
		end
		self.i = self.i + 1
	end
	return output
end
-- note: assumes all parser functions accept named args in all positions
-- (but in the actual wikitext parser, some (e.g., "#if") don't accept any,
-- and some don't accept them in certain positions (e.g., first arg of "#invoke"))
--- Parses one top-level "{{ ... }}" starting at the cursor.
--
-- The text before the first "|" (or before "}}" when there are no arguments)
-- becomes self.template: trimmed, with underscores turned into spaces on
-- both paths so "{{Foo_Bar}}" and "{{Foo_Bar|x}}" share one key. Only the
-- segment the name was taken from is left out of the positional arguments;
-- a later argument that happens to equal the template name is kept.
--
-- @return table of arguments: `name = value` pairs plus positional values
--         in order. On return the cursor sits on the final "}".
-- Raises an error when the closing "}}" is never found.
function FormatParser:parseTransclude()
	local start = self.i
	self.i = self.i + 2 -- advance past {{
	self.tokenStart = self.i
	-- A top-level transclusion always starts unnamed; a name left over from
	-- an earlier call must not suppress name capture for this one.
	self.template = nil
	local currPart = {value = {}}
	local output = {}

	-- Takes the pending token as the template name if none is set yet.
	-- Returns the trimmed, un-normalized name when one was captured.
	local function captureName()
		if self.template ~= nil then
			return nil
		end
		local name = self:parseString()
		if name == nil or name == '' then
			return nil
		end
		local trimmed = mw.text.trim(name)
		self.template = (trimmed:gsub('_', ' '))
		return trimmed
	end

	-- Flushes the pending token into the current part and stores the part.
	-- `rawName` is the name captured at this delimiter (nil otherwise); a
	-- positional value equal to it is the name segment itself and is skipped.
	local function commitPart(rawName)
		self:parseStringInto(currPart.value)
		if #currPart.value == 0 then
			return
		end
		local value = mw.text.unstrip(mw.text.trim(table.concat(currPart.value)))
		if currPart.name and #currPart.name > 0 then
			output[mw.text.trim(table.concat(currPart.name))] = value
		elseif value ~= rawName then
			table.insert(output, value)
		end
	end

	-- note: always adds to currPart.value. when = is reached, moves currPart.value to currPart.name.
	while self.i <= self.strLen do
		if not self:tryParsingOpen(currPart.value) then
			local currChar = self:peekByte()
			if currChar == CHAR_PIPE then
				commitPart(captureName())
				self.tokenStart = self.i + 1
				currPart = {value = {}}
			elseif currChar == CHAR_EQUAL then
				-- only the first "=" of a part separates name from value
				if not currPart.name then
					self:parseStringInto(currPart.value)
					self.tokenStart = self.i + 1
					currPart.name = currPart.value
					currPart.value = {}
				end
			elseif currChar == CHAR_CLOSE_BRACE then
				if self:peekByte(1) == CHAR_CLOSE_BRACE then
					commitPart(captureName())
					self.i = self.i + 1 -- leave the cursor on the second }
					return output
				end
			end
		end
		self.i = self.i + 1
	end
	local issue = self:parseString()
	mw.logObject(issue, 'issue in: ')
	mw.logObject(self.str, 'page: ')
	mw.logObject(output, 'up to now: ')
	error("Unmatched {{ at position " .. start)
end
--- Returns the pending token: the text from tokenStart up to the character
-- before the cursor, or up to cursor + `offset` when an offset is given.
-- Yields nothing when the cursor is still sitting on tokenStart.
function FormatParser:parseString(offset)
	if self.tokenStart == self.i then
		return
	end
	local lastIndex = self.i + (offset or -1)
	return self.str:sub(self.tokenStart, lastIndex)
end
--- Appends the pending token (see parseString) to the list `node`.
-- Nothing is appended when there is no pending token.
function FormatParser:parseStringInto(node, offset)
	local chunk = self:parseString(offset)
	if not chunk then
		return
	end
	node[#node + 1] = chunk
end
-- close tokens and | = are handled in transclude/replace modes
--- Starts a nested construct if one opens at the cursor.
--
-- Recognised openers: "{|" (table), "{{" (nested transclusion, only once the
-- enclosing template has a non-empty name), "[[" (link) and the tags
-- "<gallery" / "<nowiki". When one is found, the pending token is flushed
-- into `node`, the matching parse method consumes the construct (appending
-- its raw text to `node`), and the next token starts right after it.
--
-- @param node  list receiving the text chunks
-- @return true when a nested construct was consumed, false otherwise
function FormatParser:tryParsingOpen(node)
	local parseMode
	local currChar = self:peekByte()
	local nextChar = self:peekByte(1)
	if currChar == CHAR_OPEN_BRACE then
		if nextChar == CHAR_PIPE then
			parseMode = self.parseTable
		elseif nextChar == CHAR_OPEN_BRACE and self.template ~= nil and self.template ~= '' then
			parseMode = self.parseTranscludeBasic
		end
	elseif currChar == CHAR_OPEN_BRACKET then
		if nextChar == CHAR_OPEN_BRACKET then
			parseMode = self.parseLink
		end
	elseif currChar == CHAR_LOWER_THAN then
		-- Test the tag name at the "<" itself. Looking at the pending token
		-- instead (which starts at tokenStart) only worked when at most one
		-- character preceded the tag, so "=<gallery>" or "= \n<gallery>" were
		-- missed while text such as "gallery<br>" matched by accident.
		local str, at = self.str, self.i
		if str:find('^<gallery', at) or str:find('^<nowiki', at) then
			parseMode = self.parseTag
		end
	end

	if parseMode then
		self:parseStringInto(node)
		parseMode(self, node)
		self.tokenStart = self.i + 1
	end
	return parseMode ~= nil
end
--- Consumes a nested "{{ ... }}" without interpreting its arguments.
-- The raw text, braces included, is appended to `output`; constructs nested
-- inside are delegated to tryParsingOpen. On return the cursor sits on the
-- final "}".
-- @param output  list receiving the text chunks
-- @return `output`
-- Raises an error when no closing "}}" is found.
function FormatParser:parseTranscludeBasic(output)
	local openedAt = self.i
	self.tokenStart = openedAt
	self.i = openedAt + 2 -- step over the opening {{

	while self.i <= self.strLen do
		local handledNested = self:tryParsingOpen(output)
		if not handledNested
			and self:peekByte() == CHAR_CLOSE_BRACE
			and self:peekByte(1) == CHAR_CLOSE_BRACE then
			-- move onto the second } so the chunk includes both braces
			self.i = self.i + 1
			self:parseStringInto(output, 0)
			return output
		end
		self.i = self.i + 1
	end

	local unfinished = self:parseString()
	mw.logObject(unfinished, 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched {{ at position " .. openedAt)
end

--- Consumes an HTML-like element starting at the "<" under the cursor and
-- appends its raw text, tags included, to `output`.
--
-- Only tags with the same name as the opening tag are counted (compared
-- case-insensitively). Other markup inside the element, such as "<br>" or a
-- literal "<", does not affect the balance. A self-closing opener such as
-- "<nowiki/>" is complete on its own. On return the cursor sits on the ">"
-- that ends the element, so the character after it is still examined by the
-- caller.
--
-- @param output  list receiving the text chunks
-- @return `output`
-- Raises an error when the element is never closed.
function FormatParser:parseTag(output)
	local start = self.i
	self.tokenStart = self.i

	-- Name of the element being opened, e.g. "gallery" for "<gallery mode=x>".
	local tagName = self.str:match('^<([^%s/>]+)', self.i)
	if tagName then
		tagName = tagName:lower()
	end
	local depth = 0

	while tagName ~= nil and self.i <= self.strLen do
		if self:peekByte() == CHAR_LOWER_THAN then
			local closing = self:peekByte(1) == CHAR_SLASH
			local name = self.str:match(closing and '^</([^%s/>]+)' or '^<([^%s/>]+)', self.i)
			if name ~= nil and name:lower() == tagName then
				-- jump to the ">" that ends this tag
				while self.i <= self.strLen and self:peekByte() ~= CHAR_GREATER_THAN do
					self.i = self.i + 1
				end
				if self.i > self.strLen then
					break -- tag never terminated; report below
				end
				if closing then
					depth = depth - 1
				elseif self:peekByte(-1) ~= CHAR_SLASH then
					-- "<tag ... />" opens nothing, so only count real openers
					depth = depth + 1
				end
				if depth == 0 then
					self:parseStringInto(output, 0)
					return output
				end
			end
		end
		self.i = self.i + 1
	end
	local issue = self:parseString()
	mw.logObject(issue, 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched tag at position " .. start)
end

--- Consumes a "[[ ... ]]" link and appends its raw text, brackets included,
-- to `output`; constructs nested inside are delegated to tryParsingOpen.
-- On return the cursor sits on the final "]".
-- @param output  list receiving the text chunks
-- @return `output`
-- Raises an error when no closing "]]" is found.
function FormatParser:parseLink(output)
	local openedAt = self.i
	self.tokenStart = openedAt
	self.i = openedAt + 2 -- step over the opening [[

	while self.i <= self.strLen do
		local handledNested = self:tryParsingOpen(output)
		if not handledNested
			and self:peekByte() == CHAR_CLOSE_BRACKET
			and self:peekByte(1) == CHAR_CLOSE_BRACKET then
			-- move onto the second ] so the chunk includes both brackets
			self.i = self.i + 1
			self:parseStringInto(output, 0)
			return output
		end
		self.i = self.i + 1
	end

	local unfinished = self:parseString()
	mw.logObject(unfinished, 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched [[ at position " .. openedAt)
end

--- Consumes a "{| ... |}" wiki table and appends its raw text, delimiters
-- included, to `output`; constructs nested inside are delegated to
-- tryParsingOpen. On return the cursor sits on the final "}".
-- @param output  list receiving the text chunks
-- @return `output`
-- Raises an error when no closing "|}" is found.
function FormatParser:parseTable(output)
	local openedAt = self.i
	self.tokenStart = openedAt
	self.i = openedAt + 2 -- step over the opening {|

	while self.i <= self.strLen do
		local handledNested = self:tryParsingOpen(output)
		if not handledNested
			and self:peekByte() == CHAR_PIPE
			and self:peekByte(1) == CHAR_CLOSE_BRACE then
			-- move onto the } so the chunk includes the whole |} marker
			self.i = self.i + 1
			self:parseStringInto(output, 0)
			return output
		end
		self.i = self.i + 1
	end

	local unfinished = self:parseString()
	mw.logObject(unfinished, 'issue in: ')
	mw.logObject(self.str, 'entire page: ')
	error("Unmatched {| at position " .. openedAt)
end

--- Reports whether `tab` is a table holding at least one entry.
-- Despite the name, nothing is removed: the result is only a boolean.
-- @param tab  value to inspect
-- @return true for a table with at least one key, false for an empty table
--         or any non-table value
function p.removeBlank(tab)
	local hasEntry = false
	if type(tab) == 'table' then
		-- one step of iteration is enough to know the table is not empty
		for _ in pairs(tab) do
			hasEntry = true
			break
		end
	end
	return hasEntry
end

-- Export the module table (getTemplateArgs, _parseFormat, removeBlank).
return p