-- smr/src/lua/parser_imageboard.lua

-- 140 lines
-- 5.5 KiB
-- Lua

--[[
A parser that approximates 8chan's markup:
Surround text with double single-quotes ('') to make it italic
Surround text with triple single-quotes (''') to make it bold
Surround text with double underscores (__) to make it underlined
Surround text with double equals signs (==) to make it a heading
Surround text with double asterisks (**) to make it spoilered
Surround text with double tildes (~~) to make it strike through
Begin a line with a greater-than sign to make it
>greentext
Begin a line with a less-than sign to make it
<pinktext
Surround text with forum-style [spoiler] and [/spoiler] tags as a second way to spoiler
Surround text with forum-style [code] and [/code] tags to make it preformatted and monospace
]]
local lpeg = require("lpeg")
lpeg.locale(lpeg)
local V,P,C,S,B,Cs = lpeg.V,lpeg.P,lpeg.C,lpeg.S,lpeg.B,lpeg.Cs
--Characters to escape in the body text, mapped to their HTML entities
local escapes = {
	["&"] = "&amp;",
	["<"] = "&lt;",
	[">"] = "&gt;",
}
--Build a Lua pattern character class out of the keys of 'escapes'. Every
--non-alphanumeric key is %-escaped so that a key with a special meaning inside
--a character class (such as "]", "^", "-" or "%") cannot corrupt the pattern.
local esctbl = {}
for char in pairs(escapes) do
	esctbl[#esctbl + 1] = (string.gsub(char,"%W","%%%0"))
end
--pairs() has no defined order; sort so the generated pattern is reproducible
table.sort(esctbl)
--The class is wrapped in a capture so gsub callbacks receive the matched character
local escapematch = string.format("([%s])",table.concat(esctbl))
--gsub callback: maps a matched character to its HTML entity, or hands the
--character back unchanged when it has no entry in 'escapes'
local function sanitize_item(capture)
	local entity = escapes[capture]
	if entity then
		return entity
	end
	return capture
end
--Returns 'text' with every character matched by 'escapematch' replaced through
--sanitize_item. The parentheses drop gsub's second result (the match count).
local function sanitize(text)
	return (string.gsub(text,escapematch,sanitize_item))
end
--Grammar building blocks
--Optional run of spaces, tabs and carriage returns
local space = S(" \t\r")^0
--Every token that can open markup or end a line; plain text stops before these
local special = P({
	P("**") + P("''") + P("'''")
	+ P("__") + P("==") + P("~~")
	+ P("\n>") + P("\n<") + P("\n")
	+ P("[code]") + P("[spoiler]")
})
--One or more characters that do not start a special token
local plain_run = Cs((P(1) - special)^1)
--A plain run plus any trailing whitespace, replaced by its HTML-escaped form
local word = (plain_run * space) / sanitize
--Generates a pattern that formats text inside matching 'seq' tags with format
--ex: sup = wrap("^^",[[<sup>%s</sup>]],V"sup")
--will wrap text "5^^3^^" as "5<sup>3</sup>"
--The third argument is necessary to stop exponential backtracking. This removes
--a DoS vulnerability: if tags are nested really deep, the parser can lock up,
--potentially locking up all processes.
--[[
local function wrap(seq,format,V"sup")
return P(seq) * Cs(((V"marked" + word + P"\n"))^1) * P(seq) / function(a)
return string.format(format,a)
end
end
]]
--Builds a pattern for text enclosed in a pair of identical 'seq' delimiters.
--seq: the delimiter string
--format: a string.format template whose %s receives the formatted inner text
--s: the grammar rule being defined; nested markup is only attempted at
--positions where rule 's' itself does not match
local function wrap(seq,format,s)
	local delim = P(seq)
	local nested = V("marked") - s
	--Inner text: nested markup, plain words and raw newlines, at least one item
	local inner = Cs((nested + word + P("\n"))^1)
	local function render(body)
		return string.format(format,body)
	end
	return (delim * inner * delim) / render
end
--Builds a pattern for BB-forum-style [name]...[/name] tags. Everything between
--the tags (at least one character) is taken literally, HTML-escaped and
--substituted into 'format'.
local function tag(name,format)
	local opener = P(("[%s]"):format(name))
	local closer = P(("[/%s]"):format(name))
	--Consume characters up to, but not including, the closing tag
	local body = Cs((P(1) - closer)^1)
	return (opener * body * closer) / function(raw)
		return string.format(format,sanitize(raw))
	end
end
--The markup grammar. "chunk" is the start rule: any number of newline-terminated
--lines, then any formatted text that still parses, then the unparsed remainder.
--Matching has no side effects; in particular nothing is written to stdout.
local grammar = P{
	"chunk";
	--Inline markup. Each wrap() rule passes itself as the third argument so the
	--rule is excluded from the markup tried directly inside its own delimiters.
	spoiler = wrap("**",[[<span class="spoiler">%s</span>]],V"spoiler"),
	spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]]),
	italic = wrap("''",[[<i>%s</i>]], V"italic"),
	bold = wrap("'''",[[<b>%s</b>]], V"bold"),
	underline = wrap("__",[[<u>%s</u>]], V"underline"),
	heading = wrap("==",[[<h2>%s</h2>]], V"heading"),
	strike = wrap("~~",[[<s>%s</s>]], V"strike"),
	code = tag("code",[[<pre><code>%s</code></pre>]]),
	--Quote lines: a leading ">" or "<" followed by the rest of the line. These
	--rules are only referenced from "line", so they are only tried at the start
	--of a line. NOTE: the lookbehind is already satisfied by the character that
	--was just consumed, so it never rejects a match.
	greentext = P">" * (B"\n>" + B">") * Cs((V"marked" + word)^0) / function(a)
		return string.format([[<span class="greentext">&gt;%s</span>]],a)
	end,
	pinktext = P"<" * (B"\n<" + B"<") * Cs((V"marked" + word)^0) / function(a)
		return string.format([[<span class="pinktext">&lt;%s</span>]],a)
	end,
	--Any inline markup. "bold" is listed before "italic" so that ''' is tried
	--before its prefix ''.
	marked = V"spoiler" + V"bold" + V"italic" + V"underline" + V"heading" + V"strike" + V"spoiler2" + V"code",
	--A possibly empty run of markup and escaped plain text
	plainline = (V"marked" + word)^0,
	--One newline-terminated line, wrapped in a paragraph
	line = Cs(V"greentext" + V"pinktext" + V"plainline" + P"") * P"\n" / function(a)
		--A line holding only a carriage return becomes a line break; presumably
		--this is a blank line in CRLF input
		if a == "\r" then
			return "<br/>"
		else
			return string.format("<p>%s</p>",a)
		end
	end,
	--Whatever the rules above could not consume is emitted as escaped plain
	--text. This rule always matches, possibly with an empty string.
	ending = C(P(1)^0) / sanitize,
	chunk = V"line"^0 * V"plainline" * V"ending"
}
--Sample input kept for manual debugging: a chunk of text that, according to the
--original author, the parser chokes on. It is not referenced by the function
--this module returns; its only use is the commented-out print below the string.
local s = [=[
Minor update to the search function, also added a search bar to the front page.
Characters in '''bold''' are literal characters, things in ''<angle brackets and italics>'' are substitutions.
The search utility searches for stories on the site. At it's most simple, it searches stories based on tags, but it can also filter stories based on the fields: '''title''', '''author''', '''date''', and '''hits'''. In general, the syntax for search is {'''+-'''} ''<field>'' ''<operator>'' ''<value>''
The first '''+''' or '''-''' specifies weather to include or exclude results based on this search, the ''<field>'' specifies what field to search for (or search based on tag if this is missing), and ''<operator>'' specifies how to search.
For title and author, the only allowed operator is '''='''. This operator will search for ''<value>'' appearing anywhere in the field, case insensitive. For '''hits''' and '''time''', the allowed operators are '''>''','''<''','''>=''', '''<=''','''=''', which searches for greater than, less than, greater than or equal to, less than or equal to, and strictly equal to respectively. '''tag''' does not need a ''<field>'' or ''<operator>'', and only allows exact matches. As a quirk of this system, it is impossible to search for the tags "author", "title", "hits" or "date".
Examples:
[code]
+author=admin -meta
[/code]
Will return all stories by the users "admin" and "b'''admin'''ton_enthusiast" that do not include the "meta" tag.
[code]
+hits>20 -date>=1609459201
[/code]
Will return all stories with more than 20 hits that were posted before January 1, 2021 (unix timestamp 1609459201).
While the date field is a little hard to use for humans, it may be useful for robots.
]=]
--Uncomment to run the grammar over the sample above and print the joined captures:
--print(table.concat({grammar:match(s .. "\n")}," "))
--Module entry point: converts imageboard-style markup in 'text' to HTML.
--A newline is appended so the final line is terminated like every other line,
--and all captures produced by the grammar are joined with single spaces.
local function parse(text)
	local pieces = {grammar:match(text .. "\n")}
	return table.concat(pieces," ")
end
return parse