smr/src/lua/parser_imageboard.lua

115 lines
3.4 KiB
Lua

local lpeg = require("lpeg")
lpeg.locale(lpeg)
local V,P,C,S,B,Cs = lpeg.V,lpeg.P,lpeg.C,lpeg.S,lpeg.B,lpeg.Cs
--Characters to escape in the body text, mapped to their HTML entities.
--"&" has to become "&amp;": if it were left alone, entities typed by a user
--(for example "&lt;") would reach the page unescaped.
local escapes = {
	["&"] = "&amp;",
	["<"] = "&lt;",
	[">"] = "&gt;",
}
--Collect the keys of 'escapes' as members of a Lua pattern character class.
local esctbl = {}
for char in pairs(escapes) do
	--Prefix non-alphanumeric characters with "%" so that keys which are magic
	--inside a class ("]", "^", "%", "-") would still be matched literally.
	esctbl[#esctbl + 1] = (string.gsub(char,"%W","%%%0"))
end
--pairs() visits keys in an unspecified order; sort to get a stable pattern
table.sort(esctbl)
local escapematch = string.format("([%s])",table.concat(esctbl))
--Replacement callback for gsub: returns the entity for one matched character,
--or the character itself when it has no entry in 'escapes'.
local function sanitize_item(capture)
	return escapes[capture] or capture
end
--Returns 'text' with every character listed in 'escapes' replaced by its
--HTML entity. Always returns exactly one value (the escaped string).
local function sanitize(text)
	--The parentheses discard gsub's second result (the substitution count)
	return (string.gsub(text,escapematch,sanitize_item))
end
--Grammar
--Optional run of blanks (space, tab, carriage return); matches the empty
--string when there are none
local space = S" \t\r"^0
--Literals that open markup or end a line; plain words stop in front of them
local special =
	P"**" + P"''" + P"'''" +
	P"__" + P"==" + P"~~" +
	P"\n>" + P"\n<" + P"\n" +
	P"[code]" + P"[spoiler]"
--One or more non-special characters, followed by optional blanks that are
--matched but not captured; the captured text is passed through sanitize
local word = (Cs((1 - special)^1) * space) / sanitize
--Builds a pattern for text enclosed between two identical 'seq' delimiters.
--The enclosed text is escaped with sanitize and substituted into 'format',
--a string.format template containing a single %s.
--ex wrap("^^",[[<sup>%s</sup>]])
--matches "^^3^^" and produces "<sup>3</sup>"
local function wrap(seq,format)
	local delim = P(seq)
	--At least one character that does not start the delimiter, each one
	--optionally followed by blanks
	local inner = Cs(((1 - delim) * space)^1)
	local function render(a)
		return string.format(format,sanitize(a))
	end
	return (delim * inner * delim * space) / render
end
--Builds a pattern for text enclosed between "[name]" and "[/name]", in the
--style of BB forum tags. The enclosed text is escaped with sanitize and
--substituted into 'format', a string.format template containing a single %s.
local function tag(name,format)
	local opening = P(string.format("[%s]",name))
	local closing = P(string.format("[/%s]",name))
	--At least one character that does not start the closing tag, each one
	--optionally followed by blanks
	local inner = Cs(((1 - closing) * space)^1)
	local function render(a)
		return string.format(format,sanitize(a))
	end
	return (opening * inner * closing * space) / render
end
--The markup grammar. Inline markup rules are built with wrap/tag, which escape
--their contents; the line-level rules below combine them into HTML.
local grammar = P{
--Name of the initial rule
"chunk";
--Inline markup: each rule matches one delimited span and yields its HTML
spoiler = wrap("**",[[<span class="spoiler">%s</span>]]),
spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]]),
italic = wrap("''",[[<i>%s</i>]]),
bold = wrap("'''",[[<b>%s</b>]]),
underline = wrap("__",[[<u>%s</u>]]),
heading = wrap("==",[[<h2>%s</h2>]]),
strike = wrap("~~",[[<s>%s</s>]]),
code = tag("code",[[<pre><code>%s</code></pre>]]),
--A ">" followed by any mix of inline markup and words, wrapped in a greentext
--span. This rule is only referenced from 'line', i.e. at the start of a line.
--NOTE(review): B">" looks like it always succeeds here, because the P">" just
--before it has consumed a ">"; if so the look-behind is a no-op -- confirm.
greentext = P">" * (B"\n>" + B">") * Cs((V"marked" + word)^0) / function(a)
return string.format([[<span class="greentext">&gt;%s</span>]],a)
end,
--Same as greentext, but for lines starting with "<"
pinktext = P"<" * (B"\n<" + B"<") * Cs((V"marked" + word)^0) / function(a)
return string.format([[<span class="pinktext">&lt;%s</span>]],a)
end,
--Any inline markup. Ordered choice: bold ("'''") is tried before italic ("''")
marked = V"spoiler" + V"bold" + V"italic" + V"underline" + V"heading" + V"strike" + V"spoiler2" + V"code",
--Zero or more inline markup spans or words; can match the empty string
plainline = (V"marked" + word)^0,
--One newline-terminated line; its substituted content is prefixed with "<p>"
--(no closing tag is emitted)
line = Cs(V"greentext" + V"pinktext" + V"plainline" + P"") * P"\n" / function(a)
return string.format("<p>%s",a)
end,
--Whatever input is left over, escaped with sanitize
ending = C(P(1)^0) / sanitize,
--Whole input: as many complete lines as possible, then a partial line, then
--the remaining text
chunk = V"line"^0 * V"plainline" * V"ending"
}
--[=[
local text = [[
<pinktext on the first line
this is **a big** test with ''italics''!
we need to > sanitize < things that could be tags
like really <b> badly </b>
words can include any'single item without=penalty
Can you use '''one tag ==within== another tag'''?
let's see if [spoiler]spoiler tags work[/spoiler]
things might even __go over
multiple lines__ blah
Let's test out those [code]
code tag,s and see how well
they work
here's some
preformatted <with injection>
text
[/code]
>Or have blank lines
one important thing is that greentext > should not start in the middle of a line
>this next line is a green text, what if I include **markup** inside it?
<and after '''it is''' a pinktext
>because of some of these restrictions **bold text
cannot go over multiple lines** in a green text
>greentext on the last line
<pinktext on the last line
]]
]=]
--Module value: a function that converts imageboard markup in 'text' to HTML.
--A newline is appended to the input before matching, and the values captured
--by the grammar are joined with single spaces.
return function(text)
	local captures = {grammar:match(text .. "\n")}
	return table.concat(captures," ")
end
--for k,v in pairs({grammar:match(text)}) do
-- print(k,":",v)
--end