Updates to imageboard parser

This commit is contained in:
Robin Malley 2021-04-04 05:13:09 +00:00
parent 53b1a19c05
commit 73df8d400e
2 changed files with 164 additions and 10 deletions

View File

@ -1,5 +1,9 @@
describe("smr imageboard parser",function()
--- Assert with a printf-style failure message.
-- Raises an error built from `string.format(fmt, ...)` when `stmt` is falsy
-- (nil or false) and does nothing otherwise. The message is only formatted
-- on failure.
-- NOTE(review): defined as a global; kept that way so existing callers keep
-- working.
-- @param stmt condition to check
-- @tparam string fmt format string handed to `string.format`
-- @param ... values substituted into `fmt`
function assertf(stmt, fmt, ...)
  if not stmt then
    -- Level 2 attributes the error to the caller's line (the failing
    -- assertion in a spec) instead of to this helper.
    error(string.format(fmt, ...), 2)
  end
end
describe("smr imageboard parser #parsers",function()
it("should load without error", function()
  -- Smoke test: the require call itself must not raise. The returned
  -- module value is not inspected.
  local _ = require("parser_imageboard")
end)
@ -9,4 +13,153 @@ describe("smr imageboard parser",function()
local output = parser(input)
assert(type(output) == "string","Expected string, got: %s",type(output))
end)
it("should spoiler text in asterisks ", function()
  -- **text** is expected to become a span with the "spoiler" class.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, **world**!")
  local wanted = [[<p>Hello, <span class="spoiler">world</span>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should spoiler text in [spoiler] tags", function()
  -- [spoiler]text[/spoiler] is expected to become a span with the
  -- "spoiler2" class.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, [spoiler]world[/spoiler]!")
  local wanted = [[<p>Hello, <span class="spoiler2">world</span>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should italicize words in double single quotes ('')", function()
  -- ''text'' is expected to be wrapped in <i>.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, ''world''!")
  local wanted = [[<p>Hello, <i>world</i>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should bold words in tripple single quotes (''')", function()
  -- '''text''' is expected to be wrapped in <b>.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, '''world'''!")
  local wanted = [[<p>Hello, <b>world</b>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should underline words in double underscores (__)", function()
  -- __text__ is expected to be wrapped in <u>.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, __world__!")
  local wanted = [[<p>Hello, <u>world</u>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should make a heading out of things in double equals(==)", function()
  -- ==text== is expected to be wrapped in <h2>.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, ==world==!")
  local wanted = [[<p>Hello, <h2>world</h2>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should strikethrough words in double tildes (~~)", function()
  -- ~~text~~ is expected to be wrapped in <s>.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, ~~world~~!")
  local wanted = [[<p>Hello, <s>world</s>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should codify words in [code] tags", function()
  -- [code]text[/code] is expected to be wrapped in <pre><code>.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello, [code]world[/code]!")
  local wanted = [[<p>Hello, <pre><code>world</code></pre>!</p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should greentext lines that start with >", function()
  -- A line starting with ">" is expected to land in its own paragraph,
  -- inside a "greentext" span, with the marker escaped as &gt;.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello,\n> world!")
  local wanted = [[<p>Hello,</p> <p><span class="greentext">&gt; world!</span></p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should pinktext lines that start with <", function()
  -- A line starting with "<" is expected to land in its own paragraph,
  -- inside a "pinktext" span, with the marker escaped as &lt;.
  local parse = require("parser_imageboard")
  local rendered = parse("Hello,\n< world!")
  local wanted = [[<p>Hello,</p> <p><span class="pinktext">&lt; world!</span></p> ]]
  assertf(rendered == wanted, "Expected\n%s\ngot\n%s\n", wanted, rendered)
end)
it("should allow for bold+italic text", function()
  -- Five quotes on each side combine the italic ('') and bold (''') markers;
  -- the expected markup nests <b> inside <i>.
  local parser = require("parser_imageboard")
  local input = "Hello,'''''world!'''''"
  local output = parser(input)
  local expected = [[<p>Hello,<i><b>world!</b></i></p> ]]
  -- Compare against the parser output so this spec can actually fail;
  -- computing `expected` alone verifies nothing.
  assertf(output == expected, "Expected\n%s\ngot\n%s\n", expected, output)
end)
-- Inline formatters as {opening marker, closing marker} pairs. The stress
-- loops further down read them positionally (v[1], rngwrap[1], rngwrap[2]).
local formatting = {
{"**","**"}, -- spoiler
{"[spoiler]","[/spoiler]"}, -- spoiler (tag form)
{"''","''"}, -- italic
{"'''","'''"}, -- bold
{"__","__"}, -- underline
{"==","=="}, -- heading
{"~~","~~"}, -- strikethrough
{"[code]","[/code]"} -- code
}
-- Line-prefix markers; the greentext / pinktext specs use "> " and "< ".
-- NOTE(review): nothing in the visible spec reads this table. Confirm
-- whether a stress loop for line formatters was intended.
local formatting_line = {"> ", "< "}
-- Stress test: for every inline formatter, feed the parser 1..50 consecutive
-- opening markers and require it to return within one second of CPU time
-- (os.clock); otherwise the spec fails with "Took too long".
-- ipairs (rather than pairs) keeps registration order deterministic: specs
-- are declared in the order the formatters are listed.
for _, v in ipairs(formatting) do
  for i = 1, 50 do
    it("should not break with " .. i .. " " .. v[1] .. " indicators in a row ",function()
      local parser = require("parser_imageboard")
      local input = "Hello, " .. string.rep(v[1],i) .. " world!"
      local start_time = os.clock()
      parser(input) -- only the elapsed time is checked, not the output
      local end_time = os.clock()
      print(end_time - start_time)
      assert(end_time - start_time < 1, "Took too long")
    end)
  end
end
-- Stress test: build a randomly nested string of formatter-wrapped words,
-- with recursion depth 1..80, and require the parser to return within one
-- second of CPU time (os.clock).
for i = 1, 80 do
  it("Should withstand a random string of " .. i .. " formatters and words. ",function()
    local parser = require("parser_imageboard")

    -- Returns "Hello" or "world" with equal probability.
    local function random_text()
      if math.random() > 0.5 then
        return "Hello"
      else
        return "world"
      end
    end

    -- Wraps `text` in the opening/closing markers of a random formatter.
    local function random_wrap(text)
      local rngwrap = formatting[math.random(#formatting)]
      return rngwrap[1] .. text .. rngwrap[2]
    end

    -- Recursively composes `depth` levels of wrapped words, picking one of
    -- three shapes (append, wrap-and-append, wrap) at each level.
    local function random_text_recursive(depth)
      if depth == 0 then
        return ""
      end
      local j = math.random()
      if j < 0.33 then
        return random_text_recursive(depth-1) .. random_wrap(random_text())
      elseif j < 0.66 then
        return random_wrap(random_text() .. random_text_recursive(depth-1)) .. random_wrap(random_text())
      else
        return random_wrap(random_text() .. random_text_recursive(depth - 1))
      end
    end

    local input = random_text_recursive(i)
    print("input is:",input)
    local start_time = os.clock()
    local output = parser(input)
    -- Stop the clock before printing so console I/O is not counted against
    -- the parser's time budget.
    local end_time = os.clock()
    print("output is:",output)
    print(end_time - start_time)
    assert(end_time - start_time < 1, "Took too long")
  end)
end
-- Smoke-test the parser against whole sample documents loaded from
-- spec.parser_tests.<name>; no assertion is made on the result.
do
  local sample_files = {
    "Beauty_and_the_Banchou_1"
  }
  for idx = 1, #sample_files do
    local file_name = sample_files[idx]
    it("should parser " .. file_name, function()
      local parse = require("parser_imageboard")
      local document = require("spec.parser_tests." .. file_name)
      local rendered = parse(document)
      -- Uncomment to inspect the rendered document:
      --print("output:",rendered)
    end)
  end
end
end)

View File

@ -62,7 +62,7 @@ end
]]
-- Builds a pattern for text delimited by the same marker on both sides
-- (assuming P / Cs / V are the LPeg constructors — TODO confirm against the
-- file's imports): `seq`, then a Cs capture of zero or more of `s`, `word`
-- or a newline, then `seq` again. The captured text is passed through
-- string.format(format, a).
--   seq    delimiter string, e.g. "**" or "''"
--   format string.format template that receives the captured text
--   s      pattern for the nested markup allowed between the delimiters
-- NOTE(review): the two consecutive `return` lines look like the removed and
-- the added version of the same line in this diff; presumably only the second
-- one (without V"marked") is live — confirm against the actual file.
local function wrap(seq,format,s)
return P(seq) * Cs((((V"marked" - s) + word + P"\n"))^0) * P(seq) / function(a)
return P(seq) * Cs(((s + word + P"\n"))^0) * P(seq) / function(a)
return string.format(format,a)
end
end
@ -77,16 +77,17 @@ local function tag(name,format)
end
end
--local grammar = P(require('pegdebug').trace({
local grammar = P{
"chunk";
--regular
spoiler = wrap("**",[[<span class="spoiler">%s</span>]],V"spoiler"),
spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]]),
italic = wrap("''",[[<i>%s</i>]], V"italic"),
bold = wrap("'''",[[<b>%s</b>]], V"bold"),
underline = wrap("__",[[<u>%s</u>]], V"underline"),
heading = wrap("==",[[<h2>%s</h2>]], V"heading"),
strike = wrap("~~",[[<s>%s</s>]], V"strike"),
heading = wrap("==",[[<h2>%s</h2>]], V"underline" + V"strike" + V"italic"),
bold = wrap("'''",[[<b>%s</b>]], V"italic" + V"underline" + V"strike"),
italic = wrap("''",[[<i>%s</i>]], V"underline" + V"strike"),
underline = wrap("__",[[<u>%s</u>]], V"strike"),
strike = wrap("~~",[[<s>%s</s>]], P("blah")),
spoiler = wrap("**",[[<span class="spoiler">%s</span>]],V"spoiler2" + V"bold" + V"italic" + V"underline" + V"strike"),
spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]],V"spoiler" + V"bold" + V"italic" + V"underline" + V"strike"),
code = tag("code",[[<pre><code>%s</code></pre>]]),
greentext = P">" * (B"\n>" + B">") * Cs((V"marked" + word)^0) / function(a)
return string.format([[<span class="greentext">&gt;%s</span>]],a)