Fix the problem that caused the last crash.

In certain cases, the imageboard parser will have exponential time
complexity. While this patch fixes that particular problem, the
correct solution is to implement a timeout for particularly gnarly
parts of the code.
This commit is contained in:
Robin Malley 2020-12-29 20:19:05 +00:00
parent 4bcfcc20a0
commit 663da16a75
2 changed files with 47 additions and 8 deletions

View File

@ -32,8 +32,9 @@ p,.tag-list{margin-bottom:0px}
.column-0{margin-right:5px;}
@media (prefers-color-scheme: dark){
body, input, select, textarea{
body, input, select, textarea, pre, code{
background: #1c1428;
color: #d0d4d8 !important;
}
.spoiler, .spoiler2{color:#444;}
}

View File

@ -50,11 +50,21 @@ local word = Cs((1 - special)^1) * space / sanitize
--Generates a pattern that formats text inside a pair of matching 'seq' tags with 'format'
--ex wrap("^^",[[<sup>%s</sup>]],V"sup")
--will wrap text "5^^3^^" as "5<sup>3</sup>"
local function wrap(seq,format)
--The third argument is necessary to stop exponential backtracking. This removes
--a DoS vulnerability: if tags are nested really deep, the parser can lock up,
--potentially locking up all processes.
--[[
local function wrap(seq,format,V"sup")
return P(seq) * Cs(((V"marked" + word + P"\n"))^1) * P(seq) / function(a)
return string.format(format,a)
end
end
]]
--Builds the pattern for a symmetric inline tag: the literal 'seq', one or more
--content items, then the literal 'seq' again. The content is captured with
--substitutions applied and handed to string.format(format, ...).
--  seq    : the delimiter string that opens and closes the tag
--  format : a string.format template that receives the captured content
--  s      : a pattern subtracted from V"marked" inside the tag body; the
--           grammar passes the rule being defined (e.g. V"italic" for italic)
local function wrap(seq,format,s)
	local delimiter = P(seq)
	--A content item is any marked-up span not matched by 's', a plain word,
	--or a newline.
	local item = (V"marked" - s) + word + P"\n"
	local content = Cs(item^1)
	local function render(a)
		return string.format(format,a)
	end
	return delimiter * content * delimiter / render
end
--Generates a pattern that formats text inside opening and closing "name" tags
--with a format, BB forum style
@ -69,13 +79,13 @@ end
local grammar = P{
"chunk";
--regular
spoiler = wrap("**",[[<span class="spoiler">%s</span>]]),
spoiler = wrap("**",[[<span class="spoiler">%s</span>]],V"spoiler"),
spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]]),
italic = wrap("''",[[<i>%s</i>]]),
bold = wrap("'''",[[<b>%s</b>]]),
underline = wrap("__",[[<u>%s</u>]]),
heading = wrap("==",[[<h2>%s</h2>]]),
strike = wrap("~~",[[<s>%s</s>]]),
italic = wrap("''",[[<i>%s</i>]], V"italic"),
bold = wrap("'''",[[<b>%s</b>]], V"bold"),
underline = wrap("__",[[<u>%s</u>]], V"underline"),
heading = wrap("==",[[<h2>%s</h2>]], V"heading"),
strike = wrap("~~",[[<s>%s</s>]], V"strike"),
code = tag("code",[[<pre><code>%s</code></pre>]]),
greentext = P">" * (B"\n>" + B">") * Cs((V"marked" + word)^0) / function(a)
return string.format([[<span class="greentext">&gt;%s</span>]],a)
@ -86,6 +96,7 @@ local grammar = P{
marked = V"spoiler" + V"bold" + V"italic" + V"underline" + V"heading" + V"strike" + V"spoiler2" + V"code",
plainline = (V"marked" + word)^0,
line = Cs(V"greentext" + V"pinktext" + V"plainline" + P"") * P"\n" / function(a)
print("Found line:",a)
if a == "\r" then
return "<br/>"
else
@ -96,6 +107,33 @@ local grammar = P{
chunk = V"line"^0 * V"plainline" * V"ending"
}
--A chunk of text that the parser chokes on:
local s = [=[
Minor update to the search function, also added a search bar to the front page.
Characters in '''bold''' are literal characters, things in ''<angle brackets and italics>'' are substitutions.
The search utility searches for stories on the site. At it's most simple, it searches stories based on tags, but it can also filter stories based on the fields: '''title''', '''author''', '''date''', and '''hits'''. In general, the syntax for search is {'''+-'''} ''<field>'' ''<operator>'' ''<value>''
The first '''+''' or '''-''' specifies weather to include or exclude results based on this search, the ''<field>'' specifies what field to search for (or search based on tag if this is missing), and ''<operator>'' specifies how to search.
For title and author, the only allowed operator is '''='''. This operator will search for ''<value>'' appearing anywhere in the field, case insensitive. For '''hits''' and '''time''', the allowed operators are '''>''','''<''','''>=''', '''<=''','''=''', which searches for greater than, less than, greater than or equal to, less than or equal to, and strictly equal to respectively. '''tag''' does not need a ''<field>'' or ''<operator>'', and only allows exact matches. As a quirk of this system, it is impossible to search for the tags "author", "title", "hits" or "date".
Examples:
[code]
+author=admin -meta
[/code]
Will return all stories by the users "admin" and "b'''admin'''ton_enthusiast" that do not include the "meta" tag.
[code]
+hits>20 -date>=1609459201
[/code]
Will return all stories with more than 20 hits that were posted before January 1, 2021 (unix timestamp 1609459201).
While the date field is a little hard to use for humans, it may be useful for robots.
]=]
--print(table.concat({grammar:match(s .. "\n")}," "))
--Module export: runs the grammar over 'text' with a newline appended and
--returns every value the match produced, joined with single spaces.
return function(text)
	local input = text .. "\n"
	local pieces = {grammar:match(input)}
	return table.concat(pieces," ")
end