Initial commit
This commit is contained in:
commit
b86682931f
|
@ -0,0 +1,35 @@
|
|||
<%!
# Module-level imports for this template: BTEdb supplies the scraped rows,
# time formats each post's timestamp.
import BTEdb
import time
%>
<%
# Load every row of "table" from the JSON database file and order the rows
# by their "time" field (ascending); the loop below walks them in reverse so
# the newest post is rendered first.
db = BTEdb.Database("/dev/shm/lainchan-scraper.json")
dumped = db.Dump("table")
dumped.sort(key=lambda row: row["time"])
%>
<!doctype html>
<html>
<head>
<title>Lainchan scraper for IPFS IDs</title>
<link rel="stylesheet" href="/style.css" />
<link rel="stylesheet" href="https://lainchan.org/stylesheets/code/dark.css" />
<script type="text/javascript" src="https://lainchan.org/js/jquery.min.js"></script>
<script type="text/javascript" src="https://lainchan.org/js/prettify.js"></script>
<script type="text/javascript" src="https://lainchan.org/js/hilight.js"></script>
</head>
<body>
<h1>IPFS Scraper</h1>
<div id="content">
## One entry per stored row, newest first.
% for post in reversed(dumped):
<div class="post">
## rel="noopener" stops the page opened in the new tab from reaching this
## page through window.opener.
<a class="postlink" href="https://lainchan.org/${post["board"]}/res/${post["parent_thread_id"]}.html#q${post["post"]}" target="_blank" rel="noopener">Post</a> -
<a class="ipfslink-public" href="https://ipfs.io/ipfs/${post["match"]}">public</a>
<a class="ipfslink-local" href="http://localhost:8080/ipfs/${post["match"]}">local</a> -
${post["match"]}
<span class="date">${time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(post["time"]))}</span>
## NOTE(review): post["text"] is embedded with no escape filter applied in
## this expression, so whatever markup was stored for the post reaches the
## browser as-is -- confirm the stored HTML is trusted. Board-relative links
## are rewritten to absolute lainchan.org URLs so they still resolve here.
<div class="embedded">${post["text"].replace(u"\u03bb", "lam").replace('href="/' + post["board"], 'href="https://lainchan.org/' + post["board"])}</div>
</div>
% endfor
</div>
<div id="footer">Niles Rogoff 2016</div>
</body>
</html>
|
|
@ -0,0 +1,29 @@
|
|||
import urllib.request, json, BTEdb, re, time
|
||||
# Scraper script: walks every thread of the configured lainchan boards,
# searches each post's comment for 46-character alphanumeric tokens
# (candidate IPFS IDs) and stores one database row per token found.


def _fetch_json(url):
    """Download *url* and return its body parsed as UTF-8 encoded JSON."""
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode("utf-8"))


db = BTEdb.Database("/dev/shm/lainchan-scraper.json")
db.BeginTransaction(False)
boards = ['lam', "tech"]
# Exactly 46 ASCII letters/digits delimited by word boundaries.
regex = re.compile(r"\b[A-Za-z0-9]{46}\b")

# Every run rebuilds the table from scratch.
if db.TableExists("table"):
    db.Truncate("table")
else:
    db.CreateTable("table")

for board in boards:
    threads = _fetch_json("https://lainchan.org/" + board + "/threads.json")
    for page in threads:
        for thread in page["threads"]:
            print(thread["no"])
            # Pause five seconds before each thread download (presumably to
            # keep the request rate against the site low).
            time.sleep(5)
            thread_url = "https://lainchan.org/" + board + "/res/" + str(thread["no"]) + ".json"
            for post in _fetch_json(thread_url)["posts"]:
                # Posts without a "com" field have no text to search.
                if "com" not in post:
                    continue
                # finditer yields every non-overlapping match, so a post that
                # mentions several IDs produces one row per ID. Database
                # errors are no longer swallowed: they propagate to the caller.
                for hit in regex.finditer(post["com"]):
                    db.Insert(
                        "table",
                        board=board,
                        match=hit.group(0),
                        parent_thread_id=thread["no"],
                        time=post["time"],
                        text=post["com"],
                        post=post["no"],
                    )
                    print(post["com"])

db.CommitTransaction()
db.Destroy()
|
|
@ -0,0 +1,45 @@
|
|||
/* Page-wide defaults. */
body {
    background-color: #efeff0;
    font-family: sans-serif;
    /*color: #eeeeee;*/
}

h1 {
    text-align: center;
}

/* Links: dark red, underlined only while hovered. */
a {
    /*color: #1d1f21;*/
    color: #aa0000;
    text-decoration: none;
}

a:hover {
    text-decoration: underline;
}

#content {
    padding: 50px;
}

/* Box holding the embedded post text. */
.embedded {
    /* rgba() takes red, green, blue AND alpha; the previous value
       rgba(0,0,0.2) was missing one component. This is black at 20% opacity. */
    background-color: rgba(0, 0, 0, 0.2);
    border: 1px solid black;
    border-radius: 10px;
    padding: 10px;
    margin-bottom: 20px;
}

.quote {
    color: darkgreen;
}

/* Timestamp floated to the right edge of the post header line. */
.date {
    text-align: right;
    float: right;
}

/* Small credit pinned to the bottom-right corner of the viewport. */
#footer {
    position: fixed;
    bottom: 20px;
    right: 20px;
    font-size: .8em;
    font-family: monospace;
    color: grey;
    background-color: inherit;
    border-radius: 2px;
    padding: 2px;
    z-index: 100;
}
|
Loading…
Reference in New Issue