Initial commit
This commit is contained in:
commit
b86682931f
|
@ -0,0 +1,35 @@
|
||||||
|
<%!
    # Module-level imports for the template: BTEdb reads the scraper's
    # database file, time formats each post's timestamp.
    import BTEdb, time
%>
<%
    # Load every row the scraper stored in "table" and order the rows by
    # their "time" field (ascending). The loop below walks the list
    # backwards, so the page shows the rows in the reverse of this order.
    db = BTEdb.Database("/dev/shm/lainchan-scraper.json")
    dumped = db.Dump("table")
    dumped.sort(key = lambda x: x["time"])
%>
<!doctype html>
<html>
    <head>
        <title>Lainchan scraper for IPFS IDs</title>
        <link rel="stylesheet" href="/style.css" />
        <link rel="stylesheet" href="https://lainchan.org/stylesheets/code/dark.css" />
        <script type="text/javascript" src="https://lainchan.org/js/jquery.min.js"></script>
        <script type="text/javascript" src="https://lainchan.org/js/prettify.js"></script>
        <script type="text/javascript" src="https://lainchan.org/js/hilight.js"></script>
    </head>
    <body>
        <h1>IPFS Scraper</h1>
        <div id="content">
            ## One .post element per stored row.
            % for post in reversed(dumped):
                <div class="post">
                    ## rel="noopener noreferrer" keeps the tab opened by target="_blank"
                    ## from getting a window.opener handle back to this page.
                    <a class="postlink" href="https://lainchan.org/${post["board"]}/res/${post["parent_thread_id"]}.html#q${post["post"]}" target="_blank" rel="noopener noreferrer">Post</a> -
                    <a class="ipfslink-public" href="https://ipfs.io/ipfs/${post["match"]}">public</a>
                    <a class="ipfslink-local" href="http://localhost:8080/ipfs/${post["match"]}">local</a> -
                    ${post["match"]}
                    <span class="date">${time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(post["time"]))}</span>
                    ## The stored post body is emitted as markup, with U+03BB replaced by
                    ## "lam" and board-relative hrefs rewritten to absolute lainchan.org URLs.
                    ## NOTE(review): this text comes from a remote site and is not escaped
                    ## here - confirm that embedding it verbatim is intended.
                    <div class="embedded">${post["text"].replace(u"\u03bb", "lam").replace('href="/' + post["board"], 'href="https://lainchan.org/' + post["board"])}</div>
                </div>
            % endfor
        </div>
        <div id="footer">Niles Rogoff 2016</div>
    </body>
</html>
|
|
@ -0,0 +1,29 @@
|
||||||
|
import urllib.request, json, BTEdb, re, time
|
||||||
|
# Scraper: walks every thread on the configured lainchan boards and stores
# each 46-character alphanumeric token found in a post (the shape the page
# template links to as an IPFS hash) in a BTEdb table.

DB_PATH = "/dev/shm/lainchan-scraper.json"
BASE_URL = "https://lainchan.org/"
TABLE = "table"
THREAD_DELAY_SECONDS = 5  # pause before each per-thread request

boards = ['lam', "tech"]
regex = re.compile(r"\b[A-Za-z0-9]{46}\b")


def fetch_json(url):
    """Download *url* and return its body parsed as UTF-8 encoded JSON.

    The response is closed as soon as it has been read.
    """
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode("utf-8"))


def find_matches(comment):
    """Return every distinct token in *comment* that matches ``regex``.

    Tokens are returned in first-seen order. A token that occurs several
    times in the same comment (for example once inside an ``href`` and once
    as the link text) is reported only once.
    """
    # dict.fromkeys removes duplicates while keeping the original order.
    return list(dict.fromkeys(regex.findall(comment)))


def scrape_board(db, board):
    """Insert one row per distinct match for every post on *board*.

    Prints each thread number as it is visited and the body of each post
    that produced at least one row.
    """
    threads = fetch_json(BASE_URL + board + "/threads.json")
    for page in threads:
        for thread in page["threads"]:
            print(thread["no"])
            time.sleep(THREAD_DELAY_SECONDS)
            thread_url = BASE_URL + board + "/res/" + str(thread["no"]) + ".json"
            for post in fetch_json(thread_url)["posts"]:
                # Posts without a "com" field have no text to search.
                if "com" not in post:
                    continue
                matches = find_matches(post["com"])
                for match in matches:
                    db.Insert(
                        TABLE,
                        board=board,
                        match=match,
                        parent_thread_id=thread["no"],
                        time=post["time"],
                        text=post["com"],
                        post=post["no"],
                    )
                if matches:
                    print(post["com"])


def main():
    """Rebuild the table from scratch inside a single transaction."""
    db = BTEdb.Database(DB_PATH)
    db.BeginTransaction(False)
    # Start from an empty table on every run.
    if db.TableExists(TABLE):
        db.Truncate(TABLE)
    else:
        db.CreateTable(TABLE)
    for board in boards:
        scrape_board(db, board)
    db.CommitTransaction()
    db.Destroy()


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,45 @@
|
||||||
|
/* Stylesheet for the IPFS scraper listing page. */

body {
    background-color: #efeff0;
    font-family: sans-serif;
}

h1 {
    text-align: center;
}

/* Links: dark red, underlined only while hovered. */
a {
    color: #aa0000;
    text-decoration: none;
}

a:hover {
    text-decoration: underline;
}

#content {
    padding: 50px;
}

/* Box that holds the embedded post body. */
.embedded {
    /* Was rgba(0,0,0.2): a comma was missing, leaving three components
       instead of four. Depending on the parser that is either dropped or
       read as an opaque near-black; a 20%-opaque black tint is what the
       value was reaching for. */
    background-color: rgba(0, 0, 0, 0.2);
    border: 1px solid black;
    border-radius: 10px;
    padding: 10px;
    margin-bottom: 20px;
}

/* NOTE(review): presumably applies to quote markup inside the embedded
   post HTML - confirm against the scraped markup. */
.quote {
    color: darkgreen;
}

/* Post timestamp, pushed to the right edge of its row. */
.date {
    text-align: right;
    float: right;
}

/* Small credit pinned to the bottom-right corner of the viewport. */
#footer {
    position: fixed;
    bottom: 20px;
    right: 20px;
    font-size: .8em;
    font-family: monospace;
    color: grey;
    background-color: inherit;
    border-radius: 2px;
    padding: 2px;
    z-index: 100;
}
|
Loading…
Reference in New Issue