From b86682931f6e1aa61523ba8e3042229997c5fed0 Mon Sep 17 00:00:00 2001 From: Niles Rogoff Date: Wed, 29 Jun 2016 23:05:31 -0400 Subject: [PATCH] Initial commit --- index.pyhtml | 35 +++++++++++++++++++++++++++++++++++ scraper.py | 29 +++++++++++++++++++++++++++++ style.css | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 index.pyhtml create mode 100644 scraper.py create mode 100644 style.css diff --git a/index.pyhtml b/index.pyhtml new file mode 100644 index 0000000..1a82799 --- /dev/null +++ b/index.pyhtml @@ -0,0 +1,35 @@ +<%! + import BTEdb, time +%> +<% + db = BTEdb.Database("/dev/shm/lainchan-scraper.json") + dumped = db.Dump("table") + dumped.sort(key = lambda x: x["time"]) +%> + + + + Lainchan scraper for IPFS IDs + + + + + + + +

IPFS Scraper

+
+ % for post in dumped[::-1]: +
+ Post - + public + local - + ${post["match"]} + ${time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(post["time"]))} +
${post["text"].replace(u"\u03bb", "lam").replace('href="/' + post["board"], 'href="https://lainchan.org/' + post["board"])}
+
+ % endfor +
+
+
+
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..baf1d84
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,53 @@
+"""Scrape lainchan boards for 46-character IDs and store them with BTEdb.
+
+Each post's "com" text is searched for 46-character alphanumeric words;
+every distinct hit becomes one row in the "table" table, which is
+truncated (or created) at the start of every run.
+"""
+import urllib.request, json, BTEdb, re, time
+
+BASE_URL = "https://lainchan.org/"
+
+
+def fetch_json(url):
+    """Download url and return its body parsed as UTF-8 encoded JSON."""
+    # The with-block closes the HTTP response even if decoding fails.
+    with urllib.request.urlopen(url) as response:
+        return json.loads(response.read().decode("utf-8"))
+
+
+db = BTEdb.Database("/dev/shm/lainchan-scraper.json")
+db.BeginTransaction(False)
+boards = ['lam', "tech"]
+regex = re.compile(r"\b[A-Za-z0-9]{46}\b")
+# Reuse an existing table after truncating it, otherwise create it.
+if db.TableExists("table"):
+    db.Truncate("table")
+else:
+    db.CreateTable("table")
+for board in boards:
+    threads = fetch_json(BASE_URL + board + "/threads.json")
+    for page in threads:
+        for thread in page["threads"]:
+            print(thread["no"])
+            # Wait before each thread download (presumably rate limiting).
+            time.sleep(5)
+            thread_url = BASE_URL + board + "/res/" + str(thread["no"]) + ".json"
+            for post in fetch_json(thread_url)["posts"]:
+                if "com" not in post:
+                    continue
+                # findall() returns every hit, not just the first; the set
+                # collapses IDs repeated within one post (e.g. link + text).
+                for match in sorted(set(regex.findall(post["com"]))):
+                    db.Insert(
+                        "table",
+                        board=board,
+                        match=match,
+                        parent_thread_id=thread["no"],
+                        time=post["time"],
+                        text=post["com"],
+                        post=post["no"],
+                    )
+                    print(post["com"])
+db.CommitTransaction()
+db.Destroy()
diff --git a/style.css b/style.css
new file mode 100644
index 0000000..fa41db3
--- /dev/null
+++ b/style.css
@@ -0,0 +1,45 @@
+body {
+    background-color: #efeff0;
+    font-family: sans-serif;
+    /*color: #eeeeee;*/
+}
+h1 {
+    text-align: center;
+}
+a {
+    /*color: #1d1f21;*/
+    color: #aa0000;
+    text-decoration: none;
+}
+a:hover {
+    text-decoration:underline;
+}
+#content {
+    padding: 50px;
+}
+.embedded {
+    background-color: rgba(0,0,0,0.2); /* black at 20% opacity; rgba() needs four arguments */
+    border: 1px solid black;
+    border-radius: 10px;
+    padding: 10px;
+    margin-bottom: 20px;
+}
+.quote {
+    color: darkgreen;
+}
+.date {
+    text-align: right;
+    float: right;
+}
+#footer {
+    position: fixed;
+    bottom: 20px;
+    right: 20px;
+    font-size: .8em;
+    font-family: monospace;
+    color: grey;
+    background-color: inherit;
+    border-radius: 2px;
+    padding: 2px;
+    z-index: 100;
+}