"""Build a toot about a Hacker News front-page story from ten years ago.

Usage: python main.py INDEX

INDEX selects one of the first three stories listed on the Hacker News
front page for today's calendar date ten years back.  The script prints
the story title, its link (replaced by a Wayback Machine snapshot when
the original is unreachable or returns 404) and the comment-page link.
"""
# Provenance: this file was introduced as main.py by the patch
# "Add archive.org link handling" (commit
# 9f0acb950d9c85b5701760ecf7aafbfe3374341c, Mark Powers, 2020-09-05).

import datetime
import json
import sys
from datetime import date

import requests
from bs4 import BeautifulSoup
from mastodon import Mastodon

HN_BASE_URL = "https://news.ycombinator.com/"
WAYBACK_API_URL = "http://archive.org/wayback/available"
DEAD_LINK = "[dead link]"
# Seconds to wait on any single HTTP request before giving up, so a hung
# server cannot stall the script forever.
REQUEST_TIMEOUT = 30


def ten_years_before(day):
    """Return the same calendar date ten years before *day*.

    Feb 29 has no counterpart in a non-leap year, so it maps to Feb 28.
    """
    try:
        return day.replace(year=day.year - 10)
    except ValueError:
        return day.replace(year=day.year - 10, day=28)


def find_archived_copy(link, snapshot_day):
    """Return the Wayback snapshot URL of *link* closest to *snapshot_day*.

    Returns DEAD_LINK when the availability API reports no usable
    snapshot.  Network or JSON-decoding errors from the lookup itself
    propagate to the caller.
    """
    response = requests.get(
        WAYBACK_API_URL,
        # Passing params lets requests URL-encode the story link and emit
        # a proper "&timestamp=" separator.  The API expects the
        # timestamp as YYYYMMDDhhmmss digits, not Unix epoch seconds.
        params={"url": link, "timestamp": snapshot_day.strftime("%Y%m%d")},
        timeout=REQUEST_TIMEOUT,
    )
    snapshots = response.json().get("archived_snapshots") or {}
    closest = snapshots.get("closest") or {}
    if closest.get("available"):
        return closest["url"]
    return DEAD_LINK


def resolve_link(link, snapshot_day):
    """Return *link* if it still resolves, otherwise an archived copy.

    A link counts as dead when the request fails outright or answers
    with HTTP 404; any other status leaves the link untouched.
    """
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return find_archived_copy(link, snapshot_day)
    if response.status_code == 404:
        return find_archived_copy(link, snapshot_day)
    return link


def main(argv):
    """Fetch the selected decade-old story and print the toot text.

    argv is the full argument vector; argv[1] is the story index.
    """
    if len(argv) < 2:
        sys.exit("usage: main.py INDEX")
    try:
        index = int(argv[1])
    except ValueError:
        sys.exit("INDEX must be an integer, got %r" % argv[1])

    target_day = ten_years_before(date.today())
    page = requests.get(
        HN_BASE_URL + "front?day=" + target_day.isoformat(),
        timeout=REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(page.text, features="lxml")
    # Only the top three stories of the day are candidates.
    items = soup.find_all("tr", "athing")[:3]
    try:
        item = items[index]
    except IndexError:
        sys.exit(
            "no story at index %d for %s (%d found)"
            % (index, target_day.isoformat(), len(items))
        )

    story = item.find("a", "storylink")
    title = story.text
    link = story["href"]
    # Links without a scheme are relative to the Hacker News site.
    if "http" not in link:
        link = HN_BASE_URL + link
    link = resolve_link(link, target_day)

    # The row following the story row carries the subtext links; the
    # last anchor in it is used as the comment-page link.
    comment_el = item.next_sibling
    comment_link = HN_BASE_URL + comment_el.find_all("a")[-1]["href"]
    # Avoid repeating the URL when the story link is the comment page.
    if comment_link == link:
        comment_link = ""

    toot_content = title + "\n" + link + "\n" + comment_link

    mastodon = Mastodon(
        access_token='/home/mark/hndecade/hndecade_usercred.secret',
        api_base_url='https://botsin.space',
    )

    print(toot_content)
    # NOTE: posting is disabled in this revision; the text is only printed.
    # mastodon.toot(toot_content)


if __name__ == "__main__":
    main(sys.argv)