# main.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

from bs4 import BeautifulSoup
from datetime import date
from mastodon import Mastodon

import sys
import requests
import datetime
import json

# Base URL of Hacker News; relative links scraped from the page are resolved against it
base_url = "https://news.ycombinator.com/"
# Calculate the date 10 years ago as %Y-%m-%d.
# The clock is read once so the year and the month/day cannot straddle midnight.
_now = date.today()
year = _now.year - 10
try:
    today = _now.replace(year=year).isoformat()
except ValueError:
    # Feb 29 has no counterpart when the target year is not a leap year;
    # fall back to Feb 28 instead of producing a non-existent date.
    today = _now.replace(year=year, day=28).isoformat()
# Validate the command-line argument before doing any network work
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " INDEX")
try:
    index = int(sys.argv[1])
except ValueError:
    sys.exit("INDEX must be an integer, got: " + sys.argv[1])

# Request the front page for that day (the timeout keeps a stalled connection from hanging the run)
r = requests.get(base_url + "front", params={"day": today}, timeout=30)
# Stop on an HTTP error instead of parsing an error page
r.raise_for_status()
# Parse the html
soup = BeautifulSoup(r.text, features="lxml")
# Get the post specified by the first argument; only the first three story rows are kept
items = soup.find_all("tr", "athing")[:3]
try:
    item = items[index]
except IndexError:
    sys.exit("no story at index " + str(index) + " (found " + str(len(items)) + " stories for " + today + ")")
story = item.find("a", "storylink")
if story is None:
    # NOTE(review): newer Hacker News markup appears to wrap the title link in
    # <span class="titleline"> rather than tagging the anchor itself - confirm.
    titleline = item.find("span", "titleline")
    story = titleline.find("a") if titleline is not None else None
if story is None:
    sys.exit("could not locate the story link in the selected row")

# Parse the title and link from the post
title = story.text
link = story["href"]
# Hrefs without an http(s) scheme are relative to Hacker News itself
if not link.startswith(("http://", "https://")):
    link = base_url + link

# Probe the link: a 404 or a failed request marks it as dead
try:
    r = requests.get(link, timeout=30)
    link_is_dead = r.status_code == 404
except requests.RequestException:
    link_is_dead = True

if link_is_dead:
    # Ask the Wayback Machine for the snapshot closest to the date ten years ago.
    # The availability API takes the timestamp as YYYYMMDDhhmmss digits (a prefix
    # is accepted), not epoch seconds; `today` is "YYYY-MM-DD", so drop the dashes.
    # Passing params lets requests URL-encode the target link.
    try:
        r = requests.get(
            "http://archive.org/wayback/available",
            params={"url": link, "timestamp": today.replace("-", "")},
            timeout=30,
        )
        res = r.json()
        closest = (res.get("archived_snapshots") or {}).get("closest")
    except (requests.RequestException, ValueError):
        # Archive unreachable or returned non-JSON: treat as "no snapshot"
        closest = None
    if closest and closest.get("available"):
        link = closest["url"]
    else:
        link = "[dead link]"

    
# Find the comments link: the last anchor in the table row that follows the story row.
# find_next_sibling("tr") returns the next <tr> sibling even if a text node sits in between.
comment_el = item.find_next_sibling("tr")
anchors = comment_el.find_all("a") if comment_el is not None else []
if anchors and anchors[-1].has_attr("href"):
    comment_link = base_url + anchors[-1]["href"]
else:
    # No usable anchor in the following row: post without a comments link
    comment_link = ""
# Avoid repeating the same URL twice in the toot
if comment_link == link:
    comment_link = ""

# Assemble the toot body: title, story link and comments link, one per line
toot_content = "\n".join((title, link, comment_link))

# Create the Mastodon client (the access token is given as a path to a secret file)
# and publish the toot
mastodon = Mastodon(
    api_base_url='https://botsin.space',
    access_token='/home/mark/hndecade/hndecade_usercred.secret',
)

mastodon.toot(toot_content)