diff options
-rw-r--r-- | config.py | 12 | ||||
-rw-r--r-- | main.py | 144 | ||||
-rw-r--r-- | on_this_day.py | 149 | ||||
-rw-r--r-- | test.py | 3 |
4 files changed, 308 insertions, 0 deletions
# --- config.py -------------------------------------------------------------
# Shared configuration for the daily "on this day" RSS generator.
config = {
    "host": "http://daily.marks.kitchen",
    "thoreau": {"journal1": "/home/mark/email-scripts/journal1.txt"},
    "news": {
        # %s is filled with a YYYY-MM-DD date by on_this_day.get_old_news.
        # Was "%%s" (a configparser-style escape); in a plain Python string
        # "url %% date" leaves the argument unconsumed and raises
        # TypeError: not all arguments converted during string formatting.
        "urls": "https://chroniclingamerica.loc.gov/lccn/sn96076642/%s/ed-1/seq-1/,https://chroniclingamerica.loc.gov/lccn/sn85040451/%s/ed-1/seq-1/",
        "names": "Belding Banner,Vernon County Censor",
    },
    "db": "/home/mark/rss.db",
}


# https://www.gutenberg.org/files/57393/57393-0.txt


# --- main.py ---------------------------------------------------------------
import on_this_day
import sys
import sqlite3

from datetime import datetime
from config import config
from http.server import HTTPServer, BaseHTTPRequestHandler


def format_datetime(date):
    """Render a datetime for feed fields (plain str(); not RFC 822)."""
    return str(date)


def generate_item(item):
    """Render one item row (guid/title/description/createdAt) as RSS <item> XML."""
    return f"""<item>
    <title><![CDATA[{item["title"]}]]></title>
    <description><![CDATA[{item["description"]}]]></description>
    <link>{config["host"]}/{item["guid"]}</link>
    <guid isPermaLink="true">{item["guid"]}</guid>
    <pubDate>{item["createdAt"]}</pubDate>
    </item>"""


def generate_feed(items):
    """Render the full RSS 2.0 document for a list of item dicts."""
    return f"""<rss version="2.0"><channel>
    <title>On This Day</title>
    <description>Daily posts of stuff for today</description>
    <lastBuildDate>{format_datetime(datetime.now())}</lastBuildDate>
    {"".join(generate_item(item) for item in items)}
    </channel></rss>"""


def generate_ul(items):
    """Wrap each string in *items* in <li> inside one <ul>.

    Fix: the closing tag was "<ul>" instead of "</ul>", producing
    invalid markup.
    """
    return f"""<ul>
    {f"<li>{'</li><li>'.join(items)}</li>"}
    </ul>"""


def _collect(sources):
    """Call each (func, name) source; substitute an error string on failure.

    Best-effort by design: one failing scraper must not kill the whole
    daily post, so failures are reported inline and logged.
    """
    results = []
    for func, name in sources:
        try:
            results.append(func())
        except Exception as e:
            results.append(f"Error getting {name}")
            print("Exception:", e)
    return results


def get_description():
    """Build the HTML body of today's feed entry from all content sources."""
    list_items = [
        (on_this_day.get_old_news, "old news"),
        (on_this_day.get_peanuts, "peanuts"),
        (on_this_day.get_calvin_and_hobbes, "calvin and hobbes"),
        (on_this_day.get_today_wikipedia, "wikipedia"),
        (on_this_day.get_week_holidays, "holiday"),
        (on_this_day.get_crossword, "crossword"),
        (on_this_day.year_progress_bar, "year progress"),
        (on_this_day.get_homepage_links, "homepage links"),
    ]
    ul = generate_ul(_collect(list_items))

    div_items = [
        (on_this_day.get_today_wikiquote, "wikiquote"),
        (on_this_day.get_thoreau, "thoreau"),
    ]
    # Was a duplicated loop with a bare "except:" that swallowed
    # KeyboardInterrupt/SystemExit too; now shares _collect.
    items = _collect(div_items)
    divs = f"<div>{'</div><div>'.join(items)}</div>"

    return f"""
    <h1>On this day</h1>
    {ul}
    {divs}
    """


def setup(con):
    """Create the items table if it does not exist yet."""
    cur = con.cursor()
    cur.execute("CREATE TABLE IF NOT EXISTS items (guid, title, description, createdAt)")
    con.commit()
    cur.close()


def insert_today():
    """Generate today's post and insert it as a new feed item.

    Fixes: ensures the table exists (running "insert" before ever starting
    the server used to fail), and closes the connection it opens.
    """
    con = sqlite3.connect(config["db"])
    try:
        setup(con)  # CREATE TABLE IF NOT EXISTS — idempotent
        now = datetime.now()
        cur = con.cursor()
        cur.execute(
            "INSERT INTO items values (?, ?, ?, ?)",
            (now.isoformat(), str(now), get_description(), format_datetime(now)),
        )
        con.commit()
        cur.close()
    finally:
        con.close()


def get_all(con):
    """Return the full RSS feed XML for every stored item."""
    cur = con.cursor()
    items = []
    for (guid, title, description, createdAt) in cur.execute(
        "SELECT guid, title, description, createdAt FROM items"
    ):
        items.append({
            "guid": guid,
            "title": title,
            "description": description,
            "createdAt": createdAt,
        })
    cur.close()
    return generate_feed(items)


def get_one_desc(con, guid):
    """Return the description for *guid*, or None if no such item exists.

    Fix: previously used next() on the cursor, which raised StopIteration
    for an unknown guid and blew up the request handler.
    """
    cur = con.cursor()
    cur.execute("SELECT description FROM items WHERE guid=:guid", {"guid": guid})
    row = cur.fetchone()
    cur.close()
    return row[0] if row else None


class RssHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serves the feed at "/" and a single item's description at "/<guid>"."""

    # The previous no-op __init__ that only delegated to the base class
    # was removed; it added nothing.

    def do_GET(self):
        if self.path == "/":
            self.send_response(200)
            self.end_headers()
            self.wfile.write(bytes(get_all(con), "utf-8"))
        else:
            description = get_one_desc(con, self.path[1:])
            if description is None:
                # Unknown guid: answer 404 instead of crashing the handler.
                self.send_error(404)
                return
            self.send_response(200)
            self.end_headers()
            self.wfile.write(bytes(description, "utf-8"))


# Module-level connection shared with the request handler; set in __main__.
con = None


def server():
    """Run the blocking HTTP server on port 8000."""
    print("Starting http server")
    http = HTTPServer(("", 8000), RssHTTPRequestHandler)
    print("serving forever")
    http.serve_forever()


def test():
    print(get_description())


if __name__ == "__main__":
    if len(sys.argv) == 1:
        con = sqlite3.connect(config["db"])
        setup(con)
        server()
    elif sys.argv[1] == "insert":
        insert_today()
    elif sys.argv[1] == "test":
        test()


# --- on_this_day.py (part 1) ------------------------------------------------
import requests
import sys
import subprocess

from datetime import date, datetime
from bs4 import BeautifulSoup

from config import config


def get_old_news():
    """Link to the configured newspapers' front pages from exactly 100 years ago.

    Skips papers whose scan for that date is missing (HTTP 404).
    """
    print("getting old news")
    year = date.today().year - 100
    century_ago = str(year) + date.today().strftime("-%m-%d")
    news_text = ""
    urls = config["news"]["urls"].split(",")
    names = config["news"]["names"].split(",")
    for url, name in zip(urls, names):
        full_url = url % century_ago
        if requests.get(full_url).status_code != 404:
            news_text += '<div><a href="%s">%s %s</a></div>\n' % (full_url, name, century_ago)
    return news_text or "No old news"


def get_today_wikipedia():
    """Link to Wikipedia's page for today's month/day."""
    print("getting today's wikipedia")
    full_url = "https://en.wikipedia.org/wiki/%s" % date.today().strftime("%B_%d")
    return '<div><a href="%s">Today\'s Wikipedia</a></div>' % (full_url)


def get_week_holidays():
    """List upcoming holidays from the local BSD calendar(1) file (next 14 days)."""
    print("getting holidays")
    command = "calendar -f /usr/share/calendar/calendar.mark -A 14".split(" ")
    output = subprocess.check_output(command)
    output = output.decode("utf-8").strip().split("\n")
    return "</li>\n<li>".join(output) or "No holidays this fortnight."
def get_crossword():
    """Link to today's Simply Daily Puzzles cryptic crossword."""
    date_str = date.today().strftime("%Y-%m-%d")
    url = f"https://simplydailypuzzles.com/daily-cryptic/index.html?puzz=dc1-{date_str}"
    return f'<div><a href="{url}">Cryptic Crossword</a></div>'


def get_today_wikiquote():
    """Scrape the quote-of-the-day table from today's Wikiquote page."""
    print("getting today's wikiquote")
    full_url = "https://en.wikiquote.org/wiki/%s" % date.today().strftime("%B_%d")
    r = requests.get(full_url)
    soup = BeautifulSoup(r.text, features="lxml")
    # Anchor on the current year's heading. Was hard-coded "2020", which
    # stopped matching once the page gained newer years' quotes.
    year_text = str(date.today().year)
    table = str(soup.find(text=year_text).parent.parent.next_sibling.next_sibling)
    table = table.replace('href="/', 'href="https://en.wikiquote.org/')
    return '<div style="border: 1px solid black">%s</div>' % table


def get_calvin_and_hobbes():
    """Link to the Calvin and Hobbes strip for today's date in a 1986-1994 year."""
    print("getting calvin and hobbes")
    # Map the current year onto the strip's 1986+ run.
    year = date.today().year % 9 + 1986
    comic_date = str(year) + date.today().strftime("/%m/%d")
    full_url = "https://www.gocomics.com/calvinandhobbes/%s" % comic_date
    r = requests.get(full_url)
    soup = BeautifulSoup(r.text, features="lxml")
    if not "Today on" in str(soup.title):  # gocomics gives you today if 404
        comic_src = soup.select(".item-comic-image")[0].img["src"]
        return '<div><a href="%s">Calvin and Hobbes</a></div>' % (comic_src)
    else:
        return ""


def get_peanuts():
    """Link to today's Peanuts strip image on gocomics."""
    print("getting peanuts")
    comic_date = date.today().strftime("%Y/%m/%d")
    full_url = "https://www.gocomics.com/peanuts/%s" % comic_date
    r = requests.get(full_url)
    soup = BeautifulSoup(r.text, features="lxml")
    comic_src = soup.select(".item-comic-image")[0].img["src"]
    return '<div><a href="%s">Peanuts</a></div>' % (comic_src)


# Sites without feeds/need to be checked themselves
def get_homepage_links():
    """Static links to pages worth checking daily by hand."""
    pages = [
        {"url": "https://aldaily.com/", "title": "Arts and Letters Daily"},
        {"url": "https://www.powerlanguage.co.uk/wordle/", "title": "Wordle"},
    ]
    page_links = [
        f'<div><a href="{item["url"]}">{item["title"]}</a></div>'
        for item in pages
    ]
    return "</li>\n<li>".join(page_links)


def check_for_starts_with_line(lst, line):
    """Return True if *line* starts with any of the prefixes in *lst*."""
    # str.startswith accepts a tuple of prefixes; no manual loop needed.
    return line.startswith(tuple(lst))


def get_thoreau():
    """Return Thoreau's journal entry for today's month/day, 183 years ago.

    Scans the plain-text Gutenberg journal for the target year's section,
    then for an entry heading like "_Aug. 5." in any of its date formats.
    """
    print("getting thoreau")
    year_int = date.today().year - 183
    year = str(year_int)
    year_stop = str(year_int + 1)
    # The journal marks entries in several heading styles; try them all.
    month_days = [
        date.today().strftime("_%b %-d."),
        date.today().strftime("_%b. %-d."),
        date.today().strftime("_%B %-d."),
        date.today().strftime("_%B. %-d."),
    ]
    filename = config["thoreau"]["journal1"]
    with open(filename) as f:
        lines = f.readlines()

    # Locate the span of lines belonging to the target year.
    year_start_idx = -1
    for i in range(len(lines)):
        if lines[i].startswith(year):
            year_start_idx = i + 1
            break
    year_stop_idx = -1
    for i in range(year_start_idx, len(lines)):
        if lines[i].startswith(year_stop):
            year_stop_idx = i - 2
            break

    # Find where today's entry starts inside that year.
    entry_start_idx = -1
    i = year_start_idx
    while i < year_stop_idx:
        if check_for_starts_with_line(month_days, lines[i]):
            entry_start_idx = i - 2
            i += 1
            break
        i += 1
    # The entry ends where the next "_"-prefixed heading begins.
    entry_end_idx = -1
    while i < year_stop_idx:
        if lines[i].startswith("_"):
            entry_end_idx = i - 2
            break
        i += 1
    # Fix: the last entry of a year has no following "_" heading and used
    # to be silently dropped; treat the year boundary as its end.
    if entry_start_idx != -1 and entry_end_idx == -1:
        entry_end_idx = year_stop_idx

    if entry_start_idx != -1 and entry_end_idx != -1:
        return "".join(lines[entry_start_idx:entry_end_idx])
    return "No Thoreau entry on " + month_days[0] + year


def year_progress_bar(width=50):
    """ASCII progress bar showing how far through the current year we are.

    *width* is the number of bar cells (default 50).
    """
    now = datetime.now()
    day_of_year = float(now.strftime("%j"))
    # Use the real length of this year; was hard-coded 365, which made the
    # bar slightly wrong in leap years.
    days_in_year = float(date(now.year, 12, 31).strftime("%j"))
    count = width * (day_of_year / days_in_year)
    cells = "".join("#" if i < count else "_" for i in range(width))
    return f"<pre>Year: [{cells}]</pre>"


# --- test.py ----------------------------------------------------------------
import on_this_day

print(on_this_day.get_week_holidays())