Add Mastodon Autoposter
Khemarato Bhikkhu committed Jun 12, 2024
1 parent 4614c5e commit 999e4ab
Showing 6 changed files with 262 additions and 0 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/mastoposter.yml
@@ -0,0 +1,20 @@
name: Mastodon Autoposter
on:
  workflow_dispatch:
  schedule:
    - cron: "0 8,20 * * *"
jobs:
  mastoposter:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout the Code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install Dependencies
        run: pip install Mastodon.py titlecase pyyaml python-frontmatter
      - name: Run Mastoposter
        shell: bash
        run: |
          cd scripts
          MASTODON_TOKEN="${{ secrets.MASTODON_TOKEN }}" python mastoposter.py
1 change: 1 addition & 0 deletions _tags/bodhisatta.md
@@ -1,5 +1,6 @@
---
title: "The Bodhisatta Path"
hashtag: "bodhisattva"
status: unpublished
parents: [form]
---
1 change: 1 addition & 0 deletions _tags/media.md
@@ -1,5 +1,6 @@
---
title: "Media Studies"
hashtag: "mediaStudies"
status: unpublished
parents: [communication]
---
208 changes: 208 additions & 0 deletions scripts/mastoposter.py
@@ -0,0 +1,208 @@
#!/bin/python
# Posts the next piece of content to Mastodon

from datetime import datetime
import json
from mastodon import Mastodon
import re
import os
from urllib.parse import urlparse

import website

def write_post_title(page: website.ContentFile) -> str:
    title = page.title
    if ": " in title:
        title = title.split(': ')[-1]
    return title

def length_of_item(page) -> str:
    length = ""
    if page.minutes:
        length = f", {page.minutes}-minute "
    elif page.pages:
        if isinstance(page.pages, str) and "--" in page.pages:
            pages = page.pages.split("--")
            pages = int(pages[1]) - int(pages[0]) + 1
        else:
            pages = int(page.pages)
        length = f", {pages}-page "
    return length

def get_category_for_item(page: website.ContentFile) -> tuple[str, str]:
    match page.category:
        case "articles":
            emoji = "📰"
            category = "article"
            match page.subcat:
                case "poetry":
                    emoji = "📜"
                    category = "#poem"
        case "av":
            emoji = "🗣️"
            category = "talk"
            match page.subcat:
                case "poetry":
                    category = "#poem"
                case "music":
                    category = "listen"
                    emoji = "🎵"
                case "film":
                    category = "video"
                    emoji = "📼"
                case "podcast":
                    category = "podcast"
                case "course":
                    category = "online course"
                    emoji = "🧑‍🏫"
        case "booklets":
            emoji = "📖"
            category = "book"
            match page.subcat:
                case "poetry":
                    category = "book of poetry"
                case "thesis":
                    category = "thesis"
        case "canon":
            emoji = "☸️"
            category = "canonical work"
            book = re.split(r'[0-9\.-]', page.slug)[0]
            if book in ['an', 'sn', 'snp', 'kp', 'mn', 'pv', 'vv', 'dn', 'iti', 'thig', 'thag', 'ud']:
                category = "sutta"
            elif book in ['ea', 'da', 'sa', 'ma']:
                category = "āgama"
            elif book in ['t', 'toh']:
                category = "sūtra"
            elif page.subcat == "poetry":
                category = "canonical #poem"
            elif 'abhidhamma' in page.tags or 'abhidharma' in page.tags:
                category = "Abhidharma"
        case "essays":
            emoji = "🗒️"
            category = "essay"
            match page.subcat:
                case "poetry":
                    category = "#poem"
        case "excerpts" | "papers":
            emoji = "📑"
            category = "paper"
            match page.subcat:
                case "poetry":
                    category = "#poem"
        case "monographs":
            emoji = "📕"
            category = "book"
            match page.subcat:
                case "poetry":
                    category = "book of poetry"
                case "fiction":
                    category = "novel"
        case "reference":
            emoji = "🆓"
            category = "resource"
        case _:
            raise RuntimeError("Unknown category")
    if page.translator:
        if category[-1] in ['m', 'y']:
            category += " in"
        category += " #translation"
    return (emoji, category)

def hashtagify(text: str) -> str:
    text = text.lower().replace(" ", "-")
    if '-' not in text:
        return text
    return text[0].upper() + re.sub(
        r'-([a-z])',
        lambda match: match.group(1).upper(),
        text[1:],
    )

def write_tags_for_item(page: website.ContentFile) -> list[str]:
    ret = []
    day_of_the_week = datetime.now().weekday()  # 0 = Monday
    tags = page.tags or []
    if page.course:
        tags.insert(0, page.course)
    if day_of_the_week == 3:  # Thursday
        if int(page.year) < 2000 or 'past' in tags:
            ret.append('TBT')
    for t in tags:
        if not t:
            continue
        tag = website.tags.get(t)
        if tag and tag.hashtag:
            if tag.hashtag not in ret:
                ret.append(tag.hashtag)
        else:
            tag = hashtagify(t)
            if tag not in ret:
                ret.append(tag)
    if day_of_the_week == 4:  # Friday
        if page.category in ['booklets', 'monographs']:
            ret.append("FridayReads")
    if "translation" in ret and page.translator:
        ret.remove("translation")
    return [t.replace("Roots", "History") for t in ret]

def write_post_for_item(page: website.ContentFile) -> str:
    title = write_post_title(page)
    length = length_of_item(page)
    emoji, category = get_category_for_item(page)
    tags = write_tags_for_item(page)
    tags = " ".join([f"#{t[0].upper()}{t[1:]}" for t in tags])
    if int(page.year) >= datetime.now().year - 1:
        year = ""
        adjectives = "✨NEW✨, free"
    else:
        year = f" from {page.year}"
        adjectives = "free"
    return f"""{emoji} {title} (A {adjectives}{length}{category}{year})
Tags: {tags}
{website.baseurl}{page.url}"""

if __name__ == "__main__":
    print("Loading site data...", flush=True)
    website.load()

    DOMAIN = urlparse(website.config.get("mastodon_link")).netloc
    AUTH_TOKEN = os.getenv("MASTODON_TOKEN")
    assert AUTH_TOKEN is not None, "Please set the MASTODON_TOKEN environment variable"
    print("Logging in to Mastodon...", flush=True)
    mastodon = Mastodon(api_base_url=DOMAIN, access_token=AUTH_TOKEN)
    ME = mastodon.me()
    print("Fetching the last few posts...", flush=True)
    last_few_posts = mastodon.account_statuses(
        ME['id'],
        exclude_reblogs=True,
        exclude_replies=True,
        limit=5,
    )
    print("Selecting the next post...", flush=True)
    last_few_urls = [p['card']['url'][len(website.baseurl):] for p in last_few_posts if p['card']]
    idx_to_post = None
    for ridx, c in enumerate(reversed(website.content)):
        if c.url in last_few_urls:
            break
        idx_to_post = len(website.content) - 1 - ridx
    if idx_to_post is None:
        print("::error title=Nothing to do::No new items left to post to Mastodon")
        quit(1)
    # skip unfree content
    while not (website.content[idx_to_post].external_url or website.content[idx_to_post].drive_links):
        idx_to_post += 1
        if idx_to_post >= len(website.content):
            print("::error title=Nothing to do::No new items left to post to Mastodon")
            quit(1)
    print(f"::notice title=Post Selection::Posted item {idx_to_post+1} of {len(website.content)} (~{len(website.content)-idx_to_post-1} remaining after this one)", flush=True)
    status = write_post_for_item(website.content[idx_to_post])
    masto_info = mastodon.status_post(
        status=status,
        language="en",
        visibility="public",
    )
    print("::group::Mastodon Response")
    print(json.dumps(masto_info, indent=2, default=str))
    print("::endgroup::")

1 change: 1 addition & 0 deletions scripts/requirements.txt
@@ -14,3 +14,4 @@ google-api-python-client
google_auth_oauthlib
titlecase
pypdf
Mastodon.py
31 changes: 31 additions & 0 deletions scripts/website.py
@@ -1,5 +1,7 @@

from typing import Any
import subprocess
from datetime import datetime
from strutils import (
    Path,
    git_root_folder as root_folder
@@ -14,6 +16,7 @@

config = yaml.load(root_folder.joinpath('_config.yml').read_text(), Loader=yaml.Loader)
baseurl = config.get('url')
filecreationtimes = dict()

class JekyllFile(frontmatter.Post):
    def __init__(self, fd: Path, content, handler=None, **kwargs) -> None:
@@ -23,6 +26,7 @@ def __init__(self, fd: Path, content, handler=None, **kwargs) -> None:
        super().__init__(content, handler=handler, **kwargs)
        self.absolute_path = fd
        self.relative_path = fd.relative_to(root_folder)
        self.created_at = filecreationtimes[str(self.relative_path)]

    @classmethod
    def load(cls, f: Path, **kwargs):
@@ -85,6 +89,10 @@ def __iter__(self):
        return iter(self.tags.values())
    def __len__(self):
        return len(self.tags)
    def __contains__(self, item):
        if isinstance(item, TagFile):
            return item.slug in self.tags
        return bool(self.get(item))

class ContentFile(JekyllFile):
    def __init__(self, fd: Path, content, handler=None, **kwargs) -> None:
@@ -109,16 +117,39 @@ def entry_with_drive_id(gid):
            return entry
    return None

def get_file_creation_times():
    """Returns a dict from relative filepath strings to datetime stamps"""
    filecreationtimes = dict()
    SYGIL = '%these-files-modified-at:'
    git_history = subprocess.run(
        ["git", "--git-dir", root_folder.joinpath(".git"),
         "log", "--name-only", "--date=unix",
         f"--pretty=%{SYGIL}%ct"
        ],
        capture_output=True, text=True, check=True).stdout.splitlines()
    timestamp = datetime.now()
    for line in git_history:
        if SYGIL in line:
            timestamp = datetime.fromtimestamp(int(line[len(SYGIL):]))
            continue
        if line == "":
            continue
        filecreationtimes[line] = timestamp
    return filecreationtimes


def load():
    if content:
        return
    filecreationtimes.update(get_file_creation_times())
    for contentfolder in root_folder.joinpath('_content').iterdir():
        if (not contentfolder.is_dir()) or contentfolder.name.startswith('.'):
            continue
        for contentfile in contentfolder.iterdir():
            if contentfile.is_dir() or contentfile.name.startswith('.'):
                continue
            content.append(ContentFile.load(contentfile))
    content.sort(key=lambda c: c.created_at)
    for tagfile in root_folder.joinpath('_tags').iterdir():
        if (not tagfile.is_file()) or tagfile.name.startswith('.'):
            continue
