Skip to content

Commit

Permalink
unshift msgs into array to preserve order
Browse files Browse the repository at this point in the history
add msgid to human Date: field
add optional delay between emailing each message
update example rc file
no delay after final partial fetch
remove unused dependencies
remove some debug output
  • Loading branch information
jimt committed Aug 26, 2021
1 parent 279672a commit 19eaba8
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 87 deletions.
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "smf",
"version": "4.00.0",
"version": "4.1.0",
"description": "Harvest recent postings from SMF",
"dependencies": {
"axios": "^0.21.1",
Expand Down
125 changes: 42 additions & 83 deletions smf.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@ Copyright 2011-2021 James Tittsler

const axios = require('axios').default;
const process = require("process");
const url = require("url");
const cheerio = require("cheerio");
const nodemailer = require("nodemailer");
const fs = require("fs");
const os = require("os");
const ini = require("ini");
var config = ini.parse(fs.readFileSync("./smf.rc", "utf-8"));
let smtpConfig = {
Expand Down Expand Up @@ -70,7 +68,6 @@ function getHighWaterMark() {
process.exit(1);
}

console.log(`high water mark ${highwater}`);
if (isNaN(highwater) || highwater == 0) {
console.error('high water mark not set');
log.error('high water mark not set');
Expand Down Expand Up @@ -106,94 +103,30 @@ function unHTMLEntities(a) {
return a;
};

/**
 * Pull one forum message out of a fetched topic page, mail it, and then
 * record the message's publication date as the last-seen mark for its
 * category.
 *
 * The message id is taken from the `#msg<id>` fragment of `item.link`; the
 * post is located through the `#subject_<id>` element on the page.
 *
 * @param {object} item - feed entry; `link`, `title`, `category` and
 *   `pubDate` are read.
 * @param {string} page - HTML of the topic page containing the message.
 * @returns {Promise<void>} settles after the mail has been handed off and
 *   the `feeds` row has been updated. On a mail failure the process exits
 *   with status 1 and the row is left untouched.
 */
async function processPage(item, page) {
  const msgid = item.link.replace(/.*#msg/, '');
  const $ = cheerio.load(page);
  // the subject heading carries the message id; the enclosing
  // .post_wrapper holds both the poster and the body of that message
  const $el = $(`#subject_${msgid}`).closest(".post_wrapper");
  const from = $el
    .find('div.poster a[title^="View the profile of"]')
    .eq(0)
    .text();
  const $post = $el.find("div.post").eq(0);

  // inline the styles: mail clients will not have the forum stylesheet
  $("div.quote", $post).attr(
    "style",
    "color: #000; background-color: #d7daec; margin: 1px; padding: 6px; font-size: 1em; line-height: 1.5em; font-style: italic; font-family: Georgia, Times, serif;"
  );
  $("div.quoteheader,div.codeheader", $post).attr(
    "style",
    "color: #000; text-decoration: none; font-style: normal; font-weight: bold; font-size: 1em; line-height: 1.2em; padding-bottom: 4px;"
  );
  $(".meaction", $post).attr("style", "color: red;");

  // replace embedded objects with a plain link to their source
  $("embed", $post).each(function () {
    const src = decodeURIComponent($(this).attr("src"));
    log.debug(` embed: ${src}`);
    $(this).replaceWith(`<p><a href="${src}">${src}</a></p>`);
  });

  // a cheerio selection is always truthy, so test its length instead
  const $attachments = $el.find("div.attachments");
  if ($attachments.length > 0) {
    $attachments.attr("style", "font-size: 0.8em;");
    $("a", $attachments)
      .prop("onclick", null);
    $post.append($attachments);
  }

  const post = $post.html();
  const isodate = new Date(item.pubDate).toISOString();
  let originalLink = item.link;
  if (config.smf.protocol.startsWith('https')) {
    originalLink = originalLink.replace('http:', 'https:');
  }
  log.debug(`From: ${from}`);
  log.debug(`Subject: [${item.category}] ${unHTMLEntities(item.title)}`);
  log.debug(`Date: ${isodate} Lastdate: ${lastdate.toISOString()}`);

  // Wait for the send to finish before logging success or advancing the
  // stored mark; otherwise a failed send could still move the mark forward.
  // NOTE(review): assumes `mailer` is a nodemailer transport, whose
  // sendMail() returns a Promise when no callback is given -- confirm.
  try {
    await mailer.sendMail({
      from: `"${from}" ${config.email.sender}`,
      to: config.email.to,
      subject: `[${item.category}] ${unHTMLEntities(item.title.trim())}`,
      html: `<html><head></head><body><div><p><b>From:</b> ${from}<br /><b>Date:</b> ${item.pubDate
        }</p><div style="max-width:72ch;">${post}</div>
<p><a href="${originalLink}">Original message</a></p></div></body></html>`
    });
  } catch (error) {
    log.error(`>>failed to send mail ${msgid}`);
    log.error(error);
    process.exit(1);
  }

  log.debug(`>>sent ${msgid} (${isodate}) for ${item.category}`);
  await config.db.run("UPDATE feeds SET last=(?) WHERE category=(?)",
    isodate, item.category);
}

/**
* @returns {Promise<number>}
*/
async function processPage(highwater, page, posts) {
let $ = cheerio.load(page);
let more = 0;
$('.core_posts').each(function (i) {
let msg = {};
let $h5as = $(this).find('.topic_details>h5').first().find('a');
msg.board = $h5as.eq(0).attr('href');
msg.category = $h5as.eq(0).text();
msg.msgLink = $h5as.eq(1).attr('href');
msg.subject = unHTMLEntities($h5as.eq(1).text());
msg.msgid = msg.msgLink.replace(/.*#msg/, '');
if (msg.msgid > highwater) {
msg.link = $h5as.eq(1).attr('href');
msg.subject = unHTMLEntities($h5as.eq(1).text().trim());
msg.id = msg.link.replace(/.*#msg/, '');
if (msg.id > highwater) {
let $authDate = $(this).find('.topic_details .smalltext').first();
msg.author = $authDate.find('a').first().text();
let dtrego = /.*\son\s(\S+)(,|\sat)\s(\d\d:\d\d:\d\d).*/.exec($authDate.text());
if (dtrego[1] === 'Today') {
// FIXME: there is some ambiguity in "Today"
let d = new Date();
msg.dt = d.toISOString().slice(0, 11) + dtrego[3];
msg.pubDate = d.toISOString().slice(0, 11) + dtrego[3];
} else {
msg.dt = dtrego[1] + 'T' + dtrego[3];
msg.pubDate = dtrego[1] + 'T' + dtrego[3];
}
let $post = $(this).find('.list_posts').first();
$("div.quote", $post).attr(
Expand All @@ -206,12 +139,13 @@ async function processPage(highwater, page, posts) {
);
$(".meaction", $post).attr("style", "color: red;");
msg.post = $post.html();
console.log('------------');
console.dir(msg);
posts.push(msg);
posts.unshift(msg);
more += 1;
} else {
more = 0;
}
});
return 0;
return more;
}

async function smf() {
Expand All @@ -222,11 +156,9 @@ async function smf() {
let posts = [];
let res;

console.log(`smf() highwater=${highwater}`);
// process Recent Posts pages until we get to messages we've seen
while (more > 0) {
try {
console.log(`fetching ${config.smf.recent_url}${start}`);
log.debug(`fetching ${config.smf.recent_url}${start}`);
res = await axios.get(config.smf.recent_url + start, {
headers: {
Expand All @@ -245,12 +177,39 @@ async function smf() {
more = await processPage(highwater, res.data, posts);
start += more;

console.log('delay');
await sleep(config.smf.recent_fetch_delay || 5000);
if (more) {
await sleep(config.smf.recent_fetch_delay || 5000);
}
}

for (let msg of posts) {
console.log(`mailing: ${msg.id}: ${msg.category} / ${msg.subject}`);
mailer.sendMail({
from: `"${msg.author}" ${config.email.sender}`,
to: config.email.to,
subject: `[${msg.category}] ${msg.subject}`,
html: `<html><head></head><body>
<div><p><b>From:</b> ${msg.author}<br />
<b>Date:</b> ${msg.pubDate} #${msg.id}</p>
<div style="max-width:72ch;">${msg.post}</div>
<p><a href="${msg.link}">Original message</a></p>
</div></body></html>`
},
function (error) {
if (error) {
log.error(`>>failed to send mail ${msg.id}`);
log.error(error);
process.exit(42);
}
}
);
if (msg.id > highwater) {
highwater = msg.id;
}
await sleep(config.smf.email_delay || 5000);
}

if (highwater > origmark) {
console.log(`setting highwater ${highwater}`);
setHighWaterMark(highwater);
}
}
Expand Down
3 changes: 1 addition & 2 deletions smf.rc.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ cookie = "SMFCookie463=abcabcabcabcabcabc; PHPSESSID=0abc0abc0abc;"
recent_url = "https://ringo.net/forum/index.php?action=recent;start="
highwatermark = "smf.mark"
recent_fetch_delay = 10000
feed_fetch_delay = 1000
item_fetch_delay = 1000
email_delay = 5000
loglevel = error

[email]
Expand Down

0 comments on commit 19eaba8

Please sign in to comment.