diff --git a/package-lock.json b/package-lock.json index b64e8c5..da1fec5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "smf", - "version": "4.00.0", + "version": "4.1.0", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 1b676aa..3751e22 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "smf", - "version": "4.00.0", + "version": "4.1.0", "description": "Harvest recent postings from SMF", "dependencies": { "axios": "^0.21.1", diff --git a/smf.js b/smf.js index 2b3a9bc..87cc4db 100644 --- a/smf.js +++ b/smf.js @@ -18,11 +18,9 @@ Copyright 2011-2021 James Tittsler const axios = require('axios').default; const process = require("process"); -const url = require("url"); const cheerio = require("cheerio"); const nodemailer = require("nodemailer"); const fs = require("fs"); -const os = require("os"); const ini = require("ini"); var config = ini.parse(fs.readFileSync("./smf.rc", "utf-8")); let smtpConfig = { @@ -70,7 +68,6 @@ function getHighWaterMark() { process.exit(1); } - console.log(`high water mark ${highwater}`); if (isNaN(highwater) || highwater == 0) { console.error('high water mark not set'); log.error('high water mark not set'); @@ -106,94 +103,30 @@ function unHTMLEntities(a) { return a; }; -async function processPage(item, page) { - let url = item.link; - let msgid = url.replace(/.*#msg/, ''); - let $ = cheerio.load(page); - // look through all the div.post_wrapper for one that contains - // a subject for the desired message number - let $h5 = $(`#subject_${msgid}`); - let $el = $h5.closest(".post_wrapper"); - let from = $el - .find('div.poster a[title^="View the profile of"]') - .eq(0) - .text(); - let $post = $el.find("div.post").eq(0); - $("div.quote", $post).attr( - "style", - "color: #000; background-color: #d7daec; margin: 1px; padding: 6px; font-size: 1em; line-height: 1.5em; font-style: italic; font-family: Georgia, Times, serif;" - ); - $("div.quoteheader,div.codeheader", $post).attr( - "style", - "color: #000; text-decoration: none; font-style: normal; font-weight: bold; font-size: 1em; line-height: 1.2em; padding-bottom: 4px;" - ); - $(".meaction", $post).attr("style", "color: red;"); - $("embed", $post).each(function () { - let src = decodeURIComponent($(this).attr("src")); - log.debug(` embed: ${src}`); - return $(this).replaceWith(`

${src}

`); - }); - let $attachments = $el.find("div.attachments"); - if ($attachments) { - $attachments.attr("style", "font-size: 0.8em;"); - $("a", $attachments) - .prop("onclick", null); - $post.append($attachments); - } - let post = $post.html(); - let d = new Date(item.pubDate); - let isodate = d.toISOString(); - let originalLink = item.link; - if (config.smf.protocol.startsWith('https')) { - originalLink = originalLink.replace('http:', 'https:'); - } - log.debug(`From: ${from}`); - log.debug(`Subject: [${item.category}] ${unHTMLEntities(item.title)}`); - log.debug(`Date: ${isodate} Lastdate: ${lastdate.toISOString()}`); - mailer.sendMail( - { - from: `"${from}" ${config.email.sender}`, - to: config.email.to, - subject: `[${item.category}] ${unHTMLEntities(item.title.trim())}`, - html: `

From: ${from}
Date: ${item.pubDate - }

${post}
-

Original message

` - }, - function (error) { - if (error) { - log.debug(`>>failed to send mail ${msgid}`); - log.debug(error); - process.exit(1); - } - }); - log.debug(`>>sent ${msgid} (${isodate}) for ${item.category}`); - await config.db.run("UPDATE feeds SET last=(?) WHERE category=(?)", - isodate, item.category); -} - /** * @returns {Promise} */ async function processPage(highwater, page, posts) { let $ = cheerio.load(page); + let more = 0; $('.core_posts').each(function (i) { let msg = {}; let $h5as = $(this).find('.topic_details>h5').first().find('a'); msg.board = $h5as.eq(0).attr('href'); msg.category = $h5as.eq(0).text(); - msg.msgLink = $h5as.eq(1).attr('href'); - msg.subject = unHTMLEntities($h5as.eq(1).text()); - msg.msgid = msg.msgLink.replace(/.*#msg/, ''); - if (msg.msgid > highwater) { + msg.link = $h5as.eq(1).attr('href'); + msg.subject = unHTMLEntities($h5as.eq(1).text().trim()); + msg.id = msg.link.replace(/.*#msg/, ''); + if (msg.id > highwater) { let $authDate = $(this).find('.topic_details .smalltext').first(); msg.author = $authDate.find('a').first().text(); let dtrego = /.*\son\s(\S+)(,|\sat)\s(\d\d:\d\d:\d\d).*/.exec($authDate.text()); if (dtrego[1] === 'Today') { // FIXME: there is some ambiguity in "Today" let d = new Date(); - msg.dt = d.toISOString().slice(0, 11) + dtrego[3]; + msg.pubDate = d.toISOString().slice(0, 11) + dtrego[3]; } else { - msg.dt = dtrego[1] + 'T' + dtrego[3]; + msg.pubDate = dtrego[1] + 'T' + dtrego[3]; } let $post = $(this).find('.list_posts').first(); $("div.quote", $post).attr( @@ -206,12 +139,13 @@ async function processPage(highwater, page, posts) { ); $(".meaction", $post).attr("style", "color: red;"); msg.post = $post.html(); - console.log('------------'); - console.dir(msg); - posts.push(msg); + posts.unshift(msg); + more += 1; + } else { + more = 0; } }); - return 0; + return more; } async function smf() { @@ -222,11 +156,9 @@ async function smf() { let posts = []; let res; - console.log(`smf() highwater=${highwater}`); // process Recent Posts pages until we get to messages we've seen while (more > 0) { try { - console.log(`fetching ${config.smf.recent_url}${start}`); log.debug(`fetching ${config.smf.recent_url}${start}`); res = await axios.get(config.smf.recent_url + start, { headers: { @@ -245,12 +177,39 @@ async function smf() { more = await processPage(highwater, res.data, posts); start += more; - console.log('delay'); - await sleep(config.smf.recent_fetch_delay || 5000); + if (more) { + await sleep(config.smf.recent_fetch_delay || 5000); + } + } + + for (let msg of posts) { + console.log(`mailing: ${msg.id}: ${msg.category} / ${msg.subject}`); + mailer.sendMail({ + from: `"${msg.author}" ${config.email.sender}`, + to: config.email.to, + subject: `[${msg.category}] ${msg.subject}`, + html: ` +

From: ${msg.author}
+ Date: ${msg.pubDate} #${msg.id}

+
${msg.post}
+

Original message

+
` + }, + function (error) { + if (error) { + log.error(`>>failed to send mail ${msg.id}`); + log.error(error); + process.exit(42); + } + } + ); + if (msg.id > highwater) { + highwater = msg.id; + } + await sleep(config.smf.email_delay || 5000); } if (highwater > origmark) { - console.log(`setting highwater ${highwater}`); setHighWaterMark(highwater); } } diff --git a/smf.rc.example b/smf.rc.example index 3063294..ae63490 100644 --- a/smf.rc.example +++ b/smf.rc.example @@ -6,8 +6,7 @@ cookie = "SMFCookie463=abcabcabcabcabcabc; PHPSESSID=0abc0abc0abc;" recent_url = "https://ringo.net/forum/index.php?action=recent;start=" highwatermark = "smf.mark" recent_fetch_delay = 10000 -feed_fetch_delay = 1000 -item_fetch_delay = 1000 +email_delay = 5000 loglevel = error [email]