From 8acb3afcebd4f813be542e92463f7cba19bf0305 Mon Sep 17 00:00:00 2001 From: Matt Furden Date: Sat, 13 Jan 2024 19:30:45 -0800 Subject: [PATCH] Release 1.5.1 --- checkif.ts | 28 +++++----- electron-scraper.ts | 133 ++++++++++++++++++++++++++++++++++++++++++++ main.ts | 8 ++- manifest.json | 2 +- package.json | 2 +- settings.ts | 33 ++++++++--- versions.json | 1 + 7 files changed, 182 insertions(+), 25 deletions(-) create mode 100644 electron-scraper.ts diff --git a/checkif.ts b/checkif.ts index 7000f16..cc00385 100644 --- a/checkif.ts +++ b/checkif.ts @@ -5,26 +5,26 @@ export class CheckIf { public static isMarkdownLinkAlready(editor: Editor): boolean { let cursor = editor.getCursor(); - // Check if the characters before the url are ]( to indicate a markdown link - var titleEnd = editor.getRange( - { ch: cursor.ch - 2, line: cursor.line }, - { ch: cursor.ch, line: cursor.line } - ); + // Check if the characters before the url are ]( to indicate a markdown link + var titleEnd = editor.getRange( + { ch: cursor.ch - 2, line: cursor.line }, + { ch: cursor.ch, line: cursor.line } + ); - return titleEnd == "](" + return titleEnd == "](" } public static isAfterQuote(editor: Editor): boolean { let cursor = editor.getCursor(); - // Check if the characters before the url are " or ' to indicate we want the url directly - // This is common in elements like - var beforeChar = editor.getRange( - { ch: cursor.ch - 1, line: cursor.line }, - { ch: cursor.ch, line: cursor.line } - ); + // Check if the characters before the url are " or ' to indicate we want the url directly + // This is common in elements like + var beforeChar = editor.getRange( + { ch: cursor.ch - 1, line: cursor.line }, + { ch: cursor.ch, line: cursor.line } + ); - return beforeChar == "\"" || beforeChar == "'" + return beforeChar == "\"" || beforeChar == "'" } public static isUrl(text: string): boolean { @@ -42,4 +42,4 @@ export class CheckIf { return urlRegex.test(text); } -} \ No newline at end of file +} diff 
--git a/electron-scraper.ts b/electron-scraper.ts new file mode 100644 index 0000000..a80e71a --- /dev/null +++ b/electron-scraper.ts @@ -0,0 +1,133 @@ +const electronPkg = require("electron"); +import { request } from "obsidian"; + +function blank(text: string): boolean { + return text === undefined || text === null || text === ""; +} + +function notBlank(text: string): boolean { + return !blank(text); +} + +// async wrapper to load a url and settle on load finish or fail +async function load(window: any, url: string): Promise { + return new Promise((resolve, reject) => { + window.webContents.on("did-finish-load", (event: any) => resolve(event)); + window.webContents.on("did-fail-load", (event: any) => reject(event)); + window.loadURL(url); + }); +} + +async function electronGetPageTitle(url: string): Promise { + const { remote } = electronPkg; + const { BrowserWindow } = remote; + + try { + const window = new BrowserWindow({ + width: 1000, + height: 600, + webPreferences: { + webSecurity: false, + nodeIntegration: true, + images: false, + }, + show: false, + }); + window.webContents.setAudioMuted(true); + + await load(window, url); + + try { + const title = window.webContents.getTitle(); + window.destroy(); + + if (notBlank(title)) { + return title; + } else { + return url; + } + } catch (ex) { + window.destroy(); + return url; + } + } catch (ex) { + console.error(ex); + return "Site Unreachable"; + } +} + +async function nonElectronGetPageTitle(url: string): Promise { + try { + const html = await request({ url }); + + const doc = new DOMParser().parseFromString(html, "text/html"); + const title = doc.querySelectorAll("title")[0]; + + if (title == null || blank(title?.innerText)) { + // If site is javascript based and has a no-title attribute when unloaded, use it. 
+ var noTitle = title?.getAttr("no-title"); + if (notBlank(noTitle)) { + return noTitle; + } + + // Otherwise the site has no usable title (e.g. it requires JavaScript), so fall back to returning the URL itself + return url; + } + + return title.innerText; + } catch (ex) { + console.error(ex); + + return "Site Unreachable"; + } +} + +function getUrlFinalSegment(url: string): string { + try { + const segments = new URL(url).pathname.split('/'); + const last = segments.pop() || segments.pop(); // Handle potential trailing slash + return last; + } catch (_) { + return "File" + } +} + +async function tryGetFileType(url: string) { + try { + const response = await fetch(url, { method: "HEAD" }); + + // Ensure site returns an ok status code before scraping + if (!response.ok) { + return "Site Unreachable"; + } + + // Ensure site is an actual HTML page and not a pdf or 3 gigabyte video file. + let contentType = response.headers.get("content-type"); + if (!contentType.includes("text/html")) { + return getUrlFinalSegment(url); + } + return null; + } catch (err) { + return null; + } +} + +export default async function getPageTitle(url: string): Promise { + // Prepend https:// when the URL does not already start with http(s); the Electron scraper is selected at the end of this function + if (!(url.startsWith("http") || url.startsWith("https"))) { + url = "https://" + url; + } + + // Try to do a HEAD request to see if the site is reachable and if it's an HTML page + // If we error out due to CORS, we'll just try to scrape the page anyway. 
+ let fileType = await tryGetFileType(url); + if (fileType) { + return fileType; + } + + if (electronPkg != null) { + return electronGetPageTitle(url); + } else { + return nonElectronGetPageTitle(url); + } +} diff --git a/main.ts b/main.ts index fbd8303..d2e98fe 100644 --- a/main.ts +++ b/main.ts @@ -2,6 +2,7 @@ import { CheckIf } from "checkif" import { EditorExtensions } from "editor-enhancements" import { Editor, Plugin } from "obsidian" import getPageTitle from "scraper" +import getElectronPageTitle from "electron-scraper" import { AutoLinkTitleSettingTab, AutoLinkTitleSettings, @@ -230,7 +231,12 @@ export default class AutoLinkTitle extends Plugin { async fetchUrlTitle(url: string): Promise { try { - const title = await getPageTitle(url); + let title = ""; + if (this.settings.useNewScraper) { + title = await getPageTitle(url); + } else { + title = await getElectronPageTitle(url); + } return title.replace(/(\r\n|\n|\r)/gm, "").trim(); } catch (error) { console.error(error) diff --git a/manifest.json b/manifest.json index 3b72821..cad00f3 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "obsidian-auto-link-title", "name": "Auto Link Title", - "version": "1.5.0", + "version": "1.5.1", "minAppVersion": "0.12.17", "description": "This plugin automatically fetches the titles of links from the web", "author": "Matt Furden", diff --git a/package.json b/package.json index 8628a69..028a0ff 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "obsidian-auto-link-title", - "version": "1.5.0", + "version": "1.5.1", "description": "Automatically fetches the titles of links from the web and formats link when pasted", "main": "main.js", "scripts": { diff --git a/settings.ts b/settings.ts index d3f1838..832ed4e 100644 --- a/settings.ts +++ b/settings.ts @@ -11,6 +11,7 @@ export interface AutoLinkTitleSettings { enhanceDefaultPaste: boolean; websiteBlacklist: string; maximumTitleLength: number; + useNewScraper: boolean; } export const 
DEFAULT_SETTINGS: AutoLinkTitleSettings = { @@ -27,6 +28,7 @@ export const DEFAULT_SETTINGS: AutoLinkTitleSettings = { enhanceDefaultPaste: true, websiteBlacklist: "", maximumTitleLength: 0, + useNewScraper: false, }; export class AutoLinkTitleSettingTab extends PluginSettingTab { @@ -60,16 +62,16 @@ export class AutoLinkTitleSettingTab extends PluginSettingTab { new Setting(containerEl) .setName("Maximum title length") .setDesc( - "Set the maximum length of the title. Set to 0 to disable." + "Set the maximum length of the title. Set to 0 to disable." ) .addText((val) => - val - .setValue(this.plugin.settings.maximumTitleLength.toString(10)) - .onChange(async (value) => { - const titleLength = (Number(value)) - this.plugin.settings.maximumTitleLength = isNaN(titleLength) || titleLength < 0 ? 0 : titleLength; - await this.plugin.saveSettings(); - }) + val + .setValue(this.plugin.settings.maximumTitleLength.toString(10)) + .onChange(async (value) => { + const titleLength = (Number(value)) + this.plugin.settings.maximumTitleLength = isNaN(titleLength) || titleLength < 0 ? 0 : titleLength; + await this.plugin.saveSettings(); + }) ) new Setting(containerEl) @@ -101,5 +103,20 @@ export class AutoLinkTitleSettingTab extends PluginSettingTab { await this.plugin.saveSettings(); }) ); + + new Setting(containerEl) + .setName("Use New Scraper") + .setDesc( + "Use experimental new scraper, seems to work well on desktop but not mobile." + ) + .addToggle((val) => + val + .setValue(this.plugin.settings.useNewScraper) + .onChange(async (value) => { + console.log(value); + this.plugin.settings.useNewScraper = value; + await this.plugin.saveSettings(); + }) + ); } } diff --git a/versions.json b/versions.json index d0180b0..5238797 100644 --- a/versions.json +++ b/versions.json @@ -1,4 +1,5 @@ { + "1.5.1": "0.12.17", "1.5.0": "0.12.17", "1.4.1": "0.12.17", "1.4.0": "0.12.17",