diff --git a/.gitignore b/.gitignore
index 8a30d25..1bd2c05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -396,3 +396,4 @@ FodyWeavers.xsd
 
 # JetBrains Rider
 *.sln.iml
+secrets.json
diff --git a/RssBot.sln b/RssBot.sln
new file mode 100644
index 0000000..b2b5133
--- /dev/null
+++ b/RssBot.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.5.33516.290
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RssBot", "RssBot\RssBot.csproj", "{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {73B4F06F-AEBC-4FD7-86E2-A303CFE1FCB2}
+	EndGlobalSection
+EndGlobal
diff --git a/RssBot/BotWork.cs b/RssBot/BotWork.cs
new file mode 100644
index 0000000..756a34a
--- /dev/null
+++ b/RssBot/BotWork.cs
@@ -0,0 +1,56 @@
+using Microsoft.Extensions.Logging;
+
+namespace RssBot
+{
+    /// <summary>
+    /// Orchestrates one bot run: reads the new feed items and posts each one as a toot.
+    /// </summary>
+    public class BotWork
+    {
+        private readonly ILogger<BotWork> _logger;
+        private readonly Rss _rss;
+        private readonly Toot _toot;
+
+        public BotWork(ILogger<BotWork> logger, Rss rss, Toot toot)
+        {
+            _logger = logger;
+            _rss = rss;
+            _toot = toot;
+        }
+
+        /// <summary>
+        /// Reads all configured feeds and sends one toot per new item.
+        /// </summary>
+        /// <exception cref="AggregateException">One or more toots could not be sent.</exception>
+        public async Task RetrieveAndSendToots()
+        {
+            var newFeedItems = await _rss.ReadFeed();
+            var failures = new List<Exception>();
+
+            foreach (var botItems in newFeedItems)
+            {
+                foreach (var item in botItems.Value)
+                {
+                    try
+                    {
+                        await _toot.SendToot(botItems.Key, item);
+                    }
+                    catch (Exception ex)
+                    {
+                        // ReadFeed has already recorded every item as read, so aborting on the first
+                        // failure would silently drop all remaining items. Keep going, report at the end.
+                        _logger.LogError(ex, "failed sending toot for {key}, {item}", botItems.Key.Id, item.Url);
+                        failures.Add(ex);
+                    }
+                }
+            }
+
+            if (failures.Count > 0)
+            {
+                throw new AggregateException("One or more toots could not be sent", failures);
+            }
+
+            _logger.LogInformation("Done");
+        }
+    }
+}
\ No newline at end of file
diff --git a/RssBot/Config.cs b/RssBot/Config.cs
new file mode 100644
index 0000000..f9409e2
--- /dev/null
+++ b/RssBot/Config.cs
@@ -0,0 +1,37 @@
+namespace RssBot
+{
+    /// <summary>Bot configuration as deserialized from config.json.</summary>
+    public class Config
+    {
+        public bool PrivateOnly { get; set; } // Don't toot publicly
+        public List<FeedConfig> Feeds { get; set; } = new();
+        public List<TagReplacement> TagReplacements { get; set; } = new();
+    }
+
+    /// <summary>Literal text replacement applied to the raw tag string of a feed item.</summary>
+    public class TagReplacement
+    {
+        public string From { get; set; } = string.Empty;
+        public string To { get; set; } = string.Empty;
+    }
+
+    /// <summary>One feed URL and the bots that publish its items.</summary>
+    public class FeedConfig
+    {
+        public string Url { get; set; } = string.Empty;
+        public List<BotConfig> Bots { get; set; } = new();
+    }
+
+    /// <summary>Per-bot filter and formatting settings.</summary>
+    public class BotConfig
+    {
+        public string Id { get; set; } = string.Empty;
+        public string UrlFilter { get; set; } = string.Empty;
+        public string? UrlExclude { get; set; }
+        public string? TypeFilter { get; set; }
+        public bool ShowImage { get; set; }
+        public bool ShowTags { get; set; }
+        public string? IgnoreTags { get; set; }
+        public string? AdditionalTags { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/RssBot/Helpers.cs b/RssBot/Helpers.cs
new file mode 100644
index 0000000..eb1d1e8
--- /dev/null
+++ b/RssBot/Helpers.cs
@@ -0,0 +1,71 @@
+using CodeHollow.FeedReader;
+
+using RssBot.RssBot;
+
+using System.Xml.Linq;
+
+namespace RssBot
+{
+    /// <summary>Maps feed XML onto <see cref="RssItem"/>.</summary>
+    public static class Helpers
+    {
+        /// <summary>Converts a parsed feed item into the bot's own item model.</summary>
+        public static RssItem ToRssItem(this FeedItem feedItem)
+        {
+            var element = feedItem.SpecificItem.Element;
+            var bestImage = GetBestImage(element);
+
+            return new RssItem
+            {
+                Title = feedItem.Title,
+                Description = feedItem.Description,
+                Url = feedItem.Link,
+                ImageUrl = bestImage?.Key,
+                ImageDescription = bestImage?.Value,
+                Tags = GetTags(element)
+            };
+        }
+
+        /// <summary>Returns the text of the first <c>keywords</c> descendant, or <c>null</c> if there is none.</summary>
+        public static string? GetTags(XElement feedElement)
+        {
+            return GetText(feedElement.Descendants().FirstOrDefault(q => q.Name.LocalName == "keywords"));
+        }
+
+        /// <summary>
+        /// Picks the widest <c>image</c> descendant that has a <c>data</c> (URL), <c>width</c> and <c>alt</c> child.
+        /// </summary>
+        /// <returns>Image URL as key and alt text as value, or <c>null</c> if no image qualifies.</returns>
+        public static KeyValuePair<string, string>? GetBestImage(XElement feedElement)
+        {
+            KeyValuePair<string, string>? bestImage = null;
+            int maxWidth = 0;
+
+            foreach (var image in feedElement.Descendants().Where(q => q.Name.LocalName == "image"))
+            {
+                var url = GetChildText(image, "data");
+                var width = GetChildText(image, "width");
+                var alt = GetChildText(image, "alt");
+                if (alt != null && url != null && int.TryParse(width, out int intWidth) && intWidth > maxWidth)
+                {
+                    maxWidth = intWidth;
+                    bestImage = new KeyValuePair<string, string>(url, alt);
+                }
+            }
+            return bestImage;
+        }
+
+        private static string? GetChildText(XElement parent, string localName)
+        {
+            return GetText(parent.Elements().FirstOrDefault(q => q.Name.LocalName == localName));
+        }
+
+        // XNode.ToString() returns XML markup (escaped entities, CDATA wrappers), which corrupts
+        // URLs containing '&'. XElement.Value returns the decoded text instead.
+        private static string? GetText(XElement? element)
+        {
+            var text = element?.Value;
+            return string.IsNullOrEmpty(text) ? null : text;
+        }
+    }
+}
\ No newline at end of file
diff --git a/RssBot/Program.cs b/RssBot/Program.cs
new file mode 100644
index 0000000..421c70b
--- /dev/null
+++ b/RssBot/Program.cs
@@ -0,0 +1,23 @@
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+
+using RssBot;
+
+var services = new ServiceCollection();
+
+services.AddLogging(logging =>
+{
+    logging.ClearProviders();
+    logging.AddConsole();
+    logging.SetMinimumLevel(LogLevel.Debug);
+    var logFile = "rssmastodon.log";
+    logging.AddFile(logFile, append: true);
+});
+services.AddScoped<Rss>();
+services.AddScoped<Toot>();
+services.AddScoped<BotWork>();
+
+// Dispose the provider on exit so the registered logging providers are disposed as well.
+await using var provider = services.BuildServiceProvider();
+var botwork = provider.GetRequiredService<BotWork>();
+await botwork.RetrieveAndSendToots();
diff --git a/RssBot/Rss.cs b/RssBot/Rss.cs
new file mode 100644
index 0000000..53b081b
--- /dev/null
+++ b/RssBot/Rss.cs
@@ -0,0 +1,123 @@
+using CodeHollow.FeedReader;
+
+using Microsoft.Extensions.Logging;
+
+using Newtonsoft.Json;
+
+using RssBot.RssBot;
+
+namespace RssBot
+{
+    /// <summary>Reads the configured feeds and determines which items are new.</summary>
+    public class Rss
+    {
+        private readonly ILogger<Rss> _logger;
+        private readonly Config _config;
+
+        public Rss(ILogger<Rss> logger)
+        {
+            _logger = logger;
+            var config = File.ReadAllText("./config.json");
+            _config = JsonConvert.DeserializeObject<Config>(config) ?? throw new FileNotFoundException("cannot read config");
+        }
+
+        /// <summary>
+        /// Fetches every configured feed and returns, per bot, the items not seen in earlier runs.
+        /// </summary>
+        /// <remarks>
+        /// Items are recorded in <c>state.db</c> here, i.e. before the caller has actually sent them.
+        /// </remarks>
+        public async Task<Dictionary<BotConfig, List<RssItem>>> ReadFeed()
+        {
+            var unpublishedItems = new Dictionary<BotConfig, List<RssItem>>();
+            foreach (var bot in _config.Feeds.SelectMany(q => q.Bots))
+            {
+                unpublishedItems.Add(bot, new List<RssItem>());
+            }
+
+            using var db = new LiteDB.LiteDatabase("state.db");
+            var states = db.GetCollection<State>();
+
+            foreach (var feedConfig in _config.Feeds)
+            {
+                var feed = await FeedReader.ReadAsync(feedConfig.Url);
+                if (feed.Type != FeedType.Rss_1_0)
+                {
+                    // Skip just this feed instead of abandoning all feeds that follow it.
+                    _logger.LogError("Unexpected RSS-Type. Expecting 1.0, received '{type}'", feed.Type);
+                    continue;
+                }
+
+                var now = DateTime.Now;
+                var match = states.FindById(feedConfig.Url);
+                if (match == null)
+                {
+                    // first start, mark all as already sent
+                    match = new State { Id = feedConfig.Url, LastFeed = now };
+                    match.PostedItems.AddRange(feed.Items.Select(q => new PostedItem { Id = q.Id, ReadDate = now }));
+                }
+                else
+                {
+                    // cleanup old stuff, but keep ids the feed still lists so they are not tooted again
+                    var listedIds = feed.Items.Select(q => q.Id).ToHashSet();
+                    match.PostedItems.RemoveAll(q => q.ReadDate < now.AddDays(-120) && !listedIds.Contains(q.Id));
+                }
+
+                // Materialize once: PostedItems grows inside the loop, and a lazy query over it would be
+                // re-evaluated on every enumeration. DistinctBy keeps an id repeated in the feed from tooting twice.
+                var knownIds = match.PostedItems.Select(q => q.Id).ToHashSet();
+                var newItems = feed.Items.Where(q => !knownIds.Contains(q.Id)).DistinctBy(q => q.Id).ToList();
+
+                _logger.LogInformation("Tooting '{count}' feeds since '{lastfeed}'", newItems.Count, match.LastFeed);
+                foreach (var item in newItems)
+                {
+                    try
+                    {
+                        var rssItem = item.ToRssItem();
+                        var bot = GetBotForRssItem(feedConfig, rssItem);
+                        match.PostedItems.Add(new PostedItem { Id = item.Id, ReadDate = now });
+                        if (bot != null)
+                        {
+                            unpublishedItems[bot].Add(rssItem);
+                        }
+                    }
+                    catch (Exception ex)
+                    {
+                        _logger.LogError(ex, "Cannot toot item {item}", item.Id);
+                    }
+                }
+
+                match.LastFeed = now;
+                states.Upsert(match);
+            }
+            return unpublishedItems;
+        }
+
+        private static bool UrlHasExcludes(string url, string? excludes)
+        {
+            if (excludes == null)
+            {
+                return false;
+            }
+
+            // RemoveEmptyEntries: an empty entry (e.g. from a double space) is contained in every URL.
+            return excludes
+                .Split(" ", StringSplitOptions.RemoveEmptyEntries)
+                .Any(exclude => url.Contains(exclude, StringComparison.OrdinalIgnoreCase));
+        }
+
+        private static BotConfig? GetBotForRssItem(FeedConfig config, RssItem item)
+        {
+            if (string.IsNullOrEmpty(item.Url))
+            {
+                return null;
+            }
+
+            // The first bot whose filter matches and whose excludes do not match wins.
+            return config.Bots.FirstOrDefault(bot =>
+                !string.IsNullOrEmpty(bot.UrlFilter)
+                && item.Url.Contains(bot.UrlFilter, StringComparison.OrdinalIgnoreCase)
+                && !UrlHasExcludes(item.Url, bot.UrlExclude));
+        }
+    }
+}
\ No newline at end of file
diff --git a/RssBot/RssBot.csproj b/RssBot/RssBot.csproj
new file mode 100644
index 0000000..460392b
--- /dev/null
+++ b/RssBot/RssBot.csproj
@@ -0,0 +1,34 @@
+
+
+
+    Exe
+    net6.0
+    enable
+    enable
+
+
+
+
+
+
+
+
+
+      Always
+
+
+      Always
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/RssBot/RssBot.sln b/RssBot/RssBot.sln
new file mode 100644
index 0000000..b5f7d7e
--- /dev/null
+++ b/RssBot/RssBot.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.5.33516.290
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RssBot", "RssBot\RssBot.csproj", "{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{B2F433C4-D3E7-4D95-A89A-D7B8377D7195}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {73B4F06F-AEBC-4FD7-86E2-A303CFE1FCB2}
+	EndGlobalSection
+EndGlobal
diff --git a/RssBot/RssItem.cs b/RssBot/RssItem.cs
new file mode 100644
index 0000000..7dfff66
--- /dev/null
+++ b/RssBot/RssItem.cs
@@ -0,0 +1,13 @@
+namespace RssBot.RssBot
+{
+    /// <summary>Feed item reduced to the fields needed to compose a toot.</summary>
+    public class RssItem
+    {
+        public string Title { get; set; } = string.Empty;
+        public string Description { get; set; } = string.Empty;
+        public string Url { get; set; } = string.Empty;
+        public string? ImageUrl { get; set; }
+        public string? ImageDescription { get; set; }
+        public string? Tags { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/RssBot/Secrets.cs b/RssBot/Secrets.cs
new file mode 100644
index 0000000..a6693cb
--- /dev/null
+++ b/RssBot/Secrets.cs
@@ -0,0 +1,17 @@
+namespace RssBot
+{
+    /// <summary>Mastodon credentials as deserialized from secrets.json.</summary>
+    public class Secrets
+    {
+        public string Instance { get; set; } = "unknown";
+        public List<BotSecret> Bots { get; set; } = new();
+    }
+
+    /// <summary>Secret of a single bot; it is handed to the Mastodon client together with the instance name.</summary>
+    public class BotSecret
+    {
+        public string Id { get; set; } = string.Empty;
+        public string Secret { get; set; } = string.Empty;
+        public bool Disabled { get; set; } = false;
+    }
+}
\ No newline at end of file
diff --git a/RssBot/State.cs b/RssBot/State.cs
new file mode 100644
index 0000000..d1e98d2
--- /dev/null
+++ b/RssBot/State.cs
@@ -0,0 +1,18 @@
+namespace RssBot
+{
+    /// <summary>Per-feed state persisted in state.db; <see cref="Id"/> is the feed URL.</summary>
+    public class State
+    {
+        public DateTime? LastFeed { get; set; }
+        public string Id { get; set; } = string.Empty;
+
+        public List<PostedItem> PostedItems { get; set; } = new List<PostedItem>();
+    }
+
+    /// <summary>Id of a feed item that has already been handled, with the time it was first read.</summary>
+    public class PostedItem
+    {
+        public DateTime? ReadDate { get; set; }
+        public string Id { get; set; } = string.Empty;
+    }
+}
\ No newline at end of file
diff --git a/RssBot/Toot.cs b/RssBot/Toot.cs
new file mode 100644
index 0000000..f56c800
--- /dev/null
+++ b/RssBot/Toot.cs
@@ -0,0 +1,170 @@
+using Mastonet;
+using Mastonet.Entities;
+
+using Microsoft.Extensions.Logging;
+
+using Newtonsoft.Json;
+
+using RssBot.RssBot;
+
+using System.Text.RegularExpressions;
+
+namespace RssBot
+{
+    /// <summary>Formats feed items and publishes them as Mastodon statuses.</summary>
+    public class Toot
+    {
+        // Shared instance: creating a new HttpClient for every download can exhaust sockets.
+        private static readonly HttpClient _httpClient = new();
+
+        // Anything that is not a letter, digit or underscore. Unicode-aware so umlauts survive.
+        private static readonly Regex _invalidTagChars = new(@"[^\p{L}\p{N}_]", RegexOptions.Compiled);
+
+        private readonly Secrets _secrets;
+        private readonly Config _config;
+        private readonly ILogger<Toot> _logger;
+
+        public Toot(ILogger<Toot> logger)
+        {
+            var secrets = File.ReadAllText("./secrets.json");
+            _secrets = JsonConvert.DeserializeObject<Secrets>(secrets) ?? throw new FileNotFoundException("cannot read secrets");
+            var config = File.ReadAllText("./config.json");
+            _config = JsonConvert.DeserializeObject<Config>(config) ?? throw new FileNotFoundException("cannot read config");
+            _logger = logger;
+        }
+
+        private async Task<string?> UploadMedia(MastodonClient client, Stream fileStream, string filename, string description)
+        {
+            try
+            {
+                _logger.LogDebug("Uploading Image {filename}", filename);
+                if (fileStream == null)
+                {
+                    return null;
+                }
+
+                var attachment = await client.UploadMedia(fileStream, filename, description);
+                return attachment.Id;
+            }
+            catch (Exception ex)
+            {
+                // A failed image upload must not block the toot itself, but it should leave a trace.
+                _logger.LogWarning(ex, "Uploading Image {filename} failed", filename);
+                return null;
+            }
+        }
+
+        /// <summary>Composes the status text for a feed item and publishes it through the given bot.</summary>
+        /// <returns>The published status, or <c>null</c> if the bot is unknown or disabled.</returns>
+        public async Task<Status?> SendToot(BotConfig botConfig, RssItem rssItem)
+        {
+            var allTags = botConfig.ShowTags ? GetTagString(botConfig, rssItem) : string.Empty;
+
+            string content = $"{rssItem.Title}\n\n{rssItem.Description}\n\n{rssItem.Url}\n\n{allTags}";
+
+            // Honour the bot's ShowImage switch; dispose the download once the toot is out.
+            using Stream? imageStream = botConfig.ShowImage && rssItem.ImageUrl != null
+                ? await DownloadImage(rssItem.ImageUrl)
+                : null;
+            return await SendToot(botConfig.Id, content, null, imageStream, rssItem.ImageDescription ?? "Vorschaubild");
+        }
+
+        private string GetTagString(BotConfig botConfig, RssItem rssItem)
+        {
+            const StringSplitOptions splitOptions = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;
+
+            var allTags = rssItem.Tags ?? string.Empty;
+            foreach (var replacement in _config.TagReplacements ?? new List<TagReplacement>())
+            {
+                // string.Replace throws on a null or empty search string.
+                if (!string.IsNullOrEmpty(replacement.From))
+                {
+                    allTags = allTags.Replace(replacement.From, replacement.To);
+                }
+            }
+
+            var tagList = allTags.Split(allTags.Contains(",") ? "," : " ", splitOptions).ToList();
+
+            if (botConfig.IgnoreTags != null)
+            {
+                var ignoreTags = botConfig.IgnoreTags.Split(" ", splitOptions);
+                tagList = tagList.Where(q => !ignoreTags.Contains(q, StringComparer.InvariantCultureIgnoreCase)).ToList();
+            }
+            if (botConfig.AdditionalTags != null)
+            {
+                tagList.AddRange(botConfig.AdditionalTags.Split(" ", splitOptions));
+            }
+
+            // List<T>.ForEach cannot replace elements (assigning to the lambda parameter changes only
+            // a local copy), so the sanitizing has to be a projection.
+            var hashtags = tagList
+                .Select(q => _invalidTagChars.Replace(q, string.Empty))
+                .Where(q => q.Length > 0)
+                .Distinct(StringComparer.OrdinalIgnoreCase)
+                .Select(q => "#" + q);
+
+            return string.Join(" ", hashtags);
+        }
+
+        private async Task<Stream?> DownloadImage(string url)
+        {
+            try
+            {
+                using var response = await _httpClient.GetAsync(new Uri(url));
+                if (!response.IsSuccessStatusCode)
+                {
+                    return null; // Don't throw an error if just the image is missing
+                }
+
+                // Copy the body so the response can be disposed while the caller keeps a usable stream.
+                var buffer = new MemoryStream();
+                await response.Content.CopyToAsync(buffer);
+                buffer.Position = 0;
+                return buffer;
+            }
+            catch (Exception ex) when (ex is HttpRequestException or UriFormatException or TaskCanceledException)
+            {
+                _logger.LogWarning(ex, "Downloading image {url} failed", url);
+                return null;
+            }
+        }
+
+        /// <summary>Publishes a status through the bot with the given id, optionally with one image attached.</summary>
+        /// <returns>The published status, or <c>null</c> if the bot is unknown or disabled.</returns>
+        public async Task<Status?> SendToot(string botId, string content, string? replyTo, Stream? media, string altTag)
+        {
+            _logger.LogDebug("Sending Toot");
+            var client = GetServiceClient(botId);
+            if (client == null)
+            {
+                _logger.LogWarning("Bot not found or disabled");
+                return null;
+            }
+
+            string? attachmentId = null;
+            if (media != null)
+            {
+                attachmentId = await UploadMedia(client, media, "preview.png", altTag);
+            }
+
+            var visibility = _config.PrivateOnly ? Visibility.Private : Visibility.Public;
+            if (attachmentId != null)
+            {
+                return await client.PublishStatus(content, visibility, replyTo, mediaIds: new List<string> { attachmentId });
+            }
+
+            return await client.PublishStatus(content, visibility, replyTo);
+        }
+
+        private MastodonClient? GetServiceClient(string botId)
+        {
+            var bot = _secrets.Bots?.FirstOrDefault(q => q.Id == botId && !q.Disabled);
+            if (bot == null)
+            {
+                return null;
+            }
+
+            return new MastodonClient(_secrets.Instance, bot.Secret);
+        }
+    }
+}
\ No newline at end of file
diff --git a/RssBot/config.json b/RssBot/config.json
index 6c4715f..10ac25e 100644
--- a/RssBot/config.json
+++ b/RssBot/config.json
@@ -1,36 +1,82 @@
 {
+  "privateonly": false,
   "feeds": [
     {
-      "url":"https://",
-      "intervalMinutes":"15",
+      "url": "http://www.ndr.de/nachrichten/hamburg/index-rss.xml",
+      "intervalMinutes": "15",
       "bots": [
         {
-          "id":"News.Hamburg",
-          "urlfilter":"/nachrichten/hamburg",
-          "typefilter":"Article",
-          "showimage":true,
-          "showtags":true,
-          "ignoretags":null
+          "id": "News.Hamburg",
+          "urlfilter": "/nachrichten/hamburg",
+          "urlexclude": "podcast topvideos",
+          "typefilter": "Article",
+          "showimage": true,
+          "showtags": true,
+          "ignoretags": null,
+          "additionaltags": "HamburgNews"
         },
         {
-          "id":"HamburgJournal",
-          "urlfilter":"/fernsehen/sendungen/hamburg_journal",
-          "typefilter":"Article",
-          "showimage":true,
-          "showtags":true,
-          "ignoretags":"hamburg"
+          "id": "Sport.Hamburg",
+          "urlfilter": "/sport/",
+          "urlexclude": "podcast topvideos",
+          "showimage": true,
+          "showtags": true,
+          "ignoretags": null,
+          "additionaltags": "HamburgSport NeusVomSpocht"
         },
-        , {
-          "id":"Sendungen.hamburg",
-          "urlfilter":"/fernsehen/sendungen/",
-          "urlexclude":"hamburg_journal",
-          "typefilter":null,
-          "showimage":true,
-          "showtags":true,
-          "ignoretags":"hamburg"
+        {
+          "id": "Hamburg.HamburgJournal",
+          "urlfilter": "/fernsehen/sendungen/hamburg_journal",
+          "urlexclude": "podcast topvideos",
+          "typefilter": "Video",
+          "showimage": true,
+          "showtags": true,
+          "ignoretags": "hamburg"
         },
+        {
+          "id": "Sendungen.hamburg",
+          "urlfilter": "/fernsehen/sendungen/",
+          "urlexclude": "hamburg_journal podcast topvideos",
+          "typefilter": "Video",
+          "showimage": true,
+          "showtags": true,
+          "ignoretags": "hamburg"
+        }
       ]
     }
-
+  ],
+  "tagreplacements": [
+    {
+      "from": "hamburger sv",
+      "to": "HSV"
+    },
+    {
+      "from": "FC St. Pauli",
+      "to": "FCSP"
+    },
+    {
+      "from": "FC St.Pauli",
+      "to": "FCSP"
+    },
+    {
+      "from": "FC Sankt Pauli",
+      "to": "FCSP"
+    },
+    {
+      "from": "Sankt Pauli",
+      "to": "SanktPauli"
+    },
+    {
+      "from": "1. Bundesliga",
+      "to": "BuLi"
+    },
+    {
+      "from": "2. Bundesliga",
+      "to": "BuLi2"
+    },
+    {
+      "from": "Norddeutscher Rundfunk",
+      "to": "NDR"
+    }
   ]
-}
+}
\ No newline at end of file