From 4d6bfc5e0164d10433f4ec3eb0cbb6a7554106d4 Mon Sep 17 00:00:00 2001 From: Friedrich von Never Date: Sun, 21 Aug 2022 21:43:47 +0200 Subject: [PATCH] (#102) ContentProxy: finish working FileCache --- .../Emulsion.ContentProxy.fsproj | 2 + Emulsion.ContentProxy/FileCache.fs | 92 +++++++++++++++---- .../Emulsion.TestFramework.fsproj | 4 +- .../SimpleHttpClientFactory.fs | 7 ++ Emulsion.TestFramework/WebFileStorage.fs | 32 ++++++- Emulsion.Tests/ContentProxy/FileCacheTests.fs | 20 +++- Emulsion.sln.DotSettings | 1 + 7 files changed, 130 insertions(+), 28 deletions(-) create mode 100644 Emulsion.TestFramework/SimpleHttpClientFactory.fs diff --git a/Emulsion.ContentProxy/Emulsion.ContentProxy.fsproj b/Emulsion.ContentProxy/Emulsion.ContentProxy.fsproj index 8a528a74..f086ead5 100644 --- a/Emulsion.ContentProxy/Emulsion.ContentProxy.fsproj +++ b/Emulsion.ContentProxy/Emulsion.ContentProxy.fsproj @@ -17,7 +17,9 @@ + + diff --git a/Emulsion.ContentProxy/FileCache.fs b/Emulsion.ContentProxy/FileCache.fs index 6159dd8e..e74606b8 100644 --- a/Emulsion.ContentProxy/FileCache.fs +++ b/Emulsion.ContentProxy/FileCache.fs @@ -2,12 +2,15 @@ open System open System.IO +open System.Net.Http open System.Security.Cryptography open System.Text - open System.Threading -open Emulsion.Settings + open Serilog +open SimpleBase + +open Emulsion.Settings type DownloadRequest = { Uri: Uri @@ -15,40 +18,87 @@ type DownloadRequest = { Size: uint64 } +module Base58 = + /// Suggested by @ttldtor. + let M4N71KR = Base58(Base58Alphabet "123456789qwertyuiopasdfghjkzxcvbnmQWERTYUPASDFGHJKLZXCVBNM") + module FileCache = - let FileName(sha256: SHA256, cacheKey: string): string = + let EncodeFileName(sha256: SHA256, cacheKey: string): string = cacheKey |> Encoding.UTF8.GetBytes |> sha256.ComputeHash - |> Convert.ToBase64String + |> Base58.M4N71KR.Encode + + let DecodeFileNameToSha256Hash(fileName: string): byte[] = + (Base58.M4N71KR.Decode fileName).ToArray() -// TODO: Total cache limit type FileCache(logger: ILogger, settings: FileCacheSettings, + httpClientFactory: IHttpClientFactory, sha256: SHA256) = let getFilePath(cacheKey: string) = - Path.Combine(settings.Directory, FileCache.FileName(sha256, cacheKey)) + Path.Combine(settings.Directory, FileCache.EncodeFileName(sha256, cacheKey)) let getFromCache(cacheKey: string) = async { let path = getFilePath cacheKey return if File.Exists path then - Some(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Delete)) + Some(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read|||FileShare.Delete)) else None } - // TODO: Check total item size, too + let assertCacheValid() = async { + Directory.EnumerateFileSystemEntries settings.Directory + |> Seq.iter(fun entry -> + let entryName = Path.GetFileName entry + + if not <| File.Exists entry + then failwith $"Cache directory invalid: contains a subdirectory: \"{entryName}\"." + + let hash = FileCache.DecodeFileNameToSha256Hash entryName + if hash.Length <> sha256.HashSize / 8 + then failwith ( + $"Cache directory invalid: contains entry \"{entryName}\" which doesn't correspond to a " + + "base58-encoded SHA-256 hash." + ) + ) + } + let ensureFreeCache size = async { - if size > settings.FileSizeLimitBytes then + if size > settings.FileSizeLimitBytes || size > settings.TotalCacheSizeLimitBytes then return false else - return failwith "TODO: Sanity check that cache only has files" + do! assertCacheValid() + + let allEntries = + Directory.EnumerateFileSystemEntries settings.Directory + |> Seq.map FileInfo + + // Now, sort the entries from newest to oldest, and start deleting if required at a point when we understand + // that there are too much files: + let entriesByPriority = + allEntries + |> Seq.sortByDescending(fun info -> info.LastWriteTimeUtc) + |> Seq.toArray + + let mutable currentSize = 0UL + for info in entriesByPriority do + currentSize <- currentSize + Checked.uint64 info.Length + if currentSize + size > settings.TotalCacheSizeLimitBytes then + logger.Information("Deleting a cache item \"{FileName}\" ({Size} bytes)", info.Name, info.Length) + info.Delete() + + return true } - let download uri: Async = async { - return failwithf "TODO: Download the URI and return a stream" + let download(uri: Uri): Async = async { + let! ct = Async.CancellationToken + + use client = httpClientFactory.CreateClient() + let! response = Async.AwaitTask <| client.GetAsync(uri, ct) + return! Async.AwaitTask <| response.EnsureSuccessStatusCode().Content.ReadAsStreamAsync() } let downloadIntoCacheAndGet uri cacheKey: Async = async { @@ -57,21 +107,23 @@ type FileCache(logger: ILogger, let path = getFilePath cacheKey logger.Information("Saving {Uri} to path {Path}…", uri, path) - use cachedFile = new FileStream(path, FileMode.Open, FileAccess.Write, FileShare.None) - do! Async.AwaitTask(stream.CopyToAsync(cachedFile, ct)) - logger.Information("Download successful: \"{Uri}\" to \"{Path}\".") + do! async { // to limit the cachedFile scope + use cachedFile = new FileStream(path, FileMode.CreateNew, FileAccess.Write, FileShare.None) + do! Async.AwaitTask(stream.CopyToAsync(cachedFile, ct)) + logger.Information("Download successful: \"{Uri}\" to \"{Path}\".") + } let! file = getFromCache cacheKey return upcast Option.get file } let cancellation = new CancellationTokenSource() - let processRequest request: Async = async { + let processRequest request: Async = async { logger.Information("Cache lookup for content {Uri} (cache key {CacheKey})", request.Uri, request.CacheKey) match! getFromCache request.CacheKey with | Some content -> logger.Information("Cache hit for content {Uri} (cache key {CacheKey})", request.Uri, request.CacheKey) - return Some content + return content | None -> logger.Information("No cache hit for content {Uri} (cache key {CacheKey}), will download", request.Uri, request.CacheKey) let! shouldCache = ensureFreeCache request.Size @@ -79,11 +131,11 @@ type FileCache(logger: ILogger, logger.Information("Resource {Uri} (cache key {CacheKey}, {Size} bytes) will fit into cache, caching", request.Uri, request.CacheKey, request.Size) let! result = downloadIntoCacheAndGet request.Uri request.CacheKey logger.Information("Resource {Uri} (cache key {CacheKey}, {Size} bytes) downloaded", request.Uri, request.CacheKey, request.Size) - return Some result + return result else logger.Information("Resource {Uri} (cache key {CacheKey}) won't fit into cache, directly downloading", request.Uri, request.CacheKey) let! result = download request.Uri - return Some result + return result } let rec processLoop(processor: MailboxProcessor<_ * AsyncReplyChannel<_>>) = async { @@ -91,7 +143,7 @@ type FileCache(logger: ILogger, let! request, replyChannel = processor.Receive() try let! result = processRequest request - replyChannel.Reply result + replyChannel.Reply(Some result) with | ex -> logger.Error(ex, "Exception while processing the file download queue") diff --git a/Emulsion.TestFramework/Emulsion.TestFramework.fsproj b/Emulsion.TestFramework/Emulsion.TestFramework.fsproj index fcfeedab..8d086454 100644 --- a/Emulsion.TestFramework/Emulsion.TestFramework.fsproj +++ b/Emulsion.TestFramework/Emulsion.TestFramework.fsproj @@ -1,8 +1,9 @@ - + net6.0 true + Library @@ -13,6 +14,7 @@ + diff --git a/Emulsion.TestFramework/SimpleHttpClientFactory.fs b/Emulsion.TestFramework/SimpleHttpClientFactory.fs new file mode 100644 index 00000000..e024a00b --- /dev/null +++ b/Emulsion.TestFramework/SimpleHttpClientFactory.fs @@ -0,0 +1,7 @@ +namespace Emulsion.TestFramework + +open System.Net.Http + +type SimpleHttpClientFactory() = + interface IHttpClientFactory with + member this.CreateClient _ = new HttpClient() diff --git a/Emulsion.TestFramework/WebFileStorage.fs b/Emulsion.TestFramework/WebFileStorage.fs index b9ae0a3c..28e3e66a 100644 --- a/Emulsion.TestFramework/WebFileStorage.fs +++ b/Emulsion.TestFramework/WebFileStorage.fs @@ -1,14 +1,38 @@ namespace Emulsion.TestFramework open System +open System.Net +open System.Net.Sockets + +open Microsoft.AspNetCore.Builder +open Microsoft.AspNetCore.Http + +module private NetUtil = + let findFreePort() = + use socket = new Socket(SocketType.Stream, ProtocolType.Tcp) + socket.Bind(IPEndPoint(IPAddress.Loopback, 0)) + (socket.LocalEndPoint :?> IPEndPoint).Port type WebFileStorage(data: Map) = + let url = $"http://localhost:{NetUtil.findFreePort()}" + + let startWebApplication() = + let builder = WebApplication.CreateBuilder() + let app = builder.Build() + app.MapGet("/{entry}", Func<_, _>(fun (entry: string) -> task { + return Results.Bytes(data[entry]) + })) |> ignore + app, app.RunAsync url + + let app, task = startWebApplication() + member _.Link(entry: string): Uri = - failwith "todo" + Uri $"{url}/{entry}" member _.Content(entry: string): byte[] = - failwith "todo" + data[entry] interface IDisposable with - member this.Dispose(): unit = failwith "todo" - + member this.Dispose(): unit = + app.StopAsync().Wait() + task.Wait() diff --git a/Emulsion.Tests/ContentProxy/FileCacheTests.fs b/Emulsion.Tests/ContentProxy/FileCacheTests.fs index f0f22a4f..4ad4903e 100644 --- a/Emulsion.Tests/ContentProxy/FileCacheTests.fs +++ b/Emulsion.Tests/ContentProxy/FileCacheTests.fs @@ -32,7 +32,7 @@ type FileCacheTests(outputHelper: ITestOutputHelper) = TotalCacheSizeLimitBytes = totalLimitBytes } - new FileCache(xunitLogger outputHelper, settings, sha256) + new FileCache(xunitLogger outputHelper, settings, SimpleHttpClientFactory(), sha256) let assertCacheState(entries: (string * byte[]) seq) = let files = @@ -46,10 +46,20 @@ type FileCacheTests(outputHelper: ITestOutputHelper) = let entries = entries - |> Seq.map(fun (k, v) -> FileCache.FileName(sha256, k), v) + |> Seq.map(fun (k, v) -> FileCache.EncodeFileName(sha256, k), v) |> Map.ofSeq - Assert.Equal>(entries, files) + Assert.Equal>(entries.Keys, files.Keys) + for key in entries.Keys do + Assert.Equal>(entries[key], files[key]) + + [] + member _.``File cache should throw a validation exception if the cache directory contains directories``(): unit = + Assert.False true + + [] + member _.``File cache should throw a validation exception if the cache directory contains non-conventionally-named files``(): unit = + Assert.False true [] member _.``File should be cached``(): unit = @@ -84,6 +94,10 @@ type FileCacheTests(outputHelper: ITestOutputHelper) = |] } + [] + member _.``File cache cleanup works in order by file modification dates``(): unit = + Assert.False true + [] member _.``File should be read even after cleanup``(): unit = Assert.False true diff --git a/Emulsion.sln.DotSettings b/Emulsion.sln.DotSettings index a93531a6..b2e4be22 100644 --- a/Emulsion.sln.DotSettings +++ b/Emulsion.sln.DotSettings @@ -3,5 +3,6 @@ True True True + True True True \ No newline at end of file