diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index d1fe54c17..c7d6f3686 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -151,6 +151,21 @@ private static IMachineBuilder AddHangfireBuildJobRunner(this IMachineBuilder bu return builder; } + private static MongoStorageOptions GetMongoStorageOptions() + { + var mongoStorageOptions = new MongoStorageOptions + { + MigrationOptions = new MongoMigrationOptions + { + MigrationStrategy = new MigrateMongoMigrationStrategy(), + BackupStrategy = new CollectionMongoBackupStrategy() + }, + CheckConnection = true, + CheckQueuedJobsStrategy = CheckQueuedJobsStrategy.TailNotificationsCollection, + }; + return mongoStorageOptions; + } + public static IMachineBuilder AddMongoHangfireJobClient( this IMachineBuilder builder, string? connectionString = null @@ -164,19 +179,7 @@ public static IMachineBuilder AddMongoHangfireJobClient( c.SetDataCompatibilityLevel(CompatibilityLevel.Version_170) .UseSimpleAssemblyNameTypeSerializer() .UseRecommendedSerializerSettings() - .UseMongoStorage( - connectionString, - new MongoStorageOptions - { - MigrationOptions = new MongoMigrationOptions - { - MigrationStrategy = new MigrateMongoMigrationStrategy(), - BackupStrategy = new CollectionMongoBackupStrategy() - }, - CheckConnection = true, - CheckQueuedJobsStrategy = CheckQueuedJobsStrategy.TailNotificationsCollection, - } - ) + .UseMongoStorage(connectionString, GetMongoStorageOptions()) .UseFilter(new AutomaticRetryAttribute { Attempts = 0 }) ); builder.Services.AddHealthChecks().AddCheck(name: "Hangfire"); @@ -208,7 +211,7 @@ public static IMachineBuilder AddHangfireJobServer( builder.Services.AddHangfireServer(o => { - o.Queues = queues.ToArray(); + o.Queues = [.. 
queues]; }); return builder; } @@ -402,6 +405,20 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) return builder; } + public static IMachineBuilder AddModelCleanupJob(this IMachineBuilder builder, string? connectionString = null) + { + connectionString ??= builder.Configuration?.GetConnectionString("Hangfire"); + if (connectionString is null) + throw new InvalidOperationException("Hangfire connection string is required"); + + var mongoClientSettings = MongoClientSettings.FromUrl(new MongoUrl(connectionString)); + JobStorage.Current = new MongoStorage(mongoClientSettings, "recurring_job", GetMongoStorageOptions()); + builder.Services.AddSingleton(); + RecurringJobOptions options = new() { TimeZone = TimeZoneInfo.Utc }; + RecurringJob.AddOrUpdate("Cleanup-job", x => x.RunAsync(), Cron.Daily, options); + return builder; + } + private static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, BuildJobOptions options) { builder.Services.AddScoped(); diff --git a/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs b/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs index ffc639fc7..53d478672 100644 --- a/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs +++ b/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs @@ -7,6 +7,7 @@ public class TranslationEngine : IEntity public string EngineId { get; set; } = default!; public string SourceLanguage { get; set; } = default!; public string TargetLanguage { get; set; } = default!; + public bool IsModelRetrievable { get; set; } = false; public int BuildRevision { get; set; } public Build? 
CurrentBuild { get; set; } } diff --git a/src/SIL.Machine.AspNetCore/Services/CleanupJob.cs b/src/SIL.Machine.AspNetCore/Services/CleanupJob.cs index fe33ce2e7..a26aadcaa 100644 --- a/src/SIL.Machine.AspNetCore/Services/CleanupJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/CleanupJob.cs @@ -6,11 +6,9 @@ public class CleanupOldModelsJob(ISharedFileService sharedFileService) : ICleanu private List _filesPreviouslyMarkedForDeletion = []; private readonly List _filesNewlyMarkedForDeletion = []; - private static readonly string MODEL_DIRECTORY = "models/"; - public async Task RunAsync() { - var files = await SharedFileService.ListFilesAsync(MODEL_DIRECTORY); + var files = await SharedFileService.ListFilesAsync(ISharedFileService.ModelDirectory); // split name by underscore into engineID and buildRevision Dictionary modelsByEngineId = []; foreach (string file in files) diff --git a/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs b/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs index 73bffff36..1d011c578 100644 --- a/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs +++ b/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs @@ -1,15 +1,9 @@ namespace SIL.Machine.AspNetCore.Services; -public class HangfireHealthCheck : IHealthCheck +public class HangfireHealthCheck(JobStorage jobStorage, IOptions options) : IHealthCheck { - private readonly JobStorage _jobStorage; - private readonly IOptions _options; - - public HangfireHealthCheck(JobStorage jobStorage, IOptions options) - { - _jobStorage = jobStorage; - _options = options; - } + private readonly JobStorage _jobStorage = jobStorage; + private readonly IOptions _options = options; public Task CheckHealthAsync( HealthCheckContext context, diff --git a/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs b/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs index 2a25c5018..5e5c44c01 100644 --- a/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs +++ 
b/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs @@ -14,7 +14,7 @@ Task> ListFilesAsync( Task OpenWriteAsync(string path, CancellationToken cancellationToken = default); - Task GetPresignedUrlAsync(string path, CancellationToken cancellationToken = default); + Task GetPresignedUrlAsync(string path, int minutesToExpire, CancellationToken cancellationToken = default); Task DeleteAsync(string path, bool recurse = false, CancellationToken cancellationToken = default); } diff --git a/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs b/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs index 650a7a8af..144c6883f 100644 --- a/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs +++ b/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs @@ -2,11 +2,13 @@ public interface ISharedFileService { + public const string ModelDirectory = "models/"; + Uri GetBaseUri(); Uri GetResolvedUri(string path); - Task GetPresignedUrlAsync(string path); + Task GetPresignedUrlAsync(string path, int minutesToExpire); Task> ListFilesAsync( string path, diff --git a/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs b/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs index ae6950414..9efc860da 100644 --- a/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs +++ b/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs @@ -9,6 +9,7 @@ Task CreateAsync( string? 
engineName, string sourceLanguage, string targetLanguage, + bool isModelRetrievable = false, CancellationToken cancellationToken = default ); Task DeleteAsync(string engineId, CancellationToken cancellationToken = default); diff --git a/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs b/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs index 92aeec30a..b86613672 100644 --- a/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs @@ -96,7 +96,11 @@ public Task> ListFilesAsync( ); } - public Task GetPresignedUrlAsync(string path, CancellationToken cancellationToken = default) + public Task GetPresignedUrlAsync( + string path, + int minutesToExpire, + CancellationToken cancellationToken = default + ) { return Task.FromResult(path); } diff --git a/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs b/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs index 392659879..b79ef0976 100644 --- a/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs @@ -36,7 +36,11 @@ public Task> ListFilesAsync( ); } - public Task GetPresignedUrlAsync(string path, CancellationToken cancellationToken = default) + public Task GetPresignedUrlAsync( + string path, + int minutesToExpire, + CancellationToken cancellationToken = default + ) { return Task.FromResult(path); } diff --git a/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs b/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs index 927310e72..28ac322fe 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs @@ -52,6 +52,7 @@ public async Task CreateJobScriptAsync( + $" 'shared_file_uri': '{baseUri}',\n" + $" 'shared_file_folder': '{folder}',\n" + (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "") + + (engine.IsModelRetrievable ? 
$" 'save_model': '{engineId}_{engine.BuildRevision + 1}',\n" : "") + $" 'clearml': True\n" + "}\n" + "run(args)\n"; diff --git a/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs b/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs index a186f7d2f..17014b019 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs @@ -29,11 +29,14 @@ ISharedFileService sharedFileService public TranslationEngineType Type => TranslationEngineType.Nmt; + private const int MinutesToExpire = 60; + public async Task CreateAsync( string engineId, string? engineName, string sourceLanguage, string targetLanguage, + bool isModelRetrievable = false, CancellationToken cancellationToken = default ) { @@ -43,12 +46,13 @@ await _engines.InsertAsync( { EngineId = engineId, SourceLanguage = sourceLanguage, - TargetLanguage = targetLanguage + TargetLanguage = targetLanguage, + IsModelRetrievable = isModelRetrievable }, cancellationToken ); await _buildJobService.CreateEngineAsync( - new[] { BuildJobType.Cpu, BuildJobType.Gpu }, + [BuildJobType.Cpu, BuildJobType.Gpu], engineId, engineName, cancellationToken @@ -115,16 +119,35 @@ public async Task GetModelPresignedUrlAsync( CancellationToken cancellationToken = default ) { - var files = await _sharedFileService.ListFilesAsync($"models/", cancellationToken: cancellationToken); - // find latest file that start with the engineId - var latestFile = files.Where(f => f.StartsWith(engineId)).OrderByDescending(f => f).FirstOrDefault(); - if (latestFile is null) - throw new FileNotFoundException("No built, saved model found for engine.", engineId); - string buildRevision = latestFile.Split('_').Last(); + TranslationEngine engine = await GetEngineAsync(engineId, cancellationToken); + if (!engine.IsModelRetrievable) + throw new InvalidOperationException( + "The model cannot be downloaded. 
" + + "To enable downloading the model, recreate the engine with IsModelRetrievable property to true." + ); + if (engine.BuildRevision == 0) + throw new InvalidOperationException("The engine has not been built yet."); + string filename = $"{engineId}_{engine.BuildRevision}.tar.gz"; + bool fileExists = await _sharedFileService.ExistsAsync( + ISharedFileService.ModelDirectory + filename, + cancellationToken + ); + if (!fileExists) + throw new FileNotFoundException( + $"The model should exist to be downloaded but is not there for BuildRevision {engine.BuildRevision}." + ); var modelInfo = new ModelPresignedUrl { - PresignedUrl = (await _sharedFileService.GetPresignedUrlAsync($"models/{latestFile}")).ToString(), - BuildRevision = buildRevision + PresignedUrl = ( + await _sharedFileService.GetPresignedUrlAsync( + ISharedFileService.ModelDirectory + filename, + MinutesToExpire + ) + ).ToString(), + BuildRevision = engine.BuildRevision, + UrlExpirationTime = DateTime + .UtcNow.AddMinutes(MinutesToExpire) + .ToString("yyyy-MM-ddTHH\\:mm\\:ss.fffffffzzz", CultureInfo.InvariantCulture) }; return modelInfo; } @@ -178,4 +201,12 @@ private async Task CancelBuildJobAsync(string engineId, CancellationToken cancel if (buildId is not null && jobState is BuildJobState.None) await _platformService.BuildCanceledAsync(buildId, CancellationToken.None); } + + private async Task GetEngineAsync(string engineId, CancellationToken cancellationToken) + { + TranslationEngine? 
engine = await _engines.GetAsync(e => e.EngineId == engineId, cancellationToken); + if (engine is null) + throw new InvalidOperationException($"The engine {engineId} does not exist."); + return engine; + } } diff --git a/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs index ad6d60612..925cd5061 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs @@ -68,6 +68,11 @@ await PipInstallModuleAsync( + $" 'trg_lang': '{ConvertLanguageTag(engine.TargetLanguage)}',\n" + $" 'shared_file_uri': '{_sharedFileService.GetBaseUri()}',\n" + (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "") + + ( + engine.IsModelRetrievable + ? $" 'save_model': '{engine.EngineId}_{engine.BuildRevision + 1}',\n" + : "" + ) + $" 'clearml': False\n" + "}\n" + "run(args)\n" diff --git a/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs b/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs index 93f356f85..e27d1477b 100644 --- a/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs @@ -65,7 +65,11 @@ public async Task> ListFilesAsync( return response.S3Objects.Select(s3Obj => s3Obj.Key[_basePath.Length..]).ToList(); } - public Task GetPresignedUrlAsync(string path, CancellationToken cancellationToken = default) + public Task GetPresignedUrlAsync( + string path, + int minutesToExpire, + CancellationToken cancellationToken = default + ) { return Task.FromResult( _client.GetPreSignedURL( @@ -73,7 +77,7 @@ public Task GetPresignedUrlAsync(string path, CancellationToken cancella { BucketName = _bucketName, Key = _basePath + Normalize(path), - Expires = DateTime.UtcNow.AddMinutes(60) + Expires = DateTime.UtcNow.AddMinutes(minutesToExpire) } ) ); diff --git a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs 
b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs index 63b168e67..43a8972a6 100644 --- a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs +++ b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs @@ -23,6 +23,7 @@ await engineService.CreateAsync( request.HasEngineName ? request.EngineName : null, request.SourceLanguage, request.TargetLanguage, + request.IsModelRetrievable, context.CancellationToken ); return Empty; @@ -132,7 +133,8 @@ ServerCallContext context return new GetModelPresignedUrlResponse { PresignedUrl = modelPresignedUrl.PresignedUrl, - BuildRevision = modelPresignedUrl.BuildRevision + BuildRevision = modelPresignedUrl.BuildRevision, + UrlExpirationTime = modelPresignedUrl.UrlExpirationTime }; } diff --git a/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs b/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs index 04d11377e..cbdb60836 100644 --- a/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs +++ b/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs @@ -55,12 +55,12 @@ public Uri GetResolvedUri(string path) return new Uri(_baseUri, path); } - public async Task GetPresignedUrlAsync(string path) + public async Task GetPresignedUrlAsync(string path, int minutesToExpire) { string presignedUrl = path; if (_baseUri is not null) if (_baseUri.Scheme == "s3") - presignedUrl = await _fileStorage.GetPresignedUrlAsync(path); + presignedUrl = await _fileStorage.GetPresignedUrlAsync(path, minutesToExpire); var url = GetResolvedUri(presignedUrl); return url; } diff --git a/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs b/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs index 994d53a91..0397df19c 100644 --- a/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs +++ b/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs @@ -5,37 +5,25 @@ public static class SmtTransferBuildStages public const string Train = "train"; } 
-public class SmtTransferEngineService : ITranslationEngineService +public class SmtTransferEngineService( + IDistributedReaderWriterLockFactory lockFactory, + IPlatformService platformService, + IDataAccessContext dataAccessContext, + IRepository engines, + IRepository trainSegmentPairs, + SmtTransferEngineStateService stateService, + IBuildJobService buildJobService, + JobStorage jobStorage +) : ITranslationEngineService { - private readonly IDistributedReaderWriterLockFactory _lockFactory; - private readonly IPlatformService _platformService; - private readonly IDataAccessContext _dataAccessContext; - private readonly IRepository _engines; - private readonly IRepository _trainSegmentPairs; - private readonly SmtTransferEngineStateService _stateService; - private readonly IBuildJobService _buildJobService; - private readonly JobStorage _jobStorage; - - public SmtTransferEngineService( - IDistributedReaderWriterLockFactory lockFactory, - IPlatformService platformService, - IDataAccessContext dataAccessContext, - IRepository engines, - IRepository trainSegmentPairs, - SmtTransferEngineStateService stateService, - IBuildJobService buildJobService, - JobStorage jobStorage - ) - { - _lockFactory = lockFactory; - _platformService = platformService; - _dataAccessContext = dataAccessContext; - _engines = engines; - _trainSegmentPairs = trainSegmentPairs; - _stateService = stateService; - _buildJobService = buildJobService; - _jobStorage = jobStorage; - } + private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; + private readonly IPlatformService _platformService = platformService; + private readonly IDataAccessContext _dataAccessContext = dataAccessContext; + private readonly IRepository _engines = engines; + private readonly IRepository _trainSegmentPairs = trainSegmentPairs; + private readonly SmtTransferEngineStateService _stateService = stateService; + private readonly IBuildJobService _buildJobService = buildJobService; + private readonly 
JobStorage _jobStorage = jobStorage; public TranslationEngineType Type => TranslationEngineType.SmtTransfer; @@ -44,6 +32,7 @@ public async Task CreateAsync( string? engineName, string sourceLanguage, string targetLanguage, + bool isModelRetrievable = false, CancellationToken cancellationToken = default ) { @@ -53,7 +42,8 @@ await _engines.InsertAsync( { EngineId = engineId, SourceLanguage = sourceLanguage, - TargetLanguage = targetLanguage + TargetLanguage = targetLanguage, + IsModelRetrievable = isModelRetrievable }, cancellationToken ); diff --git a/src/SIL.Machine.AspNetCore/Usings.cs b/src/SIL.Machine.AspNetCore/Usings.cs index bd130ee4a..3bae9b368 100644 --- a/src/SIL.Machine.AspNetCore/Usings.cs +++ b/src/SIL.Machine.AspNetCore/Usings.cs @@ -1,6 +1,7 @@ global using System.Collections.Concurrent; global using System.Diagnostics; global using System.Diagnostics.CodeAnalysis; +global using System.Globalization; global using System.IO.Compression; global using System.Linq.Expressions; global using System.Net; diff --git a/src/SIL.Machine.Serval.EngineServer/Program.cs b/src/SIL.Machine.Serval.EngineServer/Program.cs index 7b84f9d8d..f9fea80fa 100644 --- a/src/SIL.Machine.Serval.EngineServer/Program.cs +++ b/src/SIL.Machine.Serval.EngineServer/Program.cs @@ -11,12 +11,9 @@ .AddMongoHangfireJobClient() .AddServalTranslationEngineService() .AddBuildJobService() + .AddModelCleanupJob() .AddClearMLService(); -builder.Services.AddSingleton(); -RecurringJobOptions options = new() { TimeZone = TimeZoneInfo.Utc }; -RecurringJob.AddOrUpdate("Cleanup-job", x => x.RunAsync(), Cron.Daily, options); - if (builder.Environment.IsDevelopment()) builder .Services.AddOpenTelemetry() diff --git a/src/SIL.Machine/Translation/ModelPresignedUrl.cs b/src/SIL.Machine/Translation/ModelPresignedUrl.cs index e9faa1b26..84a62fb2e 100644 --- a/src/SIL.Machine/Translation/ModelPresignedUrl.cs +++ b/src/SIL.Machine/Translation/ModelPresignedUrl.cs @@ -3,6 +3,7 @@ public class 
ModelPresignedUrl { public string PresignedUrl { get; set; } = default; - public string BuildRevision { get; set; } = default; + public int BuildRevision { get; set; } = default; + public string UrlExpirationTime { get; set; } = default; } }