Skip to content

Commit

Permalink
Add NLLB-200 language code checking support
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Jan 20, 2024
1 parent 79c0832 commit 831225a
Show file tree
Hide file tree
Showing 11 changed files with 337 additions and 36 deletions.
1 change: 1 addition & 0 deletions src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<ProjectReference Include="..\SIL.Machine\SIL.Machine.csproj" />
<ProjectReference Include="..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj" Condition="Exists('..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj')" />
<ProjectReference Include="..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj" Condition="Exists('..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj')" />
<EmbeddedResource Include="data\flores200languages.csv" />
</ItemGroup>

<Target Name="ZipThotNewModel" BeforeTargets="BeforeBuild">
Expand Down
1 change: 1 addition & 0 deletions src/SIL.Machine.AspNetCore/Services/ILanguageTagService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
/// <summary>
/// Maps language tags onto FLORES-200 language codes and reports whether a tag is present in the
/// FLORES-200 language list.
/// </summary>
public interface ILanguageTagService
{
/// <summary>
/// Converts <paramref name="languageTag"/> to the code that is used as the lookup key in the FLORES-200 list.
/// </summary>
/// <param name="languageTag">The language tag to convert.</param>
/// <returns>The converted code. NOTE(review): the exact conversion rules are not fully visible here.</returns>
string ConvertToFlores200Code(string languageTag);
/// <summary>
/// Converts <paramref name="languageTag"/> with <see cref="ConvertToFlores200Code"/> and checks whether the
/// resulting code is a known FLORES-200 language.
/// </summary>
/// <param name="languageTag">The language tag to check.</param>
/// <returns>A <see cref="LanguageInfoDto"/> describing the resolved code and whether it was found.</returns>
LanguageInfoDto CheckInFlores200(string languageTag);
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ Task StartBuildAsync(
/// <summary>
/// Asynchronously cancels a build for the engine identified by <paramref name="engineId"/>.
/// NOTE(review): no implementation is visible in this view; presumably targets the engine's current build — confirm.
/// </summary>
/// <param name="engineId">The id of the engine.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default);

/// <summary>
/// Asynchronously gets the current queue size reported by the implementing engine service.
/// </summary>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The queue size.</returns>
Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default);

/// <summary>
/// Gets information about <paramref name="languageCode"/> for this engine type.
/// </summary>
/// <param name="languageCode">The language code to look up.</param>
/// <returns>A <see cref="LanguageInfoDto"/> describing the language.</returns>
/// <remarks>
/// NOTE(review): implementations are not required to support this; at least one visible implementation
/// throws <see cref="NotImplementedException"/>.
/// </remarks>
LanguageInfoDto GetlanguageInfo(string languageCode);
}
31 changes: 31 additions & 0 deletions src/SIL.Machine.AspNetCore/Services/LanguageTagService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ public class LanguageTagService : ILanguageTagService

private readonly Dictionary<string, string> _defaultScripts;

private readonly Dictionary<string, string> _flores200Languages;

private static readonly Regex LangTagPattern = new Regex(
"(?'language'[a-zA-Z]{2,8})([_-](?'script'[a-zA-Z]{4}))?",
RegexOptions.ExplicitCapture
Expand All @@ -16,6 +18,7 @@ public LanguageTagService()
{
// initialise SLDR language tags to retrieve latest langtags.json file
_defaultScripts = InitializeDefaultScripts();
_flores200Languages = InitializeFlores200Languages();
}

private static Dictionary<string, string> InitializeDefaultScripts()
Expand Down Expand Up @@ -56,6 +59,26 @@ private static Dictionary<string, string> InitializeDefaultScripts()
return tempDefaultScripts;
}

/// <summary>
/// Loads the embedded <c>flores200languages.csv</c> resource into a lookup table.
/// </summary>
/// <returns>
/// A dictionary whose keys are the trimmed second CSV column (the code) and whose values are the trimmed
/// first CSV column (the language name).
/// </returns>
/// <exception cref="InvalidOperationException">The embedded resource is not present in the assembly.</exception>
/// <exception cref="InvalidDataException">A non-blank data row has fewer than two comma-separated fields.</exception>
private static Dictionary<string, string> InitializeFlores200Languages()
{
    const string resourceName = "SIL.Machine.AspNetCore.data.flores200languages.csv";

    // Fail with a descriptive error in every build configuration instead of relying on a debug-only assert.
    using Stream floresStream =
        Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName)
        ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' was not found.");
    using var reader = new StreamReader(floresStream);

    // Consume the header row unconditionally. Debug.Assert calls are compiled out of Release builds together
    // with their argument expressions, so reading the header inside the assert would leave the header row to
    // be parsed as a data row in Release.
    string? header = reader.ReadLine();
    Debug.Assert(header == "language, code");

    var flores200Languages = new Dictionary<string, string>();
    while (reader.ReadLine() is { } line)
    {
        // Tolerate empty lines (for example at the end of the file) rather than indexing past the split result.
        if (string.IsNullOrWhiteSpace(line))
            continue;

        string[] values = line.Split(',');
        if (values.Length < 2)
            throw new InvalidDataException($"Malformed row in '{resourceName}': '{line}'.");

        flores200Languages[values[1].Trim()] = values[0].Trim();
    }
    return flores200Languages;
}

public string ConvertToFlores200Code(string languageTag)
{
// Try to find a pattern of {language code}_{script}
Expand Down Expand Up @@ -96,4 +119,12 @@ public string ConvertToFlores200Code(string languageTag)
else
return languageTag;
}

/// <summary>
/// Resolves <paramref name="languageTag"/> to a FLORES-200 code and reports whether that code is in the
/// loaded FLORES-200 language table.
/// </summary>
/// <param name="languageTag">The language tag to check.</param>
/// <returns>
/// When the resolved code is known: a DTO built from the resolved code, the language name from the table and
/// <c>true</c>. Otherwise: a DTO built from the original tag, the resolved code, an empty name and <c>false</c>.
/// </returns>
public LanguageInfoDto CheckInFlores200(string languageTag)
{
    string resolvedCode = ConvertToFlores200Code(languageTag);

    // NOTE(review): the first constructor argument is the resolved code when the language is found but the
    // original tag when it is not — confirm against LanguageInfoDto that this asymmetry is intended.
    return _flores200Languages.TryGetValue(resolvedCode, out string? languageName)
        ? new LanguageInfoDto(resolvedCode, "Nmt", resolvedCode, languageName, true)
        : new LanguageInfoDto(languageTag, "Nmt", resolvedCode, "", false);
}
}
45 changes: 21 additions & 24 deletions src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,23 @@ public static class NmtBuildStages
public const string Postprocess = "postprocess";
}

public class NmtEngineService : ITranslationEngineService
public class NmtEngineService(
IPlatformService platformService,
IDistributedReaderWriterLockFactory lockFactory,
IDataAccessContext dataAccessContext,
IRepository<TranslationEngine> engines,
IBuildJobService buildJobService,
ILanguageTagService languageTagService,
ClearMLMonitorService clearMLMonitorService
) : ITranslationEngineService
{
private readonly IDistributedReaderWriterLockFactory _lockFactory;
private readonly IPlatformService _platformService;
private readonly IDataAccessContext _dataAccessContext;
private readonly IRepository<TranslationEngine> _engines;
private readonly IBuildJobService _buildJobService;
private readonly ClearMLMonitorService _clearMLMonitorService;

public NmtEngineService(
IPlatformService platformService,
IDistributedReaderWriterLockFactory lockFactory,
IDataAccessContext dataAccessContext,
IRepository<TranslationEngine> engines,
IBuildJobService buildJobService,
ClearMLMonitorService clearMLMonitorService
)
{
_lockFactory = lockFactory;
_platformService = platformService;
_dataAccessContext = dataAccessContext;
_engines = engines;
_buildJobService = buildJobService;
_clearMLMonitorService = clearMLMonitorService;
}
private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory;
private readonly IPlatformService _platformService = platformService;
private readonly IDataAccessContext _dataAccessContext = dataAccessContext;
private readonly IRepository<TranslationEngine> _engines = engines;
private readonly IBuildJobService _buildJobService = buildJobService;
private readonly ILanguageTagService _languageTagService = languageTagService;
private readonly ClearMLMonitorService _clearMLMonitorService = clearMLMonitorService;

public TranslationEngineType Type => TranslationEngineType.Nmt;

Expand Down Expand Up @@ -151,6 +143,11 @@ public Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default
return Task.FromResult(_clearMLMonitorService.QueueSize);
}

/// <summary>
/// Looks up <paramref name="languageCode"/> by delegating to the language tag service's FLORES-200 check.
/// </summary>
/// <param name="languageCode">The language code to look up.</param>
/// <returns>The <see cref="LanguageInfoDto"/> produced by the language tag service.</returns>
public LanguageInfoDto GetlanguageInfo(string languageCode) =>
    _languageTagService.CheckInFlores200(languageCode);

private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken)
{
(string? buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,16 @@

namespace SIL.Machine.AspNetCore.Services;

public class ServalTranslationEngineServiceV1 : TranslationEngineApi.TranslationEngineApiBase
public class ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
) : TranslationEngineApi.TranslationEngineApiBase
{
private static readonly Empty Empty = new();

private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices;
private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices = engineServices.ToDictionary(es => es.Type);

private readonly HealthCheckService _healthCheckService;

public ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
)
{
_engineServices = engineServices.ToDictionary(es => es.Type);
_healthCheckService = healthCheckService;
}
private readonly HealthCheckService _healthCheckService = healthCheckService;

public override async Task<Empty> Create(CreateRequest request, ServerCallContext context)
{
Expand Down Expand Up @@ -133,6 +127,23 @@ ServerCallContext context
return new GetQueueSizeResponse { Size = await engineService.GetQueueSizeAsync(context.CancellationToken) };
}

/// <summary>
/// Handles the GetLanguageInfo call: asks the engine service selected by the request's engine type for
/// information about the request's language code and maps the result onto the response message.
/// </summary>
/// <param name="request">The request carrying the engine type and the language code.</param>
/// <param name="context">The server call context.</param>
/// <returns>A completed task holding the response.</returns>
public override Task<GetLanguageInfoResponse> GetLanguageInfo(

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)
GetLanguageInfoRequest request,

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)
ServerCallContext context
)
{
// Pick the engine service registered for the requested engine type.
ITranslationEngineService engineService = GetEngineService(request.EngineType);
// NOTE(review): an engine service whose GetlanguageInfo throws will propagate that exception out of this call.
LanguageInfoDto languageInfo = engineService.GetlanguageInfo(request.LanguageCode);
// The lookup is synchronous, so the response is wrapped in an already-completed task.
return Task.FromResult(
new GetLanguageInfoResponse
{
ResolvedLanguageCode = languageInfo.ResolvedLanguageCode,
CommonLanguageName = languageInfo.CommonLanguageName,
NativeLanguageSupport = languageInfo.NativeLanguageSupport,
}
);
}

public override async Task<HealthCheckResponse> HealthCheck(Empty request, ServerCallContext context)
{
HealthReport healthReport = await _healthCheckService.CheckHealthAsync();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ public Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default
return Task.FromResult(Convert.ToInt32(_jobStorage.GetMonitoringApi().EnqueuedCount("smt_transfer")));
}

/// <summary>
/// Language information lookup is not implemented by this engine service.
/// </summary>
/// <param name="languageCode">The language code to look up (unused).</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public LanguageInfoDto GetlanguageInfo(string languageCode) => throw new NotImplementedException();

private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken)
{
(string? buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(
Expand Down
Loading

0 comments on commit 831225a

Please sign in to comment.