Skip to content

Commit

Permalink
Pass confidences in AWP; other minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Feb 24, 2025
1 parent 3fdf92e commit a020acf
Show file tree
Hide file tree
Showing 25 changed files with 43 additions and 76 deletions.
4 changes: 1 addition & 3 deletions src/Echo/src/EchoEngine/WordAlignmentEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ ServerCallContext context
{
SourceTokens = { sourceTokens },
TargetTokens = { targetTokens },
Confidences = { Enumerable.Repeat(1.0, minLength) },
Alignment = { GenerateAlignedWordPairs(minLength) }
}
};
Expand Down Expand Up @@ -93,8 +92,7 @@ await _parallelCorpusPreprocessingService.PreprocessAsync(
.Select(
(_, i) => new AlignedWordPair() { SourceIndex = i, TargetIndex = i }
)
},
Confidences = { row.SourceSegment.Split().Select(_ => 1.0) }
}
}
);
return Task.CompletedTask;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

public class ThotWordAlignmentModelOptions
{
public const string Key = "WordAlignmentModel";
public const string Key = "ThotWordAlignmentModel";

public ThotWordAlignmentModelOptions()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,5 @@ public record WordAlignment
public required IReadOnlyList<string> Refs { get; init; }
public required IReadOnlyList<string> SourceTokens { get; set; }
public required IReadOnlyList<string> TargetTokens { get; set; }
public required IReadOnlyList<double> Confidences { get; set; }
public required IReadOnlyList<AlignedWordPair> Alignment { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
<PackageReference Include="SIL.Machine.Translation.Thot" Version="3.5.2" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
<PackageReference Include="SIL.WritingSystems" Version="14.1.1" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
<PackageReference Include="YamlDotNet" Version="11.2.1" />
</ItemGroup>

<ItemGroup>
Expand All @@ -50,7 +51,6 @@
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine.Tool\SIL.Machine.Tool.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine.Tool\SIL.Machine.Tool.csproj')" />
<ProjectReference Include="..\..\..\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj" />
<EmbeddedResource Include="data\flores200languages.csv" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
namespace Serval.Machine.Shared.Services;
using System.ComponentModel;

namespace Serval.Machine.Shared.Services;

public class ClearMLMonitorService(
IServiceProvider services,
Expand Down Expand Up @@ -105,9 +107,20 @@ await _clearMLService.GetTasksByIdAsync(
var dataAccessContext = scope.ServiceProvider.GetRequiredService<IDataAccessContext>();
foreach (ITrainingEngine engine in engineToBuildServiceDict.Keys)
{
IPlatformService platformService = scope.ServiceProvider.GetKeyedService<IPlatformService>(
engine.Type.ToEngineGroup()
)!;
IPlatformService platformService = scope.ServiceProvider.GetRequiredKeyedService<IPlatformService>(
engine.Type switch
{
EngineType.SmtTransfer => EngineGroup.Translation,
EngineType.Nmt => EngineGroup.Translation,
EngineType.Statistical => EngineGroup.WordAlignment,
_
=> throw new InvalidEnumArgumentException(
nameof(engine.Type),
(int)engine.Type,
typeof(EngineType)
)
}
);
if (engine.CurrentBuild is null || !tasks.TryGetValue(engine.CurrentBuild.JobId, out ClearMLTask? task))
continue;

Expand Down
14 changes: 0 additions & 14 deletions src/Machine/src/Serval.Machine.Shared/Services/EngineType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,3 @@ public enum EngineGroup
Translation,
WordAlignment
}

/// <summary>
/// Extension methods for <see cref="EngineType"/>.
/// </summary>
public static class EngineTypeExtensions
{
    /// <summary>
    /// Resolves the <see cref="EngineGroup"/> that an engine type belongs to.
    /// </summary>
    /// <param name="engineType">The engine type to classify.</param>
    /// <returns>
    /// <see cref="EngineGroup.Translation"/> for <see cref="EngineType.SmtTransfer"/> and
    /// <see cref="EngineType.Nmt"/>; <see cref="EngineGroup.WordAlignment"/> for
    /// <see cref="EngineType.Statistical"/>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="engineType"/> is none of the values handled above.
    /// </exception>
    public static EngineGroup ToEngineGroup(this EngineType engineType) =>
        engineType switch
        {
            // Both translation engine flavours share a single group.
            EngineType.SmtTransfer or EngineType.Nmt => EngineGroup.Translation,
            EngineType.Statistical => EngineGroup.WordAlignment,
            _ => throw new ArgumentOutOfRangeException(nameof(engineType), engineType, null)
        };
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ await call.RequestStream.WriteAsync(
Refs = { wordAlignment.Refs },
SourceTokens = { wordAlignment.SourceTokens },
TargetTokens = { wordAlignment.TargetTokens },
Confidences = { wordAlignment.Confidences },
Alignment = { Map(wordAlignment.Alignment) }
},
cancellationToken
Expand Down Expand Up @@ -125,7 +124,6 @@ JsonSerializerOptions options
}
string corpusId = "",
textId = "";
IReadOnlyList<double> confidences = [];
IReadOnlyList<string> refs = [],
sourceTokens = [],
targetTokens = [];
Expand All @@ -145,10 +143,6 @@ JsonSerializerOptions options
reader.Read();
textId = reader.GetString()!;
break;
case "confidences":
reader.Read();
confidences = JsonSerializer.Deserialize<IList<double>>(ref reader, options)!.ToArray();
break;
case "refs":
reader.Read();
refs = JsonSerializer.Deserialize<IList<string>>(ref reader, options)!.ToArray();
Expand Down Expand Up @@ -178,7 +172,6 @@ JsonSerializerOptions options
TextId = textId,
Refs = refs,
Alignment = alignedWordPairs,
Confidences = confidences,
SourceTokens = sourceTokens,
TargetTokens = targetTokens
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@ public async Task<WordAlignmentResult> AlignAsync(
{
SourceTokens = { sourceTokens },
TargetTokens = { targetTokens },
Alignment = { wordPairs.Select(Map) },
Confidences = { wordPairs.Select(wp => wp.AlignmentScore).ToList() }
Alignment = { wordPairs.Select(Map) }
};
},
cancellationToken: cancellationToken
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,7 @@ CancellationToken cancellationToken
targetWriter,
wordAlignment with
{
Alignment = alignedWordPairs,
Confidences = alignedWordPairs.Select(wp => wp.AlignmentScore).ToArray()
Alignment = alignedWordPairs
},
JsonSerializerOptions
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ public async Task AlignAsync()
using var env = new TestEnvironment();
WordAlignmentResult result = await env.Service.AlignAsync(EngineId1, "esto es una prueba.", "this is a test.");
Assert.That(string.Join(' ', result.TargetTokens), Is.EqualTo("this is a test ."));
Assert.That(result.Confidences, Has.Count.EqualTo(5));
Assert.That(result.Alignment.First().SourceIndex, Is.EqualTo(0));
Assert.That(result.Alignment.First().TargetIndex, Is.EqualTo(0));
}
Expand Down
14 changes: 2 additions & 12 deletions src/Serval/src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7137,8 +7137,7 @@ public partial interface IWordAlignmentEnginesClient
/// <br/> * An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed.
/// <br/>* **SourceTokens**: the tokenized source segment
/// <br/>* **TargetTokens**: the tokenized target segment
/// <br/>* **Confidences**: the confidence of the alignment on a scale from 0 to 1
/// <br/>* **Alignment**: a list of aligned word pairs
/// <br/>* **Alignment**: a list of aligned word pairs with associated scores
/// <br/>
/// <br/>Word alignments can be filtered by text id if provided.
/// <br/>Only word alignments for the most recent successful build of the engine are returned.
Expand Down Expand Up @@ -8406,8 +8405,7 @@ public string BaseUrl
/// <br/> * An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed.
/// <br/>* **SourceTokens**: the tokenized source segment
/// <br/>* **TargetTokens**: the tokenized target segment
/// <br/>* **Confidences**: the confidence of the alignment on a scale from 0 to 1
/// <br/>* **Alignment**: a list of aligned word pairs
/// <br/>* **Alignment**: a list of aligned word pairs with associated scores
/// <br/>
/// <br/>Word alignments can be filtered by text id if provided.
/// <br/>Only word alignments for the most recent successful build of the engine are returned.
Expand Down Expand Up @@ -10547,10 +10545,6 @@ public partial class WordAlignmentResult
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<string> TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection<string>();

[Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<double> Confidences { get; set; } = new System.Collections.ObjectModel.Collection<double>();

[Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<AlignedWordPair> Alignment { get; set; } = new System.Collections.ObjectModel.Collection<AlignedWordPair>();
Expand Down Expand Up @@ -10644,10 +10638,6 @@ public partial class WordAlignment
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<string> TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection<string>();

[Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<double> Confidences { get; set; } = new System.Collections.ObjectModel.Collection<double>();

[Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<AlignedWordPair> Alignment { get; set; } = new System.Collections.ObjectModel.Collection<AlignedWordPair>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ message GetQueueSizeResponse {
// One alignment link pairing a source-side token position with a target-side
// token position.
message AlignedWordPair {
// Position of the source token; presumably an index into the accompanying
// source_tokens list (TODO confirm against the producer).
int32 source_index = 1;
// Position of the target token; presumably an index into the accompanying
// target_tokens list (TODO confirm against the producer).
int32 target_index = 2;
// Score associated with this pair. The value range is not specified here.
double score = 3;
}

// Result of aligning one source segment with one target segment.
message WordAlignmentResult {
// The tokenized source segment.
repeated string source_tokens = 1;
// The tokenized target segment.
repeated string target_tokens = 2;
// Alignment confidence values.
// NOTE(review): if this field is dropped in favour of AlignedWordPair.score,
// mark field number 3 as `reserved` so it is never reused with another type.
repeated double confidences = 3;
// The aligned word pairs for this segment.
repeated AlignedWordPair alignment = 4;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ message InsertWordAlignmentsRequest {
repeated string refs = 4;
repeated string source_tokens = 5;
repeated string target_tokens = 6;
repeated double confidences = 7;
repeated AlignedWordPair alignment = 8;
repeated AlignedWordPair alignment = 7;
}

message UpdateBuildExecutionDataRequest {
Expand Down
1 change: 1 addition & 0 deletions src/Serval/src/Serval.Shared/Models/AlignedWordPair.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ public record AlignedWordPair
{
public required int SourceIndex { get; set; }
public required int TargetIndex { get; set; }
public double? Score { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken)
textId,
pretranslations.ToList(),
fullName: targetSettings.FullName,
behavior: UpdateUsfmBehavior.PreferExisting
textBehavior: UpdateUsfmTextBehavior.PreferExisting

Check failure on line 103 in src/Serval/src/Serval.Translation/Services/PretranslationService.cs

View workflow job for this annotation

GitHub Actions / Build

The name 'UpdateUsfmTextBehavior' does not exist in the current context
) ?? "";
break;
case PretranslationUsfmTextOrigin.PreferPretranslated:
Expand All @@ -109,7 +109,7 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken)
textId,
pretranslations.ToList(),
fullName: targetSettings.FullName,
behavior: UpdateUsfmBehavior.PreferNew
textBehavior: UpdateUsfmTextBehavior.PreferNew

Check failure on line 112 in src/Serval/src/Serval.Translation/Services/PretranslationService.cs

View workflow job for this annotation

GitHub Actions / Build

The name 'UpdateUsfmTextBehavior' does not exist in the current context
) ?? "";
break;
case PretranslationUsfmTextOrigin.OnlyExisting:
Expand All @@ -118,7 +118,7 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken)
textId,
[], // don't put any pretranslations, we only want the existing text.
fullName: targetSettings.FullName,
behavior: UpdateUsfmBehavior.PreferNew
textBehavior: UpdateUsfmTextBehavior.PreferNew

Check failure on line 121 in src/Serval/src/Serval.Translation/Services/PretranslationService.cs

View workflow job for this annotation

GitHub Actions / Build

The name 'UpdateUsfmTextBehavior' does not exist in the current context
) ?? "";
break;
case PretranslationUsfmTextOrigin.OnlyPretranslated:
Expand All @@ -127,7 +127,7 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken)
textId,
pretranslations.ToList(),
fullName: targetSettings.FullName,
behavior: UpdateUsfmBehavior.StripExisting
textBehavior: UpdateUsfmTextBehavior.StripExisting
) ?? "";
break;
}
Expand All @@ -151,14 +151,14 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken)
textId,
pretranslations.ToList(),
fullName: targetSettings.FullName,
behavior: UpdateUsfmBehavior.StripExisting
textBehavior: UpdateUsfmTextBehavior.StripExisting
) ?? "";
case PretranslationUsfmTextOrigin.OnlyExisting:
return updater.UpdateUsfm(
textId,
[], // don't pass the pretranslations, we only want the existing text.
fullName: targetSettings.FullName,
behavior: UpdateUsfmBehavior.StripExisting
textBehavior: UpdateUsfmTextBehavior.StripExisting
) ?? "";
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@ public record WordAlignmentDto
public required IReadOnlyList<string> Refs { get; init; }
public required IReadOnlyList<string> SourceTokens { get; init; }
public required IReadOnlyList<string> TargetTokens { get; init; }
public required IReadOnlyList<double> Confidences { get; init; }
public required IReadOnlyList<AlignedWordPairDto> Alignment { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ public record WordAlignmentResultDto
{
public required IReadOnlyList<string> SourceTokens { get; init; }
public required IReadOnlyList<string> TargetTokens { get; init; }
public required IReadOnlyList<double> Confidences { get; init; }
public required IReadOnlyList<AlignedWordPairDto> Alignment { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,7 @@ CancellationToken cancellationToken
/// * An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed.
/// * **SourceTokens**: the tokenized source segment
/// * **TargetTokens**: the tokenized target segment
/// * **Confidences**: the confidence of the alignment on a scale from 0 to 1
/// * **Alignment**: a list of aligned word pairs
/// * **Alignment**: a list of aligned word pairs with associated scores
///
/// Word alignments can be filtered by text id if provided.
/// Only word alignments for the most recent successful build of the engine are returned.
Expand Down Expand Up @@ -984,7 +983,6 @@ private WordAlignmentResultDto Map(WordAlignmentResult source)
{
SourceTokens = source.SourceTokens.ToList(),
TargetTokens = source.TargetTokens.ToList(),
Confidences = source.Confidences.Select(c => Math.Round(c, 8)).ToList(),
Alignment = source.Alignment.Select(Map).ToList(),
};
}
Expand All @@ -1002,7 +1000,6 @@ private static WordAlignmentDto Map(Models.WordAlignment source)
Refs = source.Refs,
SourceTokens = source.SourceTokens.ToList(),
TargetTokens = source.TargetTokens.ToList(),
Confidences = source.Confidences.Select(c => Math.Round(c, 8)).ToList(),
Alignment = source
.Alignment.Select(c => new AlignedWordPairDto()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ public class WordAlignment : IEntity
public required IReadOnlyList<string> Refs { get; init; }
public required IReadOnlyList<string> SourceTokens { get; set; }
public required IReadOnlyList<string> TargetTokens { get; set; }
public required IReadOnlyList<double> Confidences { get; set; }
public required IReadOnlyList<AlignedWordPair> Alignment { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ public record WordAlignmentResult
{
public required IReadOnlyList<string> SourceTokens { get; set; }
public required IReadOnlyList<string> TargetTokens { get; set; }
public required IReadOnlyList<double> Confidences { get; set; }
public required IReadOnlyList<AlignedWordPair> Alignment { get; set; }
}
8 changes: 6 additions & 2 deletions src/Serval/src/Serval.WordAlignment/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,18 @@ private Models.WordAlignmentResult Map(V1.WordAlignmentResult source)
{
SourceTokens = source.SourceTokens.ToList(),
TargetTokens = source.TargetTokens.ToList(),
Confidences = source.Confidences.ToList(),
Alignment = source.Alignment.Select(Map).ToList(),
};
}

private Shared.Models.AlignedWordPair Map(V1.AlignedWordPair source)
{
return new Shared.Models.AlignedWordPair { SourceIndex = source.SourceIndex, TargetIndex = source.TargetIndex };
return new Shared.Models.AlignedWordPair
{
SourceIndex = source.SourceIndex,
TargetIndex = source.TargetIndex,
Score = source.Score
};
}

private V1.ParallelCorpus Map(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ ServerCallContext context
Refs = request.Refs.ToList(),
SourceTokens = request.SourceTokens.ToList(),
TargetTokens = request.TargetTokens.ToList(),
Confidences = request.Confidences.ToList(),
Alignment = request
.Alignment.Select(a => new Shared.Models.AlignedWordPair
{
SourceIndex = a.SourceIndex,
TargetIndex = a.TargetIndex
TargetIndex = a.TargetIndex,
Score = a.Score
})
.ToList()
}
Expand Down
Loading

0 comments on commit a020acf

Please sign in to comment.