Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LT-21988: Improve Hermit Crab performance #275

Open
wants to merge 14 commits into
base: release/9.3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Build/mkall.targets
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@
<ParatextNugetVersion>9.4.0.1-beta</ParatextNugetVersion>
<LcmNugetVersion>11.0.0-beta0111</LcmNugetVersion>
<IcuNugetVersion>70.1.123</IcuNugetVersion>
<HermitCrabNugetVersion>3.4.2</HermitCrabNugetVersion>
<HermitCrabNugetVersion>3.6.1</HermitCrabNugetVersion>
<IPCFrameworkVersion>1.1.1-beta0001</IPCFrameworkVersion>
<!-- bt393 is the master branch build of ExCss for Windows development. Update when appropriate. -->
<ExCssBuildType Condition="'$(OS)'=='Windows_NT'">bt393</ExCssBuildType>
Expand Down
4 changes: 2 additions & 2 deletions Build/nuget-common/packages.config
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@
<package id="SIL.libpalaso.l10ns" version="6.0.0" targetFramework="net461" />
<package id="SIL.Lift" version="15.0.0-beta0117" targetFramework="net462" />
<package id="SIL.Media" version="15.0.0-beta0117" targetFramework="net462" />
<package id="SIL.Machine" version="3.4.2" targetFramework="netstandard2.0" />
<package id="SIL.Machine.Morphology.HermitCrab" version="3.4.2" targetFramework="netstandard2.0" />
<package id="SIL.Machine" version="3.6.1" targetFramework="netstandard2.0" />
<package id="SIL.Machine.Morphology.HermitCrab" version="3.6.1" targetFramework="netstandard2.0" />
<package id="SIL.ParatextShared" version="7.4.0.1" targetFramework="net40" /> <!-- REVIEW (Hasso) 2023.05: do we still integrate with PT 7? -->
<package id="SIL.Scripture" version="15.0.0-beta0117" targetFramework="net461" />
<package id="SIL.TestUtilities" version="15.0.0-beta0117" targetFramework="net461" />
Expand Down
5 changes: 5 additions & 0 deletions Src/GenerateHCConfig/ConsoleLogger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ public void InvalidRewriteRule(IPhRegularRule rule, string reason)
Console.WriteLine("The rewrite rule \"{0}\" is invalid. Reason: {1}", rule.Name.BestAnalysisVernacularAlternative.Text, reason);
}

/// <summary>
/// Reports a strata configuration problem by writing the reason to the console.
/// </summary>
/// <param name="strata">The strata configuration string; it is not included in the output.</param>
/// <param name="reason">The message that is printed.</param>
public void InvalidStrata(string strata, string reason)
{
    Console.Out.WriteLine(reason);
}

public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason)
{
Console.WriteLine(reason);
Expand Down
223 changes: 212 additions & 11 deletions Src/LexText/ParserCore/HCLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;

Expand Down Expand Up @@ -61,6 +62,9 @@ public static Language Load(LcmCache cache, IHCLoadErrorLogger logger)
private readonly bool m_noDefaultCompounding;
private readonly bool m_notOnClitics;
private readonly bool m_acceptUnspecifiedGraphemes;
private readonly string m_strataString;
private readonly IList<IList<string>> m_strata;
private readonly Dictionary<LexEntry, string> m_entryName;

private SimpleContext m_any;
private CharacterDefinition m_null;
Expand Down Expand Up @@ -88,12 +92,52 @@ private HCLoader(LcmCache cache, IHCLoadErrorLogger logger)
m_noDefaultCompounding = hcElem != null && ((bool?)hcElem.Element("NoDefaultCompounding") ?? false);
m_notOnClitics = hcElem == null || ((bool?)hcElem.Element("NotOnClitics") ?? true);
m_acceptUnspecifiedGraphemes = hcElem != null && ((bool?)hcElem.Element("AcceptUnspecifiedGraphemes") ?? false);
m_strata = new List<IList<string>>();
if (hcElem != null && hcElem.Element("Strata") != null)
{
m_strataString = (string)hcElem.Element("Strata");
m_strata = ParseStrataString(m_strataString);
}
m_entryName = new Dictionary<LexEntry, string>();

m_naturalClasses = new Dictionary<IPhNaturalClass, NaturalClass>();
m_charDefs = new Dictionary<IPhTerminalUnit, CharacterDefinition>();
}

private string[] RemoveDottedCircles(string[] phonemes)
/// <summary>
/// Splits a strata specification into groups of names. Names are separated by commas;
/// names enclosed in one pair of parentheses share a group, and every name outside
/// parentheses gets a group of its own.
/// </summary>
/// <param name="strataString">The raw specification, e.g. "A, (B, C), D".</param>
/// <returns>The groups in the order they appear; "()" yields an empty group.</returns>
private IList<IList<string>> ParseStrataString(string strataString)
{
    IList<IList<string>> groups = new List<IList<string>>();
    IList<string> currentGroup = null;
    bool insideGroup = false;

    // The capturing group makes Regex.Split hand back the delimiters as pieces too.
    foreach (string piece in Regex.Split(strataString, @"([(,)])"))
    {
        string token = piece.Trim();
        if (string.IsNullOrWhiteSpace(token))
        {
            continue;
        }

        switch (token)
        {
            case "(":
                // An opening parenthesis always starts a fresh group.
                insideGroup = true;
                currentGroup = new List<string>();
                groups.Add(currentGroup);
                break;
            case ")":
                insideGroup = false;
                break;
            case ",":
                break;
            default:
                if (!insideGroup)
                {
                    // A name outside parentheses forms its own group.
                    currentGroup = new List<string>();
                    groups.Add(currentGroup);
                }
                currentGroup.Add(token);
                break;
        }
    }

    return groups;
}

private string[] RemoveDottedCircles(string[] phonemes)
{
return phonemes.Select(RemoveDottedCircles).ToArray();
}
Expand Down Expand Up @@ -164,10 +208,10 @@ private void LoadLanguage()
}
}

m_morphophonemic = new Stratum(m_table) { Name = "Morphophonemic", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
m_morphophonemic = new Stratum(m_table) { Name = "Morphology", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
m_language.Strata.Add(m_morphophonemic);

m_clitic = new Stratum(m_table) { Name = "Clitic", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
m_clitic = new Stratum(m_table) { Name = "Clitics", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
m_language.Strata.Add(m_clitic);

m_language.Strata.Add(new Stratum(m_table) { Name = "Surface" });
Expand Down Expand Up @@ -289,6 +333,162 @@ private void LoadLanguage()
{
LoadMorphemeCoOccurrenceRules(morphAdhocProhib);
}

if (m_strata.Count > 0)
{
CreateStrata();
}
}

/// <summary>
/// Builds the user-defined strata listed in m_strata and redistributes the contents of the
/// default strata (m_morphophonemic and m_clitic) into them. Each group becomes a new
/// stratum named after the group's first item and is inserted immediately before m_clitic.
/// Strata that end up with no entries, templates or rules are removed from the language.
/// </summary>
private void CreateStrata()
{
// Replace the default strata of m_morphophonemic and m_clitic with the user-defined strata.
// The phonological rules are stored in m_morphophonemic unless NotOnClitics is false.
Stratum cliticsStratum = null;
Stratum compoundRulesStratum = null;
Stratum morphologyStratum = null;
Stratum phonologyStratum = null;
Stratum templateStratum = null;
foreach (IList<string> stratumRules in m_strata)
{
// An empty group (e.g. "()") defines no stratum.
if (stratumRules.Count == 0)
{
continue;
}
Stratum stratum = new Stratum(m_table) { Name = stratumRules[0], MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
// m_clitic should always be last.
int cliticIndex = m_language.Strata.IndexOf(m_clitic);
m_language.Strata.Insert(cliticIndex, stratum);
foreach (string rule in stratumRules)
{
// Save predefined classes for later.
// These reserved names only remember which stratum receives the "remaining" items of
// that kind; the actual move happens after all explicitly named items were placed.
switch (rule)
{
case "Clitics":
cliticsStratum = stratum;
break;
case "CompoundRules":
compoundRulesStratum = stratum;
break;
case "Morphology":
morphologyStratum = stratum;
break;
case "Phonology":
phonologyStratum = stratum;
break;
case "Templates":
templateStratum = stratum;
break;
default:
{
// Move the given rule to stratum.
// Both default strata are always searched; the name is reported as unknown
// only when neither of them contained a matching item.
bool found = false;
if (MoveRule(rule, m_morphophonemic, stratum))
found = true;
if (MoveRule(rule, m_clitic, stratum))
found = true;
if (!found)
m_logger.InvalidStrata(m_strataString, "Unknown rule in Strata: " + rule + ".");
break;
}
}
}
}

// Process phonology before cliticsStratum and morphologyStratum.
if (phonologyStratum != null)
{
// Move remaining phonological rules to phonologyStratum.
phonologyStratum.PhonologicalRules.AddRange(m_morphophonemic.PhonologicalRules);
phonologyStratum.PhonologicalRules.AddRange(m_clitic.PhonologicalRules);
m_morphophonemic.PhonologicalRules.Clear();
m_clitic.PhonologicalRules.Clear();
}
else
{
// Move remaining phonological rules just before clitic stratum.
// Only m_morphophonemic's rules are moved here; m_clitic keeps its own phonological rules.
int cliticIndex = m_language.Strata.IndexOf(m_clitic);
// When cliticIndex is 1 or less, nothing is moved.
if (cliticIndex > 1)
{
m_language.Strata[cliticIndex - 1].PhonologicalRules.AddRange(m_morphophonemic.PhonologicalRules);
m_morphophonemic.PhonologicalRules.Clear();
}
}
if (compoundRulesStratum != null)
{
// Move remaining compound rules to compoundRulesStratum.
// Iterate over a copy (ToList) because matching rules are removed from the list being scanned.
foreach (IMorphologicalRule rule in m_morphophonemic.MorphologicalRules.ToList())
{
if (rule is CompoundingRule)
{
compoundRulesStratum.MorphologicalRules.Add(rule);
m_morphophonemic.MorphologicalRules.Remove(rule);
}
}
}
if (templateStratum != null)
{
// Move remaining templates to templateStratum.
templateStratum.AffixTemplates.AddRange(m_morphophonemic.AffixTemplates);
m_morphophonemic.AffixTemplates.Clear();
}
if (cliticsStratum != null)
{
// Replace m_clitic with cliticsStratum.
// MoveRules also removes m_clitic from m_language.Strata.
MoveRules(m_clitic, cliticsStratum);
}
// Process morphology last.
// Whatever is still left in m_morphophonemic at this point goes to morphologyStratum.
if (morphologyStratum != null)
{
MoveRules(m_morphophonemic, morphologyStratum);
}

// Remove empty strata.
// The snapshot (ToList) allows removing from m_language.Strata while looping.
// NOTE(review): this check applies to every stratum in the language, so a "Surface"
// stratum that has no entries or rules would be removed as well - confirm this is intended.
foreach (Stratum stratum in m_language.Strata.ToList())
{
if (stratum.Entries.Count == 0 &&
stratum.AffixTemplates.Count == 0 &&
stratum.MorphologicalRules.Count == 0 &&
stratum.PhonologicalRules.Count == 0)
{
m_language.Strata.Remove(stratum);
}
}
}

/// <summary>
/// Appends all affix templates, entries, morphological rules and phonological rules of
/// <paramref name="source"/> to <paramref name="target"/> and then removes
/// <paramref name="source"/> from the language's strata.
/// </summary>
/// <param name="source">The stratum whose contents are taken over; its own collections are not cleared.</param>
/// <param name="target">The stratum that receives the contents.</param>
private void MoveRules(Stratum source, Stratum target)
{
    target.AffixTemplates.AddRange(source.AffixTemplates);
    target.Entries.AddRange(source.Entries);
    target.MorphologicalRules.AddRange(source.MorphologicalRules);
    target.PhonologicalRules.AddRange(source.PhonologicalRules);
    // The source is dropped from the language rather than emptied.
    m_language.Strata.Remove(source);
}

/// <summary>
/// Moves every item named <paramref name="ruleName"/> from <paramref name="source"/> to
/// <paramref name="target"/>. Lexical entries, morphological rules, phonological rules and
/// affix templates are all searched.
/// </summary>
/// <param name="ruleName">The name to look for.</param>
/// <param name="source">The stratum the matching items are taken from.</param>
/// <param name="target">The stratum the matching items are added to.</param>
/// <returns>True if at least one item was moved.</returns>
private bool MoveRule(string ruleName, Stratum source, Stratum target)
{
    bool found = false;

    // Entries are matched by the name recorded in m_entryName. An entry without a recorded
    // name is treated as a non-match instead of raising KeyNotFoundException.
    found |= MoveMatchingItems(source.Entries, target.Entries, entry =>
    {
        string entryName;
        return m_entryName.TryGetValue(entry, out entryName) && entryName == ruleName;
    });
    // "|=" does not short-circuit, so every collection is searched even after a match.
    found |= MoveMatchingItems(source.MorphologicalRules, target.MorphologicalRules, rule => rule.Name == ruleName);
    found |= MoveMatchingItems(source.PhonologicalRules, target.PhonologicalRules, rule => rule.Name == ruleName);
    found |= MoveMatchingItems(source.AffixTemplates, target.AffixTemplates, rule => rule.Name == ruleName);

    return found;
}

/// <summary>
/// Transfers every element of <paramref name="source"/> accepted by
/// <paramref name="filterFunction"/> into <paramref name="target"/>, keeping their order.
/// </summary>
/// <param name="source">The collection the matching elements are removed from.</param>
/// <param name="target">The collection the matching elements are added to.</param>
/// <param name="filterFunction">Decides which elements are moved.</param>
/// <returns>True if at least one element was moved; otherwise false.</returns>
private bool MoveMatchingItems<T>(ICollection<T> source, ICollection<T> target, Func<T, bool> filterFunction)
{
    // Collect the matches first; source cannot be modified while it is being enumerated.
    List<T> matches = new List<T>();
    foreach (T candidate in source)
    {
        if (filterFunction(candidate))
        {
            matches.Add(candidate);
        }
    }

    foreach (T match in matches)
    {
        target.Add(match);
        source.Remove(match);
    }

    return matches.Count > 0;
}

private void LoadInflClassMprFeature(IMoInflClass inflClass, MprFeatureGroup inflClassesGroup)
Expand Down Expand Up @@ -421,20 +621,20 @@ private void LoadLexEntries(Stratum stratum, ILexEntry entry, IList<IMoStemAllom
if (mainEntry != null)
{
foreach (IMoStemMsa msa in mainEntry.MorphoSyntaxAnalysesOC.OfType<IMoStemMsa>())
LoadLexEntryOfVariant(stratum, inflType, msa, allos);
LoadLexEntryOfVariant(stratum, inflType, msa, allos, entry.ShortName);
}
else
{
ILexSense sense = (ILexSense)component;
LoadLexEntryOfVariant(stratum, inflType, (IMoStemMsa)sense.MorphoSyntaxAnalysisRA, allos);
LoadLexEntryOfVariant(stratum, inflType, (IMoStemMsa)sense.MorphoSyntaxAnalysisRA, allos, entry.ShortName);
}
}
}
}
}

foreach (IMoStemMsa msa in entry.MorphoSyntaxAnalysesOC.OfType<IMoStemMsa>())
LoadLexEntry(stratum, msa, allos);
LoadLexEntry(stratum, msa, allos, entry.ShortName);
}

private IEnumerable<ILexEntryInflType> GetInflTypes(ILexEntryRef lexEntryRef)
Expand All @@ -461,16 +661,17 @@ private IEnumerable<ILexEntryInflType> GetInflTypes(ILexEntryRef lexEntryRef)
}
}

private void AddEntry(Stratum stratum, LexEntry hcEntry, IMoMorphSynAnalysis msa)
/// <summary>
/// Registers <paramref name="hcEntry"/> if it has at least one allomorph: the entry is added
/// to the stratum, its name is recorded in m_entryName, and it is appended to the morpheme
/// list kept for <paramref name="msa"/>. Entries without allomorphs are ignored.
/// </summary>
private void AddEntry(Stratum stratum, LexEntry hcEntry, IMoMorphSynAnalysis msa, string name)
{
    if (hcEntry.Allomorphs.Count <= 0)
    {
        return;
    }

    stratum.Entries.Add(hcEntry);
    m_entryName[hcEntry] = name;

    var morphemesForMsa = m_morphemes.GetOrCreate(msa, () => new List<Morpheme>());
    morphemesForMsa.Add(hcEntry);
}

private void LoadLexEntry(Stratum stratum, IMoStemMsa msa, IList<IMoStemAllomorph> allos)
private void LoadLexEntry(Stratum stratum, IMoStemMsa msa, IList<IMoStemAllomorph> allos, string name)
{
var hcEntry = new LexEntry();

Expand Down Expand Up @@ -509,10 +710,10 @@ private void LoadLexEntry(Stratum stratum, IMoStemMsa msa, IList<IMoStemAllomorp
}
}

AddEntry(stratum, hcEntry, msa);
AddEntry(stratum, hcEntry, msa, name);
}

private void LoadLexEntryOfVariant(Stratum stratum, ILexEntryInflType inflType, IMoStemMsa msa, IList<IMoStemAllomorph> allos)
private void LoadLexEntryOfVariant(Stratum stratum, ILexEntryInflType inflType, IMoStemMsa msa, IList<IMoStemAllomorph> allos, string name)
{
var hcEntry = new LexEntry();

Expand Down Expand Up @@ -585,7 +786,7 @@ private void LoadLexEntryOfVariant(Stratum stratum, ILexEntryInflType inflType,
}
}

AddEntry(stratum, hcEntry, msa);
AddEntry(stratum, hcEntry, msa, name);
}

private RootAllomorph LoadRootAllomorph(IMoStemAllomorph allo, IMoMorphSynAnalysis msa)
Expand Down
8 changes: 8 additions & 0 deletions Src/LexText/ParserCore/HCParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,14 @@ public void InvalidRewriteRule(IPhRegularRule rule, string reason)
m_xmlWriter.WriteEndElement();
}

/// <summary>
/// Writes a LoadError element of type "invalid-strata" that contains the reason.
/// </summary>
/// <param name="strata">The strata configuration string; it is not written to the output.</param>
/// <param name="reason">The text of the Reason child element.</param>
public void InvalidStrata(string strata, string reason)
{
    const string errorType = "invalid-strata";

    m_xmlWriter.WriteStartElement("LoadError");
    m_xmlWriter.WriteAttributeString("type", errorType);
    m_xmlWriter.WriteElementString("Reason", reason);
    m_xmlWriter.WriteEndElement();
}

public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason)
{
m_xmlWriter.WriteStartElement("LoadError");
Expand Down
1 change: 1 addition & 0 deletions Src/LexText/ParserCore/IHCLoadErrorLogger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public interface IHCLoadErrorLogger
void InvalidEnvironment(IMoForm form, IPhEnvironment env, string reason, IMoMorphSynAnalysis msa);
void InvalidReduplicationForm(IMoForm form, string reason, IMoMorphSynAnalysis msa);
void InvalidRewriteRule(IPhRegularRule prule, string reason);
void InvalidStrata(string strata, string reason);
void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason);
}
}
10 changes: 8 additions & 2 deletions Src/LexText/ParserCore/ParserCoreTests/HCLoaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ private enum LoadErrorType
DuplicateGrapheme,
InvalidEnvironment,
InvalidRedupForm,
InvalidRewriteRule
InvalidRewriteRule,
InvalidStrata
}

private class TestHCLoadErrorLogger : IHCLoadErrorLogger
Expand Down Expand Up @@ -86,7 +87,12 @@ public void InvalidReduplicationForm(IMoForm form, string reason, IMoMorphSynAna

public void InvalidRewriteRule(IPhRegularRule rule, string reason)
{
m_loadErrors.Add(Tuple.Create(LoadErrorType.InvalidRedupForm, (ICmObject) rule));
m_loadErrors.Add(Tuple.Create(LoadErrorType.InvalidRewriteRule, (ICmObject)rule));
}

/// <summary>
/// Records an InvalidStrata load error. No object is associated with it, so the
/// object half of the recorded tuple is null.
/// </summary>
public void InvalidStrata(string strata, string reason)
{
    ICmObject noObject = null;
    m_loadErrors.Add(Tuple.Create(LoadErrorType.InvalidStrata, noObject));
}

public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason)
Expand Down
Loading
Loading