diff --git a/Build/mkall.targets b/Build/mkall.targets index 6c5d92a33d..4c8d256940 100644 --- a/Build/mkall.targets +++ b/Build/mkall.targets @@ -287,7 +287,7 @@ 9.4.0.1-beta 11.0.0-beta0111 70.1.123 - 3.4.2 + 3.6.1 1.1.1-beta0001 bt393 diff --git a/Build/nuget-common/packages.config b/Build/nuget-common/packages.config index 20cea28e21..8e2d8936e7 100644 --- a/Build/nuget-common/packages.config +++ b/Build/nuget-common/packages.config @@ -65,8 +65,8 @@ - - + + diff --git a/Src/GenerateHCConfig/ConsoleLogger.cs b/Src/GenerateHCConfig/ConsoleLogger.cs index 5e6f9e4e66..46685db5f7 100644 --- a/Src/GenerateHCConfig/ConsoleLogger.cs +++ b/Src/GenerateHCConfig/ConsoleLogger.cs @@ -109,6 +109,11 @@ public void InvalidRewriteRule(IPhRegularRule rule, string reason) Console.WriteLine("The rewrite rule \"{0}\" is invalid. Reason: {1}", rule.Name.BestAnalysisVernacularAlternative.Text, reason); } + public void InvalidStrata(string strata, string reason) + { + Console.WriteLine(reason); + } + public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason) { Console.WriteLine(reason); diff --git a/Src/LexText/ParserCore/HCLoader.cs b/Src/LexText/ParserCore/HCLoader.cs index 92e371707a..9b40107b59 100644 --- a/Src/LexText/ParserCore/HCLoader.cs +++ b/Src/LexText/ParserCore/HCLoader.cs @@ -19,6 +19,7 @@ using System.Globalization; using System.Linq; using System.Text; +using System.Text.RegularExpressions; using System.Xml; using System.Xml.Linq; @@ -61,6 +62,9 @@ public static Language Load(LcmCache cache, IHCLoadErrorLogger logger) private readonly bool m_noDefaultCompounding; private readonly bool m_notOnClitics; private readonly bool m_acceptUnspecifiedGraphemes; + private readonly string m_strataString; + private readonly IList> m_strata; + private readonly Dictionary m_entryName; private SimpleContext m_any; private CharacterDefinition m_null; @@ -88,12 +92,52 @@ private HCLoader(LcmCache cache, IHCLoadErrorLogger logger) 
m_noDefaultCompounding = hcElem != null && ((bool?)hcElem.Element("NoDefaultCompounding") ?? false); m_notOnClitics = hcElem == null || ((bool?)hcElem.Element("NotOnClitics") ?? true); m_acceptUnspecifiedGraphemes = hcElem != null && ((bool?)hcElem.Element("AcceptUnspecifiedGraphemes") ?? false); + m_strata = new List>(); + if (hcElem != null && hcElem.Element("Strata") != null) + { + m_strataString = (string)hcElem.Element("Strata"); + m_strata = ParseStrataString(m_strataString); + } + m_entryName = new Dictionary(); m_naturalClasses = new Dictionary(); m_charDefs = new Dictionary(); } - private string[] RemoveDottedCircles(string[] phonemes) + private IList> ParseStrataString(string strataString) + { + // Tokenize strataString based on commas and parentheses. + string[] tokens = Regex.Split(strataString, @"([(,)])") + .Select(sValue => sValue.Trim()) + .Where(s => !string.IsNullOrWhiteSpace(s)) + .ToArray(); + // Group rules into strata based on parentheses. + IList> strata = new List>(); + bool parentheses = false; + foreach (string token in tokens) + { + if (token == "(") + { + parentheses = true; + strata.Add(new List()); + } + else if (token == ")") + { + parentheses = false; + } + else if (token != ",") + { + if (!parentheses) + { + strata.Add(new List()); + } + strata.Last().Add(token); + } + } + return strata; + } + + private string[] RemoveDottedCircles(string[] phonemes) { return phonemes.Select(RemoveDottedCircles).ToArray(); } @@ -164,10 +208,10 @@ private void LoadLanguage() } } - m_morphophonemic = new Stratum(m_table) { Name = "Morphophonemic", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered }; + m_morphophonemic = new Stratum(m_table) { Name = "Morphology", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered }; m_language.Strata.Add(m_morphophonemic); - m_clitic = new Stratum(m_table) { Name = "Clitic", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered }; + m_clitic = new Stratum(m_table) { Name = "Clitics", 
MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered }; m_language.Strata.Add(m_clitic); m_language.Strata.Add(new Stratum(m_table) { Name = "Surface" }); @@ -289,6 +333,162 @@ private void LoadLanguage() { LoadMorphemeCoOccurrenceRules(morphAdhocProhib); } + + if (m_strata.Count > 0) + { + CreateStrata(); + } + } + + private void CreateStrata() + { + // Replace the default strata of m_morphophonemics and m_clitic with the user-defined strata. + // The phonological rules are stored in m_morphophonemics unless NotOnClitics is false. + Stratum cliticsStratum = null; + Stratum compoundRulesStratum = null; + Stratum morphologyStratum = null; + Stratum phonologyStratum = null; + Stratum templateStratum = null; + foreach (IList stratumRules in m_strata) + { + if (stratumRules.Count == 0) + { + continue; + } + Stratum stratum = new Stratum(m_table) { Name = stratumRules[0], MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered }; + // m_clitic should always be last. + int cliticIndex = m_language.Strata.IndexOf(m_clitic); + m_language.Strata.Insert(cliticIndex, stratum); + foreach (string rule in stratumRules) + { + // Save predefined classes for later. + switch (rule) + { + case "Clitics": + cliticsStratum = stratum; + break; + case "CompoundRules": + compoundRulesStratum = stratum; + break; + case "Morphology": + morphologyStratum = stratum; + break; + case "Phonology": + phonologyStratum = stratum; + break; + case "Templates": + templateStratum = stratum; + break; + default: + { + // Move the given rule to stratum. + bool found = false; + if (MoveRule(rule, m_morphophonemic, stratum)) + found = true; + if (MoveRule(rule, m_clitic, stratum)) + found = true; + if (!found) + m_logger.InvalidStrata(m_strataString, "Unknown rule in Strata: " + rule + "."); + break; + } + } + } + } + + // Process phonology before cliticsStratum and morphologyStratum. + if (phonologyStratum != null) + { + // Move remaining phonological rules to phonologyStratum. 
+ phonologyStratum.PhonologicalRules.AddRange(m_morphophonemic.PhonologicalRules); + phonologyStratum.PhonologicalRules.AddRange(m_clitic.PhonologicalRules); + m_morphophonemic.PhonologicalRules.Clear(); + m_clitic.PhonologicalRules.Clear(); + } + else + { + // Move remaining phonological rules just before clitic stratum. + int cliticIndex = m_language.Strata.IndexOf(m_clitic); + if (cliticIndex > 1) + { + m_language.Strata[cliticIndex - 1].PhonologicalRules.AddRange(m_morphophonemic.PhonologicalRules); + m_morphophonemic.PhonologicalRules.Clear(); + } + } + if (compoundRulesStratum != null) + { + // Move remaining compound rules to compoundRulesStratum. + foreach (IMorphologicalRule rule in m_morphophonemic.MorphologicalRules.ToList()) + { + if (rule is CompoundingRule) + { + compoundRulesStratum.MorphologicalRules.Add(rule); + m_morphophonemic.MorphologicalRules.Remove(rule); + } + } + } + if (templateStratum != null) + { + // Move remaining templates to templateStratum. + templateStratum.AffixTemplates.AddRange(m_morphophonemic.AffixTemplates); + m_morphophonemic.AffixTemplates.Clear(); + } + if (cliticsStratum != null) + { + // Replace m_clitic with cliticsStratum. + MoveRules(m_clitic, cliticsStratum); + } + // Process morphology last. + if (morphologyStratum != null) + { + MoveRules(m_morphophonemic, morphologyStratum); + } + + // Remove empty strata. 
+ foreach (Stratum stratum in m_language.Strata.ToList()) + { + if (stratum.Entries.Count == 0 && + stratum.AffixTemplates.Count == 0 && + stratum.MorphologicalRules.Count == 0 && + stratum.PhonologicalRules.Count == 0) + { + m_language.Strata.Remove(stratum); + } + } + } + + void MoveRules(Stratum source, Stratum target) + { + target.AffixTemplates.AddRange(source.AffixTemplates); + target.Entries.AddRange(source.Entries); + target.MorphologicalRules.AddRange(source.MorphologicalRules); + target.PhonologicalRules.AddRange(source.PhonologicalRules); + m_language.Strata.Remove(source); + } + + private bool MoveRule(string ruleName, Stratum source, Stratum target) + { + bool found = false; + + found |= MoveMatchingItems(source.Entries, target.Entries, entry => m_entryName[entry] == ruleName); + found |= MoveMatchingItems(source.MorphologicalRules, target.MorphologicalRules, rule => rule.Name == ruleName); + found |= MoveMatchingItems(source.PhonologicalRules, target.PhonologicalRules, rule => rule.Name == ruleName); + found |= MoveMatchingItems(source.AffixTemplates, target.AffixTemplates, rule => rule.Name == ruleName); + + return found; + } + + private bool MoveMatchingItems(ICollection source, ICollection target, Func filterFunction) + { + var itemsToMove = source.Where(filterFunction).ToList(); + if (itemsToMove.Count == 0) return false; + + foreach (var item in itemsToMove) + { + target.Add(item); + source.Remove(item); + } + + return true; } private void LoadInflClassMprFeature(IMoInflClass inflClass, MprFeatureGroup inflClassesGroup) @@ -421,12 +621,12 @@ private void LoadLexEntries(Stratum stratum, ILexEntry entry, IList()) - LoadLexEntryOfVariant(stratum, inflType, msa, allos); + LoadLexEntryOfVariant(stratum, inflType, msa, allos, entry.ShortName); } else { ILexSense sense = (ILexSense)component; - LoadLexEntryOfVariant(stratum, inflType, (IMoStemMsa)sense.MorphoSyntaxAnalysisRA, allos); + LoadLexEntryOfVariant(stratum, inflType, 
(IMoStemMsa)sense.MorphoSyntaxAnalysisRA, allos, entry.ShortName); } } } @@ -434,7 +634,7 @@ private void LoadLexEntries(Stratum stratum, ILexEntry entry, IList()) - LoadLexEntry(stratum, msa, allos); + LoadLexEntry(stratum, msa, allos, entry.ShortName); } private IEnumerable GetInflTypes(ILexEntryRef lexEntryRef) @@ -461,16 +661,17 @@ private IEnumerable GetInflTypes(ILexEntryRef lexEntryRef) } } - private void AddEntry(Stratum stratum, LexEntry hcEntry, IMoMorphSynAnalysis msa) + private void AddEntry(Stratum stratum, LexEntry hcEntry, IMoMorphSynAnalysis msa, string name) { if (hcEntry.Allomorphs.Count > 0) { stratum.Entries.Add(hcEntry); + m_entryName[hcEntry] = name; m_morphemes.GetOrCreate(msa, () => new List()).Add(hcEntry); } } - private void LoadLexEntry(Stratum stratum, IMoStemMsa msa, IList allos) + private void LoadLexEntry(Stratum stratum, IMoStemMsa msa, IList allos, string name) { var hcEntry = new LexEntry(); @@ -509,10 +710,10 @@ private void LoadLexEntry(Stratum stratum, IMoStemMsa msa, IList allos) + private void LoadLexEntryOfVariant(Stratum stratum, ILexEntryInflType inflType, IMoStemMsa msa, IList allos, string name) { var hcEntry = new LexEntry(); @@ -585,7 +786,7 @@ private void LoadLexEntryOfVariant(Stratum stratum, ILexEntryInflType inflType, } } - AddEntry(stratum, hcEntry, msa); + AddEntry(stratum, hcEntry, msa, name); } private RootAllomorph LoadRootAllomorph(IMoStemAllomorph allo, IMoMorphSynAnalysis msa) diff --git a/Src/LexText/ParserCore/HCParser.cs b/Src/LexText/ParserCore/HCParser.cs index cb07448296..ea092e29e1 100644 --- a/Src/LexText/ParserCore/HCParser.cs +++ b/Src/LexText/ParserCore/HCParser.cs @@ -654,6 +654,14 @@ public void InvalidRewriteRule(IPhRegularRule rule, string reason) m_xmlWriter.WriteEndElement(); } + public void InvalidStrata(string strata, string reason) + { + m_xmlWriter.WriteStartElement("LoadError"); + m_xmlWriter.WriteAttributeString("type", "invalid-strata"); + m_xmlWriter.WriteElementString("Reason", 
reason); + m_xmlWriter.WriteEndElement(); + } + public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason) { m_xmlWriter.WriteStartElement("LoadError"); diff --git a/Src/LexText/ParserCore/IHCLoadErrorLogger.cs b/Src/LexText/ParserCore/IHCLoadErrorLogger.cs index b00ff57554..529deff8ef 100644 --- a/Src/LexText/ParserCore/IHCLoadErrorLogger.cs +++ b/Src/LexText/ParserCore/IHCLoadErrorLogger.cs @@ -11,6 +11,7 @@ public interface IHCLoadErrorLogger void InvalidEnvironment(IMoForm form, IPhEnvironment env, string reason, IMoMorphSynAnalysis msa); void InvalidReduplicationForm(IMoForm form, string reason, IMoMorphSynAnalysis msa); void InvalidRewriteRule(IPhRegularRule prule, string reason); + void InvalidStrata(string strata, string reason); void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason); } } diff --git a/Src/LexText/ParserCore/ParserCoreTests/HCLoaderTests.cs b/Src/LexText/ParserCore/ParserCoreTests/HCLoaderTests.cs index 7f5f039b56..ef49d58733 100644 --- a/Src/LexText/ParserCore/ParserCoreTests/HCLoaderTests.cs +++ b/Src/LexText/ParserCore/ParserCoreTests/HCLoaderTests.cs @@ -42,7 +42,8 @@ private enum LoadErrorType DuplicateGrapheme, InvalidEnvironment, InvalidRedupForm, - InvalidRewriteRule + InvalidRewriteRule, + InvalidStrata } private class TestHCLoadErrorLogger : IHCLoadErrorLogger @@ -86,7 +87,12 @@ public void InvalidReduplicationForm(IMoForm form, string reason, IMoMorphSynAna public void InvalidRewriteRule(IPhRegularRule rule, string reason) { - m_loadErrors.Add(Tuple.Create(LoadErrorType.InvalidRedupForm, (ICmObject) rule)); + m_loadErrors.Add(Tuple.Create(LoadErrorType.InvalidRewriteRule, (ICmObject)rule)); + } + + public void InvalidStrata(string strata, string reason) + { + m_loadErrors.Add(Tuple.Create(LoadErrorType.InvalidStrata, (ICmObject)null)); } public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason) diff --git 
a/Src/LexText/ParserCore/ParserWorker.cs b/Src/LexText/ParserCore/ParserWorker.cs index 524474ca86..acdbc4b19b 100644 --- a/Src/LexText/ParserCore/ParserWorker.cs +++ b/Src/LexText/ParserCore/ParserWorker.cs @@ -33,6 +33,7 @@ using SIL.ObjectModel; using XCore; using SIL.LCModel.DomainServices; +using System.Xml.Linq; namespace SIL.FieldWorks.WordWorks.Parser { @@ -111,7 +112,11 @@ public void TryAWord(string sForm, bool fDoTrace, int[] sSelectTraceMorphs) { // Assume that the user used the correct case. string normForm = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD).Normalize(sForm); + var stopWatch = System.Diagnostics.Stopwatch.StartNew(); task.Details = fDoTrace ? m_parser.TraceWordXml(normForm, sSelectTraceMorphs) : m_parser.ParseWordXml(normForm); + stopWatch.Stop(); + double seconds = stopWatch.ElapsedMilliseconds / 1000.0; + task.Details.Element("Wordform")?.Add(new XAttribute("parseTime", seconds.ToString("0.000"))); } } diff --git a/Src/LexText/ParserUI/ParserParametersDlg.cs b/Src/LexText/ParserUI/ParserParametersDlg.cs index c9bdcad8a7..a59ffea864 100644 --- a/Src/LexText/ParserUI/ParserParametersDlg.cs +++ b/Src/LexText/ParserUI/ParserParametersDlg.cs @@ -32,6 +32,7 @@ public class ParserParametersDlg : Form private const string NoDefaultCompounding = "NoDefaultCompounding"; private const string AcceptUnspecifiedGraphemes = "AcceptUnspecifiedGraphemes"; private const string GuessRoots = "GuessRoots"; + private const string Strata = "Strata"; private const string XAmple = "XAmple"; private const string MaxNulls = "MaxNulls"; @@ -295,6 +296,7 @@ public void SetDlgInfo(string title, string parserParameters) PopulateDataGrid(m_dataGrid2, HC); m_dataGrid2.TableStyles[0].GridColumnStyles[2].Width = 130; m_dataGrid2.TableStyles[0].GridColumnStyles[3].Width = 160; + m_dataGrid2.TableStyles[0].GridColumnStyles[5].Width = 400; } private void LoadParserData(DataSet dsParserParameters) @@ -317,6 +319,8 @@ private void LoadParserData(DataSet 
dsParserParameters) hcElem.Add(new XElement(AcceptUnspecifiedGraphemes, false)); if (hcElem.Element(GuessRoots) == null) hcElem.Add(new XElement(GuessRoots, true)); + if (hcElem.Element(Strata) == null) + hcElem.Add(new XElement(Strata, "")); using (XmlReader reader = parserParamsElem.CreateReader()) dsParserParameters.ReadXml(reader, XmlReadMode.IgnoreSchema); @@ -331,6 +335,14 @@ private void PopulateDataGrid(DataGrid dataGrid, string parser) dataGrid.TableStyles.Add(new DataGridTableStyle { MappingName = parser, RowHeadersVisible = false, AllowSorting = false }); foreach (DataGridBoolColumn col in dataGrid.TableStyles[0].GridColumnStyles.OfType()) col.AllowNull = false; + foreach (DataGridTextBoxColumn col in dataGrid.TableStyles[0].GridColumnStyles.OfType()) + { + TextBox textBox1 = col.TextBox; + textBox1.Multiline = true; + textBox1.ScrollBars = ScrollBars.Vertical; + textBox1.WordWrap = true; + dataGrid.TableStyles[0].PreferredRowHeight = 50; + } } private DataView CreateDataView(DataTable table) @@ -359,6 +371,7 @@ private DataTable CreateHCDataTable() tblHC.Columns.Add(NoDefaultCompounding, typeof(bool)); tblHC.Columns.Add(AcceptUnspecifiedGraphemes, typeof(bool)); tblHC.Columns.Add(GuessRoots, typeof(bool)); + tblHC.Columns.Add(Strata, typeof(string)); return tblHC; } } diff --git a/Src/LexText/ParserUI/ParserParametersDlg.resx b/Src/LexText/ParserUI/ParserParametersDlg.resx index bf2bec05f7..794d380c93 100644 --- a/Src/LexText/ParserUI/ParserParametersDlg.resx +++ b/Src/LexText/ParserUI/ParserParametersDlg.resx @@ -174,7 +174,7 @@ 6 - 347, 253 + 347, 353 75, 23 @@ -198,7 +198,7 @@ 5 - 428, 253 + 428, 353 75, 23 @@ -246,7 +246,7 @@ 3 - 509, 253 + 509, 353 75, 23 @@ -276,7 +276,7 @@ 8, 165 - 576, 72 + 976, 100 8 @@ -327,7 +327,7 @@ 5, 13 - 594, 288 + 1000, 388 diff --git a/Src/Transforms/Presentation/FormatHCTrace.xsl b/Src/Transforms/Presentation/FormatHCTrace.xsl index d490b37dcc..a431d934ea 100644 --- a/Src/Transforms/Presentation/FormatHCTrace.xsl +++ 
b/Src/Transforms/Presentation/FormatHCTrace.xsl @@ -80,6 +80,11 @@ Main template . +

+ Parse time: + + seconds. +

diff --git a/Src/Transforms/Presentation/FormatXAmpleParse.xsl b/Src/Transforms/Presentation/FormatXAmpleParse.xsl index 8b76be582e..fd0359cfcf 100644 --- a/Src/Transforms/Presentation/FormatXAmpleParse.xsl +++ b/Src/Transforms/Presentation/FormatXAmpleParse.xsl @@ -75,6 +75,11 @@ Main template . +

+ Parse time: + + seconds. +

diff --git a/Src/Transforms/Presentation/FormatXAmpleTrace.xsl b/Src/Transforms/Presentation/FormatXAmpleTrace.xsl index 4385a34eee..923a6fb10b 100644 --- a/Src/Transforms/Presentation/FormatXAmpleTrace.xsl +++ b/Src/Transforms/Presentation/FormatXAmpleTrace.xsl @@ -118,6 +118,11 @@ Main template . +

+ Parse time: + + seconds. +

diff --git a/Src/XCore/xWindow.cs b/Src/XCore/xWindow.cs index 50e1e78791..1f2bcc8afa 100644 --- a/Src/XCore/xWindow.cs +++ b/Src/XCore/xWindow.cs @@ -1929,6 +1929,8 @@ public bool OnCloseWindow(object sender) { CheckDisposed(); + if (Mediator != null) + Mediator.SendMessage("StopParser", null); this.Close(); return true;