From 46423da28a23f1b03e6024f2ba6c0863d408349e Mon Sep 17 00:00:00 2001
From: Dmitri Prime <bolms@google.com>
Date: Fri, 11 Oct 2024 15:58:12 -0700
Subject: [PATCH] Emit a warning when the cached parser is not used. (#204)

This change checks to see if the cached parser was discarded due to a
mismatch between the cached parser and the grammar specified in
module_ir.py, and, if so, emits a warning that the cached parser was not
used, along with informational messages on the nature of the mismatch.

Adjusted the "warning" color from magenta to yellow.  (This is the first
warning in Emboss, so no magenta messages would have ever been emitted.)

Adjusted the "note" color from "bright black" (dark grey) to "white"
(light grey), because at least some terminals display "bright black"
as just black.
---
 compiler/front_end/emboss_front_end.py | 30 +++++++++++++++
 compiler/front_end/parser.py           | 51 +++++++++++++++++++++-----
 compiler/util/error.py                 |  4 +-
 compiler/util/error_test.py            | 15 ++++----
 4 files changed, 80 insertions(+), 20 deletions(-)
diff --git a/compiler/front_end/emboss_front_end.py b/compiler/front_end/emboss_front_end.py
index 269cc34..c81ac45 100644
--- a/compiler/front_end/emboss_front_end.py
+++ b/compiler/front_end/emboss_front_end.py
@@ -29,6 +29,7 @@
 
 from compiler.front_end import glue
 from compiler.front_end import module_ir
+from compiler.front_end import parser
 from compiler.util import error
 from compiler.util import ir_data_utils
 
@@ -151,6 +152,34 @@ def _find_and_read(file_name):
     return _find_and_read
 
 
+def _warn_if_cached_parser_is_mismatched(color_output):
+    cached_parser_mismatch = parser.module_parser_cache_mismatch()
+    extra_production_notes = [
+        error.note("<internal>", None, f"New production {prod}")
+        for prod in cached_parser_mismatch[1]
+    ]
+    missing_production_notes = [
+        error.note("<internal>", None, f"Missing production {prod}")
+        for prod in cached_parser_mismatch[0]
+    ]
+    if extra_production_notes or missing_production_notes:
+        _show_errors(
+            [
+                [
+                    error.warn(
+                        "<internal>",
+                        None,
+                        "Cached parser does not match actual grammar; using newly-generated parser.",
+                    )
+                ]
+                + extra_production_notes
+                + missing_production_notes
+            ],
+            None,
+            color_output,
+        )
+
+
 def parse_and_log_errors(input_file, import_dirs, color_output, stop_before_step=None):
     """Fully parses an .emb and logs any errors.
 
@@ -162,6 +191,7 @@ def parse_and_log_errors(input_file, import_dirs, color_output, stop_before_step
     Returns:
       (ir, debug_info, errors)
     """
+    _warn_if_cached_parser_is_mismatched(color_output)
     ir, debug_info, errors = glue.parse_emboss_file(
         input_file,
         _find_in_dirs_and_read(import_dirs),
diff --git a/compiler/front_end/parser.py b/compiler/front_end/parser.py
index a7d5826..be95656 100644
--- a/compiler/front_end/parser.py
+++ b/compiler/front_end/parser.py
@@ -14,6 +14,8 @@
 
 """Routines to load a shift-reduce parser for the module_ir module."""
 
+import collections
+
 from compiler.front_end.generated import cached_parser
 from compiler.front_end import lr1
 from compiler.front_end import make_parser
@@ -21,32 +23,61 @@
 from compiler.util import parser_types
 from compiler.util import simple_memoizer
 
+ParserAndIsCached = collections.namedtuple(
+    "ParserAndIsCached",
+    [
+        "parser",
+        "cache_mismatch",
+    ],
+)
+
 
 @simple_memoizer.memoize
 def _load_module_parser():
     module_parser = cached_parser.module_parser()
-    if module_parser.productions == set(module_ir.PRODUCTIONS) | {
+    module_ir_productions = set(module_ir.PRODUCTIONS) | {
         parser_types.Production(lr1.START_PRIME, (module_ir.START_SYMBOL,))
-    }:
-        return module_parser
-    return make_parser.build_module_parser()
+    }
+    if module_parser.productions == module_ir_productions:
+        return ParserAndIsCached(module_parser, (set(), set()))
+    return ParserAndIsCached(
+        make_parser.build_module_parser(),
+        (
+            module_parser.productions - module_ir_productions,
+            module_ir_productions - module_parser.productions,
+        ),
+    )
 
 
 @simple_memoizer.memoize
 def _load_expression_parser():
     expression_parser = cached_parser.expression_parser()
-    if expression_parser.productions == set(module_ir.PRODUCTIONS) | {
+    module_ir_productions = set(module_ir.PRODUCTIONS) | {
         parser_types.Production(lr1.START_PRIME, (module_ir.EXPRESSION_START_SYMBOL,))
-    }:
-        return expression_parser
-    return make_parser.build_expression_parser()
+    }
+    if expression_parser.productions == module_ir_productions:
+        return ParserAndIsCached(
+            expression_parser,
+            (set(), set()),
+        )
+    return ParserAndIsCached(
+        make_parser.build_expression_parser(),
+        (
+            expression_parser.productions - module_ir_productions,
+            module_ir_productions - expression_parser.productions,
+        ),
+    )
+
+
+def module_parser_cache_mismatch():
+    return _load_module_parser().cache_mismatch
 
 
 def parse_module(tokens):
     """Parses the provided Emboss token list into an Emboss module parse tree."""
-    return _load_module_parser().parse(tokens)
+    return _load_module_parser().parser.parse(tokens)
 
 
 def parse_expression(tokens):
     """Parses the provided Emboss token list into an expression parse tree."""
-    return _load_expression_parser().parse(tokens)
+    return _load_expression_parser().parser.parse(tokens)
diff --git a/compiler/util/error.py b/compiler/util/error.py
index a22fa4a..c580520 100644
--- a/compiler/util/error.py
+++ b/compiler/util/error.py
@@ -131,8 +131,8 @@ def format(self, source_code):
         #     messages.
         severity_colors = {
             ERROR: (BRIGHT_RED, BOLD),
-            WARNING: (BRIGHT_MAGENTA, BOLD),
-            NOTE: (BRIGHT_BLACK, WHITE),
+            WARNING: (BRIGHT_YELLOW, BOLD),
+            NOTE: (WHITE, WHITE),
         }
 
         result = []
diff --git a/compiler/util/error_test.py b/compiler/util/error_test.py
index 23beddd..45f9920 100644
--- a/compiler/util/error_test.py
+++ b/compiler/util/error_test.py
@@ -201,10 +201,10 @@ def test_multiline_error(self):
                 (error.BRIGHT_RED, "error: "),  # Severity
                 (error.BOLD, "Bad thing\n"),  # Message
                 (error.BOLD, "foo.emb:3:4: "),  # Location, line 2
-                (error.BRIGHT_BLACK, "note: "),  # "Note" severity, line 2
+                (error.WHITE, "note: "),  # "Note" severity, line 2
                 (error.WHITE, "Some explanation\n"),  # Message, line 2
                 (error.BOLD, "foo.emb:3:4: "),  # Location, line 3
-                (error.BRIGHT_BLACK, "note: "),  # "Note" severity, line 3
+                (error.WHITE, "note: "),  # "Note" severity, line 3
                 (error.WHITE, "More explanation"),  # Message, line 3
             ],
             sourceless_format,
@@ -223,10 +223,10 @@ def test_multiline_error(self):
                 (error.BRIGHT_RED, "error: "),  # Severity
                 (error.BOLD, "Bad thing\n"),  # Message
                 (error.BOLD, "foo.emb:3:4: "),  # Location, line 2
-                (error.BRIGHT_BLACK, "note: "),  # "Note" severity, line 2
+                (error.WHITE, "note: "),  # "Note" severity, line 2
                 (error.WHITE, "Some explanation\n"),  # Message, line 2
                 (error.BOLD, "foo.emb:3:4: "),  # Location, line 3
-                (error.BRIGHT_BLACK, "note: "),  # "Note" severity, line 3
+                (error.WHITE, "note: "),  # "Note" severity, line 3
                 (error.WHITE, "More explanation\n"),  # Message, line 3
                 (error.WHITE, "abcdefghijklm\n"),  # Source snippet
                 (error.BRIGHT_GREEN, "   ^^"),  # Column indicator
@@ -252,7 +252,7 @@ def test_warn(self):
         self.assertEqual(
             [
                 (error.BOLD, "foo.emb:3:4: "),  # Location
-                (error.BRIGHT_MAGENTA, "warning: "),  # Severity
+                (error.BRIGHT_YELLOW, "warning: "),  # Severity
                 (error.BOLD, "Not good thing\n"),  # Message
                 (error.WHITE, "abcdefghijklm\n"),  # Source snippet
                 (error.BRIGHT_GREEN, "   ^^"),  # Column indicator
@@ -278,7 +278,7 @@ def test_note(self):
         self.assertEqual(
             [
                 (error.BOLD, "foo.emb:3:4: "),  # Location
-                (error.BRIGHT_BLACK, "note: "),  # Severity
+                (error.WHITE, "note: "),  # Severity
                 (error.WHITE, "OK thing\n"),  # Message
                 (error.WHITE, "abcdefghijklm\n"),  # Source snippet
                 (error.BRIGHT_GREEN, "   ^^"),  # Column indicator
@@ -457,13 +457,12 @@ def test_format_errors(self):
         bold = error.BOLD
         reset = error.RESET
         white = error.WHITE
-        bright_black = error.BRIGHT_BLACK
         bright_green = error.BRIGHT_GREEN
         self.assertEqual(
             bold
             + "foo.emb:3:4: "
             + reset
-            + bright_black
+            + white
             + "note: "
             + reset
             + white