add a generate option to fill in TODOs for everything missing

bugzilla · May 3, 2024 · 434d93b · 434d93b
1 parent 9a2ad3d
commit 434d93b
Show file tree

Hide file tree

Showing 2 changed files with 317 additions and 16 deletions.
diff --git a/make_schema_doc.py b/make_schema_doc.py
@@ -22,15 +22,19 @@
 import time
 import sys
 import argparse
+from pprint import pprint, pformat
+from black import FileMode, format_str
 
 import schema_remarks
 import get_schema
 
 
 class BzSchemaProcessingException(Exception):
-    def __init__(self, message):
+    def __init__(self, errors):
+        """Build the exception from a list of error strings.
+
+        errors: the individual error messages.  A bare string is treated as
+        a one-item list so that a caller still passing a single message
+        keeps working.
+
+        The list is kept on self.errors; self.message holds every entry
+        joined by an HTML line break followed by a newline.
+        """
+        if isinstance(errors, str):
+            errors = [errors]
+        # Same separator the raise sites used when they joined the errors
+        # themselves, so anything displaying .message sees the same format.
+        message = '<br/>\n'.join(errors)
         super().__init__(message)
         self.message = message
+        self.errors = errors
 
     def __str__(self):
         return '\n' + self.message
@@ -768,27 +772,29 @@ def get_versioned_tables(first, last):
     global errors
     errors = []
     if not first in schema_remarks.version_order:
-        raise BzSchemaProcessingException("I don't know about version '%s'." % last)
+        # Name the version that actually failed this lookup: first, not last.
+        raise BzSchemaProcessingException([f"I don't know about version '{first}'."])
     if not last in schema_remarks.version_order:
-        raise BzSchemaProcessingException("I don't know about version '%s'." % last)
+        raise BzSchemaProcessingException([f"I don't know about version '{last}'."])
     if not (
         schema_remarks.version_order.index(last)
         >= schema_remarks.version_order.index(first)
     ):
         raise BzSchemaProcessingException(
-            "Version '%s' comes before version '%s'." % (last, first)
+            [f"Version '{last}' comes before version '{first}'."]
         )
     colours = {}
     tr = {}
     if not first in schema_remarks.version_schema_map:
         raise BzSchemaProcessingException(
-            "I know version '%s' exists, but I seem to be missing the data for it."
-            % last
+            [
+                f"I know version '{first}' exists, but I seem to be missing the data for it."
+            ]
         )
     if not last in schema_remarks.version_schema_map:
         raise BzSchemaProcessingException(
-            "I know version '%s' exists, but I seem to be missing the data for it."
-            % last
+            [
+                f"I know version '{last}' exists, but I seem to be missing the data for it."
+            ]
         )
     schema_name = schema_remarks.version_schema_map[first]
     schema, errors = get_schema.get_schema(schema_name, errors)
@@ -811,8 +817,7 @@ def get_versioned_tables(first, last):
     # if we have errors at this point, it's fatal, there's no point
     # in letting make_versioned_schema spew a ton more of them.
     if errors:
-        e = str.join('<br/>\n', errors)
-        raise BzSchemaProcessingException(e)
+        raise BzSchemaProcessingException(errors)
     schema = make_versioned_schema(schemas, colours, tr)
     stringify_schema(schema)
     return (schema, tr, colours, tuple(bugzilla_versions), errors)
@@ -864,8 +869,7 @@ def make_tables(first, last):
     header = process(schema_remarks.header, bv, dict)
     footer = process(schema_remarks.footer, bv, dict)
     if errors:
-        e = str.join('<br/>\n', errors)
-        raise BzSchemaProcessingException(e)
+        raise BzSchemaProcessingException(errors)
     return (header, body, footer)
 
 def make_body(first, last):
@@ -894,14 +898,126 @@ def test_schema_remarks(args):
         try:
             (header, body, footer) = make_tables(first, last)
         except BzSchemaProcessingException as e:
-            message = e.message
-            message = message.replace('<br/>', '')
-            print(message)
+            print('\n'.join(e.errors))
             sys.exit()
         print("Succeeded!")
 
 
-def validate_schema_remarks(*args, **kwargs):
+class regex_in:
+    """Wrap a string so that ``==`` against a regex performs a full match.
+
+    Comparing an instance with a pattern (a ``str``, which is compiled, or
+    an already compiled ``re.Pattern``) runs ``fullmatch`` against the
+    wrapped string and stores the result on ``self.match``.  Because a
+    literal ``case`` pattern in a ``match`` statement is tested with ``==``,
+    this lets each ``case r'...'`` act as a regex test.  Indexing the
+    instance returns a group of the stored match.
+    """
+
+    string: str
+    # Result of the most recent pattern comparison; None until one succeeds.
+    match: re.Match | None = None
+
+    def __init__(self, thestring):
+        self.string = thestring
+
+    def __eq__(self, other: str | re.Pattern):
+        if isinstance(other, str):
+            other = re.compile(other)
+        if not isinstance(other, re.Pattern):
+            # Not a pattern: let Python fall back to its default comparison
+            # rather than relying on an assert, which is stripped under -O.
+            return NotImplemented
+        # TODO extend for search and match variants
+        self.match = other.fullmatch(self.string)
+        return self.match is not None
+
+    def __getitem__(self, group):
+        # Only meaningful after a comparison that matched; self.match is
+        # None otherwise and subscripting it raises TypeError.
+        return self.match[group]
+
+
+# Note the `as m` in the case specification
+# match regex_in(validated_string):
+#    case r'\d(\d)' as m:
+#        print(f'The second digit is {m[1]}')
+#        print(f'The whole match is {m.match}')
+
+
+def generate_schema_remarks(args):
+    """Write schema_remarks_new.py with 'TODO' placeholders for missing remarks.
+
+    args.first and args.last select the versions; args.last falls back to
+    args.first when it is None.
+
+    make_tables(first, last) is run once.  If it raises no
+    BzSchemaProcessingException, "No changes detected." is printed and
+    nothing is written.  Otherwise each recognised error string is turned
+    into a 'TODO' entry in the in-memory schema_remarks tables; when two
+    different versions are compared, make_tables is run a second time and
+    the added/removed-remark errors from that run are filled in as well.
+    Errors matching no known pattern are printed and otherwise ignored.
+
+    The schema_remarks variables are then formatted with black, substituted
+    into schema_remarks_template.txt, written to schema_remarks_new.py, and
+    the process exits via sys.exit().
+    """
+    first = args.first
+    last = args.last
+    if last is None:
+        last = first
+    print(f"generating missing remarks for {first} .. {last}")
+    try:
+        make_tables(first, last)
+    except BzSchemaProcessingException as e:
+        _add_missing_remark_todos(e.errors)
+    else:
+        print("No changes detected.")
+        return
+    if first != last:
+        # we're comparing two versions, run it a second time to catch added/removed
+        try:
+            make_tables(first, last)
+        except BzSchemaProcessingException as e:
+            _add_change_remark_todos(e.errors)
+
+    mode = FileMode(string_normalization=False, experimental_string_processing=True)
+    var_names = (
+        'version_order',
+        'default_first_version',
+        'default_last_version',
+        'version_schema_map',
+        'version_remark',
+        'table_remark',
+        'table_added_remark',
+        'table_removed_remark',
+        'column_remark',
+        'column_renamed',
+        'column_added_remark',
+        'column_removed_remark',
+        'index_remark',
+        'index_renamed',
+        'index_removed_remark',
+        'index_added_remark',
+        'notation_guide',
+        'header',
+        'footer',
+        'prelude',
+        'afterword',
+    )
+    # One formatted "name = value" assignment per template placeholder.
+    var_dict = {
+        name: format_str(
+            '%s = %s' % (name, pformat(getattr(schema_remarks, name))), mode=mode
+        )
+        for name in var_names
+    }
+
+    with open('schema_remarks_template.txt', 'r') as infile:
+        template = infile.read()
+    # Render before opening the output file, so a template that fails to
+    # format cannot leave a truncated schema_remarks_new.py behind.
+    output = template.format(**var_dict)
+    with open('schema_remarks_new.py', 'w') as outfile:
+        outfile.write(output)
+    print("Wrote changes to schema_remarks_new.py.")
+    print("diff the changes from schema_remarks.py and if you like them, move it overtop of it.")
+    sys.exit()
+
+
+def _add_missing_remark_todos(errors):
+    """Add 'TODO' table/column/index remarks for each recognised error string."""
+    for error in errors:
+        # regex_in turns each string-literal case into a regex fullmatch;
+        # `as m` binds the regex_in object, so m[1]/m[2] are captured groups.
+        match regex_in(error):
+            case r"No column remarks for table '(\S+)'\." as m:
+                schema_remarks.table_remark.setdefault(m[1], 'TODO')
+                schema_remarks.column_remark[m[1]] = {}
+            case r"Table '(\S+)' has no remark for column '(\S+)'\." as m:
+                # setdefault: do not assume the per-table dict already exists.
+                schema_remarks.column_remark.setdefault(m[1], {})[m[2]] = 'TODO'
+            case r"No index remarks for table '(\S+)'\." as m:
+                schema_remarks.index_remark[m[1]] = {}
+            case r"Table '(\S+)' has no remark for index '(\S+)'\." as m:
+                schema_remarks.index_remark.setdefault(m[1], {})[m[2]] = 'TODO'
+            case _:
+                print(f"Unhandled error: {error}")
+
+
+def _add_change_remark_todos(errors):
+    """Add 'TODO' added/removed remarks for each recognised error string."""
+    for error in errors:
+        match regex_in(error):
+            case r"No remark to add table (\S+)" as m:
+                schema_remarks.table_added_remark[m[1]] = 'TODO'
+            case r"No remark to remove table (\S+)" as m:
+                schema_remarks.table_removed_remark[m[1]] = 'TODO'
+            case r"No remark to add column (\S+)\.(\S+)\." as m:
+                schema_remarks.column_added_remark.setdefault(m[1], {})[m[2]] = 'TODO'
+            case r"No remark to remove column (\S+)\.(\S+)\." as m:
+                schema_remarks.column_removed_remark.setdefault(m[1], {})[m[2]] = 'TODO'
+            case r"No remark to add index (\S+):(\S+)\." as m:
+                schema_remarks.index_added_remark.setdefault(m[1], {})[m[2]] = 'TODO'
+            case r"No remark to remove index (\S+):(\S+)\." as m:
+                schema_remarks.index_removed_remark.setdefault(m[1], {})[m[2]] = 'TODO'
+            case _:
+                print(f"Unhandled error: {error}")
+
+
+def validate_schema_remarks(args):
     for v in schema_remarks.version_order:
         if not v in schema_remarks.version_schema_map:
             errors.append(
@@ -979,6 +1095,26 @@ def validate_schema_remarks(*args, **kwargs):
         help="A file to write the generated schema doc to. Passing - will write it to standard out.",
     )
     parser_test.set_defaults(func=test_schema_remarks)
+    # 'generate' subcommand: takes a required first version and an optional
+    # last version, and dispatches to generate_schema_remarks.
+    parser_generate = subparsers.add_parser(
+        'generate',
+        help='Add all of the missing remarks from a new schema to schema_remarks.py for you as TODO items.',
+        description='Add all of the missing remarks from a new schema to schema_remarks.py for you as TODO items. Saves you the trouble of searching through the massive file looking for the right spot in alphabetical order to put them. Two benefits: gets a schema live faster (just without things documented), and you can search for TODO in the file to find the things that need updating.',
+    )
+    parser_generate.add_argument(
+        'first',
+        metavar="first",
+        choices=schema_remarks.version_order,
+        help="The starting version of the schemas to compare, or the single version to display if 'last' is not provided.",
+    )
+    parser_generate.add_argument(
+        'last',
+        metavar="last",
+        choices=schema_remarks.version_order,
+        nargs="?",
+        default=None,
+        help="The destination version of the schema to compare",
+    )
+    parser_generate.set_defaults(func=generate_schema_remarks)
     args = parser.parse_args()
     args.func(args)