From 87d927d7e98c76e825a1a550cd1943c90a7c702c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 9 Nov 2013 22:05:05 +0530 Subject: [PATCH] Improved regex module for python Taken from: https://code.google.com/p/mrab-regex-hg/ --- COPYRIGHT | 4 + setup/extensions.py | 40 +- src/calibre/constants.py | 1 + src/calibre/test_build.py | 7 + src/regex/README | 5 + src/regex/__init__.py | 678 ++ src/regex/_regex.c | 19957 +++++++++++++++++++++++++++++++++++ src/regex/_regex.h | 228 + src/regex/_regex_core.py | 4004 +++++++ src/regex/_regex_unicode.c | 11997 +++++++++++++++++++++ src/regex/_regex_unicode.h | 220 + 11 files changed, 37123 insertions(+), 18 deletions(-) create mode 100644 src/regex/README create mode 100644 src/regex/__init__.py create mode 100644 src/regex/_regex.c create mode 100644 src/regex/_regex.h create mode 100644 src/regex/_regex_core.py create mode 100644 src/regex/_regex_unicode.c create mode 100644 src/regex/_regex_unicode.h diff --git a/COPYRIGHT b/COPYRIGHT index a5763841f321..b4d1160cada8 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -34,6 +34,10 @@ License: LGPL-2.1 The full text of the LGPL is distributed as in /usr/share/common-licenses/LGPL-2.1 on Debian systems. +Files: srx/regex/* +Copyright: Matthew Barnett +License: Python Software Foundation License + Files: src/calibre/ebooks/hyphenate.py Copyright: Copyright (C) 1990, 2004, 2005 Gerard D.C. Kuiken. License: other diff --git a/setup/extensions.py b/setup/extensions.py index 3b4a37a8063a..1ac6570543b6 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -30,7 +30,6 @@ def absolutize(self, paths): return list(set([x if os.path.isabs(x) else os.path.join(SRC, x.replace('/', os.sep)) for x in paths])) - def __init__(self, name, sources, **kwargs): self.name = name self.needs_cxx = bool([1 for x in sources if os.path.splitext(x)[1] in @@ -67,10 +66,15 @@ def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags): icu_libs = ['icudt', 'icuin', 'icuuc', 'icuio'] if isosx: icu_libs = ['icucore'] - icu_cflags = ['-DU_DISABLE_RENAMING'] # Needed to use system libicucore.dylib + icu_cflags = ['-DU_DISABLE_RENAMING'] # Needed to use system libicucore.dylib extensions = [ + Extension('_regex', + ['regex/_regex.c', 'regex/_regex_unicode.c'], + headers=['regex/_regex.h'] + ), + Extension('speedup', ['calibre/utils/speedup.c'], ), @@ -127,7 +131,7 @@ def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags): Extension('freetype', ['calibre/utils/fonts/freetype.cpp'], - inc_dirs = ft_inc_dirs, + inc_dirs=ft_inc_dirs, libraries=ft_libs, lib_dirs=ft_lib_dirs), @@ -171,23 +175,23 @@ def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags): Extension('pictureflow', ['calibre/gui2/pictureflow/pictureflow.cpp'], - inc_dirs = ['calibre/gui2/pictureflow'], - headers = ['calibre/gui2/pictureflow/pictureflow.h'], - sip_files = ['calibre/gui2/pictureflow/pictureflow.sip'] + inc_dirs=['calibre/gui2/pictureflow'], + headers=['calibre/gui2/pictureflow/pictureflow.h'], + sip_files=['calibre/gui2/pictureflow/pictureflow.sip'] ), Extension('progress_indicator', ['calibre/gui2/progress_indicator/QProgressIndicator.cpp'], - inc_dirs = ['calibre/gui2/progress_indicator'], - headers = ['calibre/gui2/progress_indicator/QProgressIndicator.h'], - sip_files = ['calibre/gui2/progress_indicator/QProgressIndicator.sip'] + inc_dirs=['calibre/gui2/progress_indicator'], + headers=['calibre/gui2/progress_indicator/QProgressIndicator.h'], + sip_files=['calibre/gui2/progress_indicator/QProgressIndicator.sip'] ), Extension('qt_hack', ['calibre/ebooks/pdf/render/qt_hack.cpp'], - inc_dirs = qt_private_inc + ['calibre/ebooks/pdf/render', 'qt-harfbuzz/src'], - headers = ['calibre/ebooks/pdf/render/qt_hack.h'], - sip_files = ['calibre/ebooks/pdf/render/qt_hack.sip'] + inc_dirs=qt_private_inc + ['calibre/ebooks/pdf/render', 'qt-harfbuzz/src'], + headers=['calibre/ebooks/pdf/render/qt_hack.h'], + sip_files=['calibre/ebooks/pdf/render/qt_hack.sip'] ), Extension('unrar', @@ -200,7 +204,7 @@ def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags): volume.o list.o find.o unpack.o cmddata.o filestr.o scantree.o '''.split()] + ['calibre/utils/unrar.cpp'], inc_dirs=['unrar'], - cflags = [('/' if iswindows else '-') + x for x in ( + cflags=[('/' if iswindows else '-') + x for x in ( 'DSILENT', 'DRARDLL', 'DUNRAR')] + ( [] if iswindows else ['-D_FILE_OFFSET_BITS=64', '-D_LARGEFILE_SOURCE']), @@ -436,9 +440,9 @@ def build(self, ext, dest): if iswindows: #manifest = dest+'.manifest' #cmd = [MT, '-manifest', manifest, '-outputresource:%s;2'%dest] - #self.info(*cmd) - #self.check_call(cmd) - #os.remove(manifest) + # self.info(*cmd) + # self.check_call(cmd) + # os.remove(manifest) for x in ('.exp', '.lib'): x = os.path.splitext(dest)[0]+x if os.path.exists(x): @@ -487,7 +491,7 @@ def path(x): "style/windowmanager.cpp", ] if not iswindows and not isosx: - headers.append( "style/shadowhelper.h") + headers.append("style/shadowhelper.h") sources.append('style/shadowhelper.cpp') pro = textwrap.dedent(''' @@ -586,7 +590,7 @@ def build_pyqt_extension(self, ext, dest): sbf = self.j(src_dir, self.b(sipf)+'.sbf') if self.newer(sbf, [sipf]+ext.headers): exe = '.exe' if iswindows else '' - cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\ + cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+ pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf] self.info(' '.join(cmd)) self.check_call(cmd) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index fb7da67e9f49..df78af205181 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -139,6 +139,7 @@ def __init__(self): 'woff', 'unrar', 'qt_hack', + '_regex' ] if iswindows: plugins.extend(['winutil', 'wpd', 'winfonts']) diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py index c0e99a5c43a1..51759044270e 100644 --- a/src/calibre/test_build.py +++ b/src/calibre/test_build.py @@ -15,6 +15,12 @@ import cStringIO from calibre.constants import plugins, iswindows +def test_regex(): + import regex + if regex.findall(r'(?i)(a)(b)', 'ab cd AB 1a1b') != [('a', 'b'), ('A', 'B')]: + raise ValueError('regex module failed on a simple search') + print ('regex OK!') + def test_html5lib(): import html5lib.html5parser # noqa from html5lib import parse # noqa @@ -119,6 +125,7 @@ def test(): test_woff() test_qt() test_html5lib() + test_regex() if iswindows: test_winutil() test_wpd() diff --git a/src/regex/README b/src/regex/README new file mode 100644 index 000000000000..87069c018280 --- /dev/null +++ b/src/regex/README @@ -0,0 +1,5 @@ +This regex engine is taken, with thanks, from: https://code.google.com/p/mrab-regex-hg/ + +It is licensed under the Python Software Foundation License + +Author: Matthew Barnett diff --git a/src/regex/__init__.py b/src/regex/__init__.py new file mode 100644 index 000000000000..76825e00483a --- /dev/null +++ b/src/regex/__init__.py @@ -0,0 +1,678 @@ +# +# Secret Labs' Regular Expression Engine +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# +# 2010-01-16 mrab Python front-end re-written and extended + +r"""Support for regular expressions (RE). + +This module provides regular expression matching operations similar to those +found in Perl. It supports both 8-bit and Unicode strings; both the pattern and +the strings being processed can contain null bytes and characters outside the +US ASCII range. + +Regular expressions can contain both special and ordinary characters. Most +ordinary characters, like "A", "a", or "0", are the simplest regular +expressions; they simply match themselves. You can concatenate ordinary +characters, so last matches the string 'last'. + +There are a few differences between the old (legacy) behaviour and the new +(enhanced) behaviour, which are indicated by VERSION0 or VERSION1. + +The special characters are: + "." Matches any character except a newline. + "^" Matches the start of the string. + "$" Matches the end of the string or just before the + newline at the end of the string. + "*" Matches 0 or more (greedy) repetitions of the preceding + RE. Greedy means that it will match as many repetitions + as possible. + "+" Matches 1 or more (greedy) repetitions of the preceding + RE. + "?" Matches 0 or 1 (greedy) of the preceding RE. + *?,+?,?? Non-greedy versions of the previous three special + characters. + *+,++,?+ Possessive versions of the previous three special + characters. + {m,n} Matches from m to n repetitions of the preceding RE. + {m,n}? Non-greedy version of the above. + {m,n}+ Possessive version of the above. + {...} Fuzzy matching constraints. + "\\" Either escapes special characters or signals a special + sequence. + [...] Indicates a set of characters. A "^" as the first + character indicates a complementing set. + "|" A|B, creates an RE that will match either A or B. + (...) Matches the RE inside the parentheses. The contents are + captured and can be retrieved or matched later in the + string. + (?flags-flags) VERSION1: Sets/clears the flags for the remainder of + the group or pattern; VERSION0: Sets the flags for the + entire pattern. + (?:...) Non-capturing version of regular parentheses. + (?>...) Atomic non-capturing version of regular parentheses. + (?flags-flags:...) Non-capturing version of regular parentheses with local + flags. + (?P...) The substring matched by the group is accessible by + name. + (?...) The substring matched by the group is accessible by + name. + (?P=name) Matches the text matched earlier by the group named + name. + (?#...) A comment; ignored. + (?=...) Matches if ... matches next, but doesn't consume the + string. + (?!...) Matches if ... doesn't match next. + (?<=...) Matches if preceded by .... + (? Matches the text matched by the group named name. + \G Matches the empty string, but only at the position where + the search started. + \L Named list. The list is provided as a keyword argument. + \m Matches the empty string, but only at the start of a word. + \M Matches the empty string, but only at the end of a word. + \n Matches the newline character. + \N{name} Matches the named character. + \p{name=value} Matches the character if its property has the specified + value. + \P{name=value} Matches the character if its property hasn't the specified + value. + \r Matches the carriage-return character. + \s Matches any whitespace character; equivalent to + [ \t\n\r\f\v]. + \S Matches any non-whitespace character; equivalent to [^\s]. + \t Matches the tab character. + \uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX. + \UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code + XXXXXXXX. + \v Matches the vertical tab character. + \w Matches any alphanumeric character; equivalent to + [a-zA-Z0-9_] when matching a bytestring or a Unicode string + with the ASCII flag, or the whole range of Unicode + alphanumeric characters (letters plus digits plus + underscore) when matching a Unicode string. With LOCALE, it + will match the set [0-9_] plus characters defined as + letters for the current locale. + \W Matches the complement of \w; equivalent to [^\w]. + \xXX Matches the character with 2-digit hex code XX. + \X Matches a grapheme. + \Z Matches only at the end of the string. + \\ Matches a literal backslash. + +This module exports the following functions: + match Match a regular expression pattern at the beginning of a string. + fullmatch Match a regular expression pattern against all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string using a + template string. + subf Substitute occurrences of a pattern found in a string using a + format string. + subn Same as sub, but also return the number of substitutions made. + subfn Same as subf, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. VERSION1: will + split at zero-width match; VERSION0: won't split at zero-width + match. + splititer Return an iterator yielding the parts of a split string. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a match object for each match. + compile Compile a pattern into a Pattern object. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics or special characters in a + string. + +Most of the functions support a concurrent parameter: if True, the GIL will be +released during matching, allowing other Python threads to run concurrently. If +the string changes during matching, the behaviour is undefined. This parameter +is not needed when working on the builtin (immutable) string classes. + +Some of the functions in this module take flags as optional parameters. Most of +these flags can also be set within an RE: + A a ASCII Make \w, \W, \b, \B, \d, and \D match the + corresponding ASCII character categories. Default + when matching a bytestring. + B b BESTMATCH Find the best fuzzy match (default is first). + D DEBUG Print the parsed pattern. + F f FULLCASE Use full case-folding when performing + case-insensitive matching in Unicode. + I i IGNORECASE Perform case-insensitive matching. + L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the + current locale. (One byte per character only.) + M m MULTILINE "^" matches the beginning of lines (after a newline) + as well as the string. "$" matches the end of lines + (before a newline) as well as the end of the string. + E e ENHANCEMATCH Attempt to improve the fit after finding the first + fuzzy match. + R r REVERSE Searches backwards. + S s DOTALL "." matches any character at all, including the + newline. + U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the + Unicode locale. Default when matching a Unicode + string. + V0 V0 VERSION0 Turn on the old legacy behaviour. + V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag + includes the FULLCASE flag. + W w WORD Make \b and \B work with default Unicode word breaks + and make ".", "^" and "$" work with Unicode line + breaks. + X x VERBOSE Ignore whitespace and comments for nicer looking REs. + +This module also defines an exception 'error'. + +""" + +# Public symbols. +__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match", + "purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn", + "template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", + "ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L", + "LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE", + "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error", + "Regex"] + +__version__ = "2.4.35" + +# -------------------------------------------------------------------- +# Public interface. + +def match(pattern, string, flags=0, pos=None, endpos=None, concurrent=None, + **kwargs): + """Try to apply the pattern at the start of the string, returning a match + object, or None if no match was found.""" + return _compile(pattern, flags, kwargs).match(string, pos, endpos, + concurrent) + +def fullmatch(pattern, string, flags=0, pos=None, endpos=None, concurrent=None, + **kwargs): + """Try to apply the pattern against all of the string, returning a match + object, or None if no match was found.""" + return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos, + concurrent) + +def search(pattern, string, flags=0, pos=None, endpos=None, concurrent=None, + **kwargs): + """Search through string looking for a match to the pattern, returning a + match object, or None if no match was found.""" + return _compile(pattern, flags, kwargs).search(string, pos, endpos, + concurrent) + +def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return the string obtained by replacing the leftmost (or rightmost with a + reverse pattern) non-overlapping occurrences of the pattern in string by the + replacement repl. repl can be either a string or a callable; if a string, + backslash escapes in it are processed; if a callable, it's passed the match + object and must return a replacement string to be used.""" + return _compile(pattern, flags, kwargs).sub(repl, string, count, pos, + endpos, concurrent) + +def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return the string obtained by replacing the leftmost (or rightmost with a + reverse pattern) non-overlapping occurrences of the pattern in string by the + replacement format. format can be either a string or a callable; if a string, + it's treated as a format string; if a callable, it's passed the match object + and must return a replacement string to be used.""" + return _compile(pattern, flags, kwargs).subf(format, string, count, pos, + endpos, concurrent) + +def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return a 2-tuple containing (new_string, number). new_string is the string + obtained by replacing the leftmost (or rightmost with a reverse pattern) + non-overlapping occurrences of the pattern in the source string by the + replacement repl. number is the number of substitutions that were made. repl + can be either a string or a callable; if a string, backslash escapes in it + are processed; if a callable, it's passed the match object and must return a + replacement string to be used.""" + return _compile(pattern, flags, kwargs).subn(repl, string, count, pos, + endpos, concurrent) + +def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return a 2-tuple containing (new_string, number). new_string is the string + obtained by replacing the leftmost (or rightmost with a reverse pattern) + non-overlapping occurrences of the pattern in the source string by the + replacement format. number is the number of substitutions that were made. format + can be either a string or a callable; if a string, it's treated as a format + string; if a callable, it's passed the match object and must return a + replacement string to be used.""" + return _compile(pattern, flags, kwargs).subfn(format, string, count, pos, + endpos, concurrent) + +def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): + """Split the source string by the occurrences of the pattern, returning a + list containing the resulting substrings. If capturing parentheses are used + in pattern, then the text of all groups in the pattern are also returned as + part of the resulting list. If maxsplit is nonzero, at most maxsplit splits + occur, and the remainder of the string is returned as the final element of + the list.""" + return _compile(pattern, flags, kwargs).split(string, maxsplit, concurrent) + +def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): + "Return an iterator yielding the parts of a split string." + return _compile(pattern, flags, kwargs).splititer(string, maxsplit, + concurrent) + +def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, + concurrent=None, **kwargs): + """Return a list of all matches in the string. The matches may be overlapped + if overlapped is True. If one or more groups are present in the pattern, + return a list of groups; this will be a list of tuples if the pattern has + more than one group. Empty matches are included in the result.""" + return _compile(pattern, flags, kwargs).findall(string, pos, endpos, + overlapped, concurrent) + +def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, + concurrent=None, **kwargs): + """Return an iterator over all matches in the string. The matches may be + overlapped if overlapped is True. For each match, the iterator returns a + match object. Empty matches are included in the result.""" + return _compile(pattern, flags, kwargs).finditer(string, pos, endpos, + overlapped, concurrent) + +def compile(pattern, flags=0, **kwargs): + "Compile a regular expression pattern, returning a pattern object." + return _compile(pattern, flags, kwargs) + +def purge(): + "Clear the regular expression cache" + _cache.clear() + +def template(pattern, flags=0): + "Compile a template pattern, returning a pattern object." + return _compile(pattern, flags | TEMPLATE) + +def escape(pattern, special_only=False): + "Escape all non-alphanumeric characters or special characters in pattern." + if isinstance(pattern, unicode): + s = [] + if special_only: + for c in pattern: + if c in _METACHARS: + s.append(u"\\") + s.append(c) + elif c == u"\x00": + s.append(u"\\000") + else: + s.append(c) + else: + for c in pattern: + if c in _ALNUM: + s.append(c) + elif c == u"\x00": + s.append(u"\\000") + else: + s.append(u"\\") + s.append(c) + + return u"".join(s) + else: + s = [] + if special_only: + for c in pattern: + if c in _METACHARS: + s.append("\\") + s.append(c) + elif c == "\x00": + s.append("\\000") + else: + s.append(c) + else: + for c in pattern: + if c in _ALNUM: + s.append(c) + elif c == "\x00": + s.append("\\000") + else: + s.append("\\") + s.append(c) + + return "".join(s) + +# -------------------------------------------------------------------- +# Internals. + +from . import _regex_core +from calibre.constants import plugins +_regex = plugins['_regex'][0] +from threading import RLock as _RLock +from ._regex_core import * +from ._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError, + _UnscopedFlagSet, _check_group_features, _compile_firstset, + _compile_replacement, _flatten_code, _fold_case, _get_required_string, + _parse_pattern, _shrink_cache) +from ._regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as + _Source, Fuzzy as _Fuzzy) + +# Version 0 is the old behaviour, compatible with the original 're' module. +# Version 1 is the new behaviour, which differs slightly. + +DEFAULT_VERSION = VERSION0 + +_METACHARS = frozenset("()[]{}?*+|^$\\.") + +_regex_core.DEFAULT_VERSION = DEFAULT_VERSION + +# Caches for the patterns and replacements. +_cache = {} +_cache_lock = _RLock() +_named_args = {} +_replacement_cache = {} + +# Maximum size of the cache. +_MAXCACHE = 500 +_MAXREPCACHE = 500 + +def _compile(pattern, flags=0, kwargs={}): + "Compiles a regular expression to a PatternObject." + try: + # Do we know what keyword arguments are needed? + args_key = pattern, type(pattern), flags + args_needed = _named_args[args_key] + + # Are we being provided with its required keyword arguments? + args_supplied = set() + if args_needed: + for k, v in args_needed: + try: + args_supplied.add((k, frozenset(kwargs[k]))) + except KeyError: + raise error("missing named list") + + args_supplied = frozenset(args_supplied) + + # Have we already seen this regular expression and named list? + pattern_key = (pattern, type(pattern), flags, args_supplied, + DEFAULT_VERSION) + return _cache[pattern_key] + except KeyError: + # It's a new pattern, or new named list for a known pattern. + pass + + # Guess the encoding from the class of the pattern string. + if isinstance(pattern, unicode): + guess_encoding = UNICODE + elif isinstance(pattern, str): + guess_encoding = ASCII + elif isinstance(pattern, _pattern_type): + if flags: + raise ValueError("can't process flags argument with a compiled pattern") + + return pattern + else: + raise TypeError("first argument must be a string or compiled pattern") + + # Set the default version in the core code in case it has been changed. + _regex_core.DEFAULT_VERSION = DEFAULT_VERSION + + caught_exception = None + + while True: + try: + source = _Source(pattern) + info = _Info(flags, source.char_type, kwargs) + info.guess_encoding = guess_encoding + source.ignore_space = bool(info.flags & VERBOSE) + parsed = _parse_pattern(source, info) + break + except _UnscopedFlagSet: + # Remember the global flags for the next attempt. + flags = info.global_flags + except error, e: + caught_exception = e + + if caught_exception: + raise error(str(caught_exception)) + + if not source.at_end(): + raise error("trailing characters in pattern at position %d" % source.pos) + + # Check the global flags for conflicts. + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version not in (0, VERSION0, VERSION1): + raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") + + if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE): + raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible") + + if not (info.flags & _ALL_ENCODINGS): + if isinstance(pattern, unicode): + info.flags |= UNICODE + else: + info.flags |= ASCII + + reverse = bool(info.flags & REVERSE) + fuzzy = isinstance(parsed, _Fuzzy) + + # Should we print the parsed pattern? + if flags & DEBUG: + parsed.dump(indent=0, reverse=reverse) + + # Fix the group references. + parsed.fix_groups(reverse, False) + + # Optimise the parsed pattern. + parsed = parsed.optimise(info) + parsed = parsed.pack_characters(info) + + # Get the required string. + req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags) + + # Build the named lists. + named_lists = {} + named_list_indexes = [None] * len(info.named_lists_used) + args_needed = set() + for key, index in info.named_lists_used.items(): + name, case_flags = key + values = frozenset(kwargs[name]) + if case_flags: + items = frozenset(_fold_case(info, v) for v in values) + else: + items = values + named_lists[name] = values + named_list_indexes[index] = items + args_needed.add((name, values)) + + # Check the features of the groups. + _check_group_features(info, parsed) + + # Compile the parsed pattern. The result is a list of tuples. + code = parsed.compile(reverse) + + # Is there a group call to the pattern as a whole? + key = (0, reverse, fuzzy) + ref = info.call_refs.get(key) + if ref is not None: + code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )] + + # Add the final 'success' opcode. + code += [(_OP.SUCCESS, )] + + # Compile the additional copies of the groups that we need. + for group, rev, fuz in info.additional_groups: + code += group.compile(rev, fuz) + + # Flatten the code into a list of ints. + code = _flatten_code(code) + + if not parsed.has_simple_start(): + # Get the first set, if possible. + try: + fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) + fs_code = _flatten_code(fs_code) + code = fs_code + code + except _FirstSetError: + pass + + # The named capture groups. + index_group = dict((v, n) for n, v in info.group_index.items()) + + # Create the PatternObject. + # + # Local flags like IGNORECASE affect the code generation, but aren't needed + # by the PatternObject itself. Conversely, global flags like LOCALE _don't_ + # affect the code generation but _are_ needed by the PatternObject. + compiled_pattern = _regex.compile(pattern, info.flags | version, code, + info.group_index, index_group, named_lists, named_list_indexes, + req_offset, req_chars, req_flags, info.group_count) + + # Do we need to reduce the size of the cache? + if len(_cache) >= _MAXCACHE: + _cache_lock.acquire() + try: + _shrink_cache(_cache, _named_args, _MAXCACHE) + finally: + _cache_lock.release() + + args_needed = frozenset(args_needed) + + # Store this regular expression and named list. + pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION) + _cache[pattern_key] = compiled_pattern + + # Store what keyword arguments are needed. + _named_args[args_key] = args_needed + + return compiled_pattern + +def _compile_replacement_helper(pattern, template): + "Compiles a replacement template." + # This function is called by the _regex module. + + # Have we seen this before? + key = pattern.pattern, pattern.flags, template + compiled = _replacement_cache.get(key) + if compiled is not None: + return compiled + + if len(_replacement_cache) >= _MAXREPCACHE: + _replacement_cache.clear() + + is_unicode = isinstance(template, unicode) + source = _Source(template) + if is_unicode: + def make_string(char_codes): + return u"".join(unichr(c) for c in char_codes) + else: + def make_string(char_codes): + return "".join(chr(c) for c in char_codes) + + compiled = [] + literal = [] + while True: + ch = source.get() + if not ch: + break + if ch == "\\": + # '_compile_replacement' will return either an int group reference + # or a string literal. It returns items (plural) in order to handle + # a 2-character literal (an invalid escape sequence). + is_group, items = _compile_replacement(source, pattern, is_unicode) + if is_group: + # It's a group, so first flush the literal. + if literal: + compiled.append(make_string(literal)) + literal = [] + compiled.extend(items) + else: + literal.extend(items) + else: + literal.append(ord(ch)) + + # Flush the literal. + if literal: + compiled.append(make_string(literal)) + + _replacement_cache[key] = compiled + + return compiled + +# We define _pattern_type here after all the support objects have been defined. +_pattern_type = type(_compile("", 0, {})) + +# We'll define an alias for the 'compile' function so that the repr of a +# pattern object is eval-able. +Regex = compile + +# Register myself for pickling. +import copy_reg as _copy_reg + +def _pickle(p): + return _compile, (p.pattern, p.flags) + +_copy_reg.pickle(_pattern_type, _pickle, _compile) + +if not hasattr(str, "format"): + # Strings don't have the .format method (below Python 2.6). + while True: + _start = __doc__.find(" subf") + if _start < 0: + break + + _end = __doc__.find("\n", _start) + 1 + while __doc__.startswith(" ", _end): + _end = __doc__.find("\n", _end) + 1 + + __doc__ = __doc__[ : _start] + __doc__[_end : ] + + __all__ = [_name for _name in __all__ if not _name.startswith("subf")] + + del _start, _end + + del subf, subfn diff --git a/src/regex/_regex.c b/src/regex/_regex.c new file mode 100644 index 000000000000..f0a4f9a1877a --- /dev/null +++ b/src/regex/_regex.c @@ -0,0 +1,19957 @@ +/* Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * partial history: + * 1999-10-24 fl created (based on existing template matcher code) + * 2000-03-06 fl first alpha, sort of + * 2000-08-01 fl fixes for 1.6b1 + * 2000-08-07 fl use PyOS_CheckStack() if available + * 2000-09-20 fl added expand method + * 2001-03-20 fl lots of fixes for 2.1b2 + * 2001-04-15 fl export copyright as Python attribute, not global + * 2001-04-28 fl added __copy__ methods (work in progress) + * 2001-05-14 fl fixes for 1.5.2 compatibility + * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) + * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) + * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 + * 2001-10-21 fl added sub/subn primitive + * 2001-10-24 fl added finditer primitive (for 2.2 only) + * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) + * 2002-11-09 fl fixed empty sub/subn return type + * 2003-04-18 mvl fully support 4-byte codes + * 2003-10-17 gn implemented non recursive scheme + * 2009-07-26 mrab completely re-designed matcher code + * 2011-11-18 mrab added support for PEP 393 strings + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * This version of the SRE library can be redistributed under CNRI's + * Python 1.6 license. For any other use, please contact Secret Labs + * AB (info@pythonware.com). + * + * Portions of this engine have been developed in cooperation with + * CNRI. Hewlett-Packard provided funding for 1.6 integration and + * other compatibility work. + */ + +/* #define VERBOSE */ + +#if defined(VERBOSE) +#define TRACE(X) printf X; +#else +#define TRACE(X) +#endif + +#include "Python.h" +#include "structmember.h" /* offsetof */ +#include +#include "_regex.h" +#include "pyport.h" +#include "pythread.h" + +#define RE_MIN(X, Y) ((X) <= (Y) ? (X) : (Y)) +#define RE_MAX(X, Y) ((X) >= (Y) ? (X) : (Y)) + +#if PY_VERSION_HEX < 0x02060000 +#if SIZEOF_SIZE_T == SIZEOF_LONG_LONG +#define T_PYSSIZET T_LONGLONG +#elif SIZEOF_SIZE_T == SIZEOF_LONG +#define T_PYSSIZET T_LONG +#else +#error size_t is the same size as neither LONG nor LONGLONG +#endif + +#endif +typedef unsigned char Py_UCS1; +typedef unsigned short Py_UCS2; + +typedef RE_UINT32 RE_CODE; + +/* Case-sensitive letters in the General Category. */ +#define RE_PROP_GC_LU ((RE_PROP_GC << 16) | RE_PROP_LU) +#define RE_PROP_GC_LL ((RE_PROP_GC << 16) | RE_PROP_LL) +#define RE_PROP_GC_LT ((RE_PROP_GC << 16) | RE_PROP_LT) + +/* Unlimited repeat count. */ +#define RE_UNLIMITED (~(RE_CODE)0) + +/* The status of a node. */ +typedef unsigned short RE_STATUS_T; + +/* Whether to match concurrently, i.e. release the GIL while matching. */ +#define RE_CONC_NO 0 +#define RE_CONC_YES 1 +#define RE_CONC_DEFAULT 2 + +/* Flags for the kind of 'sub' call: 'sub', 'subn', 'subf', 'subfn'. */ +#define RE_SUB 0x0 +#define RE_SUBN 0x1 +#if PY_VERSION_HEX >= 0x02060000 +#define RE_SUBF 0x2 +#endif + +/* The name of this module, minus the leading underscore. */ +#define RE_MODULE "regex" +#define RE_MODULE_UPPER "REGEX" + +/* Error codes. */ +#define RE_ERROR_SUCCESS 1 /* Successful match. */ +#define RE_ERROR_FAILURE 0 /* Unsuccessful match. */ +#define RE_ERROR_ILLEGAL -1 /* Illegal code. */ +#define RE_ERROR_INTERNAL -2 /* Internal error. */ +#define RE_ERROR_CONCURRENT -3 /* "concurrent" invalid. */ +#define RE_ERROR_MEMORY -9 /* Out of memory. */ +#define RE_ERROR_INTERRUPTED -10 /* Signal handler raised exception. */ +#define RE_ERROR_REPLACEMENT -11 /* Invalid replacement string. */ +#define RE_ERROR_INVALID_GROUP_REF -12 /* Invalid group reference. */ +#define RE_ERROR_GROUP_INDEX_TYPE -13 /* Group index type error. */ +#define RE_ERROR_NO_SUCH_GROUP -14 /* No such group. */ +#define RE_ERROR_INDEX -15 /* String index. */ +#define RE_ERROR_BACKTRACKING -16 /* Too much backtracking. */ +#define RE_ERROR_NOT_STRING -17 /* Not a string. */ +#define RE_ERROR_NOT_UNICODE -18 /* Not a Unicode string. */ + +/* The number of backtrack entries per allocated block. */ +#define RE_BACKTRACK_BLOCK_SIZE 64 + +/* The maximum number of backtrack entries to allocate. */ +#define RE_MAX_BACKTRACK_ALLOC (1024 * 1024) + +/* The initial maximum capacity of the guard block. */ +#define RE_INIT_GUARDS_BLOCK_SIZE 16 + +/* The initial maximum capacity of the node list. */ +#define RE_INIT_NODE_LIST_SIZE 16 + +/* The size increment for various allocation lists. */ +#define RE_LIST_SIZE_INC 16 + +/* The initial maximum capacity of the capture groups. */ +#define RE_INIT_CAPTURE_SIZE 16 + +/* Node bitflags. */ +#define RE_POSITIVE_OP 0x1 +#define RE_ZEROWIDTH_OP 0x2 +#define RE_FUZZY_OP 0x4 +#define RE_REVERSE_OP 0x8 +#define RE_REQUIRED_OP 0x10 + +/* Guards against further matching can occur at the start of the body and the + * tail of a repeat containing a repeat. + */ +#define RE_STATUS_BODY 0x1 +#define RE_STATUS_TAIL 0x2 + +/* Whether a guard is added depends on whether there's a repeat in the body of + * the repeat or a group reference in the body or tail of the repeat. + */ +#define RE_STATUS_NEITHER 0x0 +#define RE_STATUS_REPEAT 0x4 +#define RE_STATUS_LIMITED 0x8 +#define RE_STATUS_REF 0x10 +#define RE_STATUS_VISITED_AG 0x20 +#define RE_STATUS_VISITED_REP 0x40 + +/* Whether a string node has been initialised for fast searching. */ +#define RE_STATUS_FAST_INIT 0x80 + +/* Whether a node us being used. (Additional nodes may be created while the + * pattern is being built. + */ +#define RE_STATUS_USED 0x100 + +/* Whether a node is a string node. */ +#define RE_STATUS_STRING 0x200 + +/* Whether a repeat node is within another repeat. */ +#define RE_STATUS_INNER 0x400 + +/* Various flags stored in a node status member. */ +#define RE_STATUS_SHIFT 11 + +#define RE_STATUS_FUZZY (RE_FUZZY_OP << RE_STATUS_SHIFT) +#define RE_STATUS_REVERSE (RE_REVERSE_OP << RE_STATUS_SHIFT) +#define RE_STATUS_REQUIRED (RE_REQUIRED_OP << RE_STATUS_SHIFT) + +/* The different error types for fuzzy matching. */ +#define RE_FUZZY_SUB 0 +#define RE_FUZZY_INS 1 +#define RE_FUZZY_DEL 2 +#define RE_FUZZY_ERR 3 +#define RE_FUZZY_COUNT 3 + +#define RE_FUZZY_THRESHOLD 10 + +/* The various values in a FUZZY node. */ +#define RE_FUZZY_VAL_MAX_SUB 1 +#define RE_FUZZY_VAL_MAX_INS 2 +#define RE_FUZZY_VAL_MAX_DEL 3 +#define RE_FUZZY_VAL_MAX_ERR 4 +#define RE_FUZZY_VAL_SUB_COST 5 +#define RE_FUZZY_VAL_INS_COST 6 +#define RE_FUZZY_VAL_DEL_COST 7 +#define RE_FUZZY_VAL_MAX_COST 8 + +#define RE_FUZZY_VAL_MAX_BASE 1 +#define RE_FUZZY_VAL_COST_BASE 5 + +/* The various values in a END_FUZZY node. */ +#define RE_FUZZY_VAL_MIN_SUB 1 +#define RE_FUZZY_VAL_MIN_INS 2 +#define RE_FUZZY_VAL_MIN_DEL 3 +#define RE_FUZZY_VAL_MIN_ERR 4 + +/* The flags which will be set for full Unicode case folding. */ +#define RE_FULL_CASE_FOLDING (RE_FLAG_UNICODE | RE_FLAG_FULLCASE | RE_FLAG_IGNORECASE) + +/* The shortest string prefix for which we'll use a fast string search. */ +#define RE_MIN_FAST_LENGTH 5 + +static char copyright[] = + " RE 2.3.0 Copyright (c) 1997-2002 by Secret Labs AB "; + +/* The exception to raise on error. */ +static PyObject* error_exception; + +/* The dictionary of Unicode properties. */ +static PyObject* property_dict; + +typedef struct RE_State* RE_StatePtr; + +/* Handlers for ASCII, locale and Unicode. */ +typedef struct RE_EncodingTable { + BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); + Py_UCS4 (*lower)(Py_UCS4 ch); + Py_UCS4 (*upper)(Py_UCS4 ch); + Py_UCS4 (*title)(Py_UCS4 ch); + BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_default_boundary)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_default_word_start)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_default_word_end)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_grapheme_boundary)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*is_line_sep)(Py_UCS4 ch); + BOOL (*at_line_start)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_line_end)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*possible_turkic)(Py_UCS4 ch); + int (*all_cases)(Py_UCS4 ch, Py_UCS4* codepoints); + Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*all_turkic_i)(Py_UCS4 ch, Py_UCS4* cases); +} RE_EncodingTable; + +/* Position within the regex and text. */ +typedef struct RE_Position { + struct RE_Node* node; + Py_ssize_t text_pos; +} RE_Position; + +/* Info about fuzzy matching. */ +typedef struct RE_FuzzyInfo { + struct RE_Node* node; + size_t counts[RE_FUZZY_COUNT + 1]; /* Add 1 for total errors. */ + size_t total_cost; +} RE_FuzzyInfo; + +/* Storage for backtrack data. */ +typedef struct RE_BacktrackData { + union { + struct { + size_t capture_change; + BOOL too_few_errors; + } atomic; + struct { + RE_Position position; + } branch; + struct { + RE_FuzzyInfo fuzzy_info; + Py_ssize_t text_pos; + RE_UINT32 index; + } fuzzy; + struct { + RE_Position position; + Py_ssize_t count; + struct RE_Node* fuzzy_node; + BOOL too_few_errors; + } fuzzy_insert; + struct { + RE_Position position; + RE_INT8 fuzzy_type; + RE_INT8 step; + } fuzzy_one; + struct { + RE_Position position; + Py_ssize_t string_pos; + size_t string_len; + RE_INT8 fuzzy_type; + RE_INT8 folded_pos; + RE_INT8 folded_len; + RE_INT8 gfolded_pos; + RE_INT8 gfolded_len; + RE_INT8 step; + } fuzzy_string; + struct { + RE_Position position; + RE_INT8 fuzzy_type; + } fuzzy_zero; + struct { + Py_ssize_t text_pos; + Py_ssize_t current_capture; + RE_UINT32 private_index; + RE_UINT32 public_index; + BOOL capture; + } group; + struct { + struct RE_Node* node; + size_t capture_change; + } group_call; + struct { + size_t capture_change; + BOOL too_few_errors; + } lookaround; + struct { + RE_Position position; + Py_ssize_t text_pos; + size_t count; + Py_ssize_t start; + size_t capture_change; + RE_UINT32 index; + } repeat; + struct { + size_t* capture_counts; + } saved; + }; + RE_UINT8 op; +} RE_BacktrackData; + +/* Storage for backtrack data is allocated in blocks for speed. */ +typedef struct RE_BacktrackBlock { + RE_BacktrackData items[RE_BACKTRACK_BLOCK_SIZE]; + struct RE_BacktrackBlock* previous; + struct RE_BacktrackBlock* next; + size_t capacity; + size_t count; +} RE_BacktrackBlock; + +/* Storage for saved groups. */ +typedef struct RE_SavedGroups { + struct RE_SavedGroups* previous; + struct RE_SavedGroups* next; + struct RE_GroupSpan* spans; + size_t* counts; +} RE_SavedGroups; + +/* Storage for info around a recursive by 'basic'match'. */ +typedef struct RE_Info { + RE_BacktrackBlock* current_backtrack_block; + size_t backtrack_count; + RE_SavedGroups* current_saved_groups; + size_t captures_count; + struct RE_GroupCallFrame* current_group_call_frame; + BOOL must_advance; +} RE_Info; + +/* Storage for the next node. */ +typedef struct RE_NextNode { + struct RE_Node* node; + struct RE_Node* test; + struct RE_Node* match_next; + Py_ssize_t match_step; +} RE_NextNode; + +/* A pattern node. */ +typedef struct RE_Node { + RE_NextNode next_1; + union { + struct { + RE_NextNode next_2; + } nonstring; + struct { + /* Used only if (node->status & RE_STATUS_STRING) is true. */ + Py_ssize_t* bad_character_offset; + Py_ssize_t* good_suffix_offset; + } string; + }; + Py_ssize_t step; + size_t value_count; + RE_CODE* values; + RE_STATUS_T status; + RE_UINT8 op; + BOOL match; +} RE_Node; + +/* Info about a group's span. */ +typedef struct RE_GroupSpan { + Py_ssize_t start; + Py_ssize_t end; +} RE_GroupSpan; + +/* Span of a guard (inclusive range). */ +typedef struct RE_GuardSpan { + Py_ssize_t low; + Py_ssize_t high; + BOOL protect; +} RE_GuardSpan; + +/* Spans guarded against further matching. */ +typedef struct RE_GuardList { + size_t capacity; + size_t count; + RE_GuardSpan* spans; + Py_ssize_t last_text_pos; + size_t last_low; +} RE_GuardList; + +/* Info about a group in a context. */ +typedef struct RE_GroupData { + RE_GroupSpan span; + size_t capture_count; + size_t capture_capacity; + Py_ssize_t current_capture; + RE_GroupSpan* captures; +} RE_GroupData; + +/* Info about a repeat. */ +typedef struct RE_RepeatData { + RE_GuardList body_guard_list; + RE_GuardList tail_guard_list; + size_t count; + Py_ssize_t start; + size_t capture_change; +} RE_RepeatData; + +/* Storage for saved repeats. */ +typedef struct RE_SavedRepeats { + struct RE_SavedRepeats* previous; + struct RE_SavedRepeats* next; + RE_RepeatData* repeats; +} RE_SavedRepeats; + +/* Guards for fuzzy sections. */ +typedef struct RE_FuzzyGuards { + RE_GuardList body_guard_list; + RE_GuardList tail_guard_list; +} RE_FuzzyGuards; + +/* Info about a capture group. */ +typedef struct RE_GroupInfo { + Py_ssize_t end_index; + RE_Node* node; + BOOL referenced; + BOOL has_name; + BOOL called; +} RE_GroupInfo; + +/* Info about a call_ref. */ +typedef struct RE_CallRefInfo { + RE_Node* node; + BOOL defined; + BOOL used; +} RE_CallRefInfo; + +/* Info about a repeat. */ +typedef struct RE_RepeatInfo { + RE_STATUS_T status; +} RE_RepeatInfo; + +/* Stack frame for a group call. */ +typedef struct RE_GroupCallFrame { + struct RE_GroupCallFrame* previous; + struct RE_GroupCallFrame* next; + RE_Node* node; + RE_GroupData* groups; + RE_RepeatData* repeats; +} RE_GroupCallFrame; + +/* Info about a string argument. */ +typedef struct RE_StringInfo { +#if PY_VERSION_HEX >= 0x02060000 + Py_buffer view; /* View of the string if it's a buffer object. */ +#endif + void* characters; /* Pointer to the characters of the string. */ + Py_ssize_t length; /* Length of the string. */ + Py_ssize_t charsize; /* Size of the characters in the string. */ + BOOL is_unicode; /* Whether the string is Unicode. */ + BOOL should_release; /* Whether the buffer should be released. */ +} RE_StringInfo; + +/* Info about where the next match was found, starting from a certain search + * position. This is used when a pattern starts with a BRANCH. + */ +#define MAX_SEARCH_POSITIONS 7 + +typedef struct { + Py_ssize_t start_pos; + Py_ssize_t match_pos; +} RE_SearchPosition; + +/* The state object used during matching. */ +typedef struct RE_State { + struct PatternObject* pattern; /* Parent PatternObject. */ + /* Info about the string being matched. */ + PyObject* string; +#if PY_VERSION_HEX >= 0x02060000 + Py_buffer view; /* View of the string if it's a buffer object. */ +#endif + Py_ssize_t charsize; + void* text; + Py_ssize_t text_length; + /* The slice of the string being searched. */ + Py_ssize_t slice_start; + Py_ssize_t slice_end; + /* Info about the capture groups. */ + RE_GroupData* groups; + Py_ssize_t lastindex; + Py_ssize_t lastgroup; + /* Info about the repeats. */ + RE_RepeatData* repeats; + Py_ssize_t search_anchor; /* Where the last match finished. */ + Py_ssize_t match_pos; /* The start position of the match. */ + Py_ssize_t text_pos; /* The current position of the match. */ + Py_ssize_t final_newline; /* The index of newline at end of string, or -1. */ + Py_ssize_t final_line_sep; /* The index of line separator at end of string, or -1. */ + /* Storage for backtrack info. */ + RE_BacktrackBlock backtrack_block; + RE_BacktrackBlock* current_backtrack_block; + size_t backtrack_allocated; + RE_BacktrackData* backtrack; + /* Storage for saved capture groups. */ + RE_SavedGroups* first_saved_groups; + RE_SavedGroups* current_saved_groups; + RE_SavedRepeats* first_saved_repeats; + RE_SavedRepeats* current_saved_repeats; + size_t min_width; /* The minimum width of the string to match (assuming it's not a fuzzy pattern). */ + RE_EncodingTable* encoding; /* The 'encoding' of the string being searched. */ + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + void* (*point_to)(void* text, Py_ssize_t pos); + PyThread_type_lock lock; /* A lock for accessing the state across threads. */ + RE_FuzzyInfo fuzzy_info; /* Info about fuzzy matching. */ + RE_FuzzyGuards* fuzzy_guards; /* The guards for a fuzzy match. */ + size_t total_errors; /* The total number of errors of a fuzzy match. */ + size_t total_cost; /* The total cost of a fuzzy match. */ + size_t max_cost; /* The maximum permitted fuzzy cost. */ + /* The group call stack. */ + RE_GroupCallFrame* first_group_call_frame; + RE_GroupCallFrame* current_group_call_frame; + RE_GuardList* group_call_guard_list; + RE_SearchPosition search_positions[MAX_SEARCH_POSITIONS]; /* Where the search matches next. */ + size_t capture_change; /* Incremented every time a captive group changes. */ + Py_ssize_t req_pos; /* The position where the required string matched. */ + Py_ssize_t req_end; /* The end position where the required string matched. */ + RE_UINT16 iterations; /* The number of iterations the matching engine has performed since checking for KeyboardInterrupt. */ + BOOL is_unicode; /* Whether the string to be matched is Unicode. */ + BOOL should_release; /* Whether the buffer should be released. */ + BOOL overlapped; /* Whether the matches can be overlapped. */ + BOOL reverse; /* Whether it's a reverse pattern. */ + BOOL visible_captures; /* Whether the 'captures' method will be visible. */ + BOOL zero_width; /* Whether to enable the correct handling of zero-width matches. */ + BOOL must_advance; /* Whether the end of the match must advance past its start. */ + BOOL is_multithreaded; /* Whether to release the GIL while matching. */ + BOOL too_few_errors; /* Whether there were too few fuzzy errors. */ + BOOL match_all; /* Whether to match all of the string ('fullmatch'). */ +} RE_State; + +/* Storage for the regex state and thread state. + * + * Scanner objects can sometimes be shared across threads, which means that + * their RE_State structs are also shared. This isn't safe when the GIL is + * released, so in such instances we have a lock (mutex) in the RE_State struct + * to protect it during matching. We also need a thread-safe place to store the + * thread state when releasing the GIL. + */ +typedef struct RE_SafeState { + RE_State* re_state; + PyThreadState* thread_state; +} RE_SafeState; + +/* The PatternObject created from a regular expression. */ +typedef struct PatternObject { + PyObject_HEAD + PyObject* pattern; /* Pattern source (or None). */ + RE_CODE flags; /* Flags used when compiling pattern source. */ + PyObject* weakreflist; /* List of weak references */ + /* Nodes into which the regular expression is compiled. */ + RE_Node* start_node; + RE_Node* start_test; + size_t true_group_count; /* The true number of capture groups. */ + size_t public_group_count; /* The number of public capture groups. */ + size_t repeat_count; /* The number of repeats. */ + Py_ssize_t group_end_index; /* The number of group closures. */ + PyObject* groupindex; + PyObject* indexgroup; + PyObject* named_lists; + PyObject* named_list_indexes; + /* Storage for the pattern nodes. */ + size_t node_capacity; + size_t node_count; + RE_Node** node_list; + /* Info about the capture groups. */ + size_t group_info_capacity; + RE_GroupInfo* group_info; + /* Info about the call_refs. */ + size_t call_ref_info_capacity; + size_t call_ref_info_count; + RE_CallRefInfo* call_ref_info; + Py_ssize_t pattern_call_ref; + /* Info about the repeats. */ + size_t repeat_info_capacity; + RE_RepeatInfo* repeat_info; + size_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ + RE_EncodingTable* encoding; /* Encoding handlers. */ + RE_GroupData* groups_storage; + RE_RepeatData* repeats_storage; + size_t fuzzy_count; /* The number of fuzzy sections. */ + Py_ssize_t req_offset; /* The offset to the required string. */ + RE_Node* req_string; /* The required string. */ + BOOL is_fuzzy; /* Whether it's a fuzzy pattern. */ + BOOL do_search_start; /* Whether to do an initial search. */ + BOOL recursive; /* Whether the entire pattern is recursive. */ +} PatternObject; + +/* The MatchObject created when a match is found. */ +typedef struct MatchObject { + PyObject_HEAD + PyObject* string; /* Link to the target string or NULL if detached. */ + PyObject* substring; /* Link to (a substring of) the target string. */ + Py_ssize_t substring_offset; /* Offset into the target string. */ + PatternObject* pattern; /* Link to the regex (pattern) object. */ + Py_ssize_t pos; /* Start of current slice. */ + Py_ssize_t endpos; /* End of current slice. */ + Py_ssize_t match_start; /* Start of matched slice. */ + Py_ssize_t match_end; /* End of matched slice. */ + Py_ssize_t lastindex; /* Last group seen by the engine (-1 if none). */ + Py_ssize_t lastgroup; /* Last named group seen by the engine (-1 if none). */ + size_t group_count; /* The number of groups. */ + RE_GroupData* groups; /* The capture groups. */ + PyObject* regs; +} MatchObject; + +/* The ScannerObject. */ +typedef struct ScannerObject { + PyObject_HEAD + PatternObject* pattern; + RE_State state; + int status; +} ScannerObject; + +/* The SplitterObject. */ +typedef struct SplitterObject { + PyObject_HEAD + PatternObject* pattern; + RE_State state; + Py_ssize_t maxsplit; + Py_ssize_t last_pos; + Py_ssize_t split_count; + Py_ssize_t index; + int status; +} SplitterObject; + +/* Info used when compiling a pattern to nodes. */ +typedef struct RE_CompileArgs { + RE_CODE* code; /* The start of the compiled pattern. */ + RE_CODE* end_code; /* The end of the compiled pattern. */ + PatternObject* pattern; /* The pattern object. */ + size_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ + RE_Node* start; /* The start node. */ + RE_Node* end; /* The end node. */ + size_t repeat_depth; /* The nesting depth of the repeat. */ + BOOL forward; /* Whether it's a forward (not reverse) pattern. */ + BOOL visible_captures; /* Whether all of the captures will be visible. */ + BOOL has_captures; /* Whether the pattern has capture groups. */ + BOOL is_fuzzy; /* Whether the pattern (or some part of it) is fuzzy. */ + BOOL within_fuzzy; /* Whether the subpattern is within a fuzzy section. */ +} RE_CompileArgs; + +/* The string slices which will be concatenated to make the result string of + * the 'sub' method. + * + * This allows us to avoid creating a list of slices if there of fewer than 2 + * of them. Empty strings aren't recorded, so if 'list' and 'item' are both + * NULL then the result is an empty string. + */ +typedef struct JoinInfo { + PyObject* list; /* The list of slices if there are more than 2 of them. */ + PyObject* item; /* The slice if there is only 1 of them. */ + BOOL reversed; /* Whether the slices have been found in reverse order. */ + BOOL is_unicode; /* Whether the string is Unicode. */ +} JoinInfo; + +typedef struct { + RE_Node* new_node; + Py_ssize_t new_text_pos; + Py_ssize_t limit; + Py_ssize_t new_string_pos; + int step; + int new_folded_pos; + int folded_len; + int new_gfolded_pos; + int new_group_pos; + int fuzzy_type; + BOOL permit_insertion; +} RE_FuzzyData; + +/* Function types for getting info from a MatchObject. */ +typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index); + +Py_LOCAL_INLINE(Py_ssize_t) abs_ssize_t(Py_ssize_t x) { + return x >= 0 ? x : -x; +} + +/* Gets a character at the given position assuming 1 byte per character. */ +static Py_UCS4 bytes1_char_at(void* text, Py_ssize_t pos) { + return *((Py_UCS1*)text + pos); +} + +/* Sets a character at the given position assuming 1 byte per character. */ +static void bytes1_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { + *((Py_UCS1*)text + pos) = (Py_UCS1)ch; +} + +/* Gets a pointer to the character at the given position assuming 1 byte per + * character. + */ +static void* bytes1_point_to(void* text, Py_ssize_t pos) { + return (Py_UCS1*)text + pos; +} + +/* Gets a character at the given position assuming 2 bytes per character. */ +static Py_UCS4 bytes2_char_at(void* text, Py_ssize_t pos) { + return *((Py_UCS2*)text + pos); +} + +/* Sets a character at the given position assuming 2 bytes per character. */ +static void bytes2_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { + *((Py_UCS2*)text + pos) = (Py_UCS2)ch; +} + +/* Gets a pointer to the character at the given position assuming 2 bytes per + * character. + */ +static void* bytes2_point_to(void* text, Py_ssize_t pos) { + return (Py_UCS2*)text + pos; +} + +/* Gets a character at the given position assuming 4 bytes per character. */ +static Py_UCS4 bytes4_char_at(void* text, Py_ssize_t pos) { + return *((Py_UCS4*)text + pos); +} + +/* Sets a character at the given position assuming 4 bytes per character. */ +static void bytes4_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { + *((Py_UCS4*)text + pos) = (Py_UCS4)ch; +} + +/* Gets a pointer to the character at the given position assuming 4 bytes per + * character. + */ +static void* bytes4_point_to(void* text, Py_ssize_t pos) { + return (Py_UCS4*)text + pos; +} + +/* Default for whether the current text position is on a boundary. */ +static BOOL at_boundary_always(RE_State* state, Py_ssize_t text_pos) { + return TRUE; +} + +/* ASCII-specific. */ + +static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch); + +/* Checks whether a character has the given property. */ +static BOOL ascii_has_property(RE_CODE property, Py_UCS4 ch) { + if (ch > RE_ASCII_MAX) { + /* Outside the ASCII range. */ + RE_UINT32 value; + + value = property & 0xFFFF; + + return value == 0; + } + + return unicode_has_property(property, ch); +} + +/* Converts a character to lowercase. */ +static Py_UCS4 ascii_lower(Py_UCS4 ch) { + if (ch > RE_ASCII_MAX || !re_get_uppercase(ch)) + return ch; + + return ch ^ 0x20; +} + +/* Converts a character to uppercase. */ +static Py_UCS4 ascii_upper(Py_UCS4 ch) { + if (ch > RE_ASCII_MAX || !re_get_lowercase(ch)) + return ch; + + return ch ^ 0x20; +} + +/* Checks whether the current text position is on a word boundary. */ +static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return before != after; +} + +/* Checks whether the current text position is at the start of a word. */ +static BOOL ascii_at_word_start(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return !before && after; +} + +/* Checks whether the current text position is at the end of a word. */ +static BOOL ascii_at_word_end(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return before && !after; +} + +/* Checks whether a character is a line separator. */ +static BOOL ascii_is_line_sep(Py_UCS4 ch) { + return 0x0A <= ch && ch <= 0x0D; +} + +/* Checks whether the current text position is at the start of a line. */ +static BOOL ascii_at_line_start(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 ch; + + if (text_pos == 0) + return TRUE; + + ch = state->char_at(state->text, text_pos - 1); + + if (ch == 0x0D) + /* No line break inside CRLF. */ + return text_pos >= state->text_length || state->char_at(state->text, + text_pos) != 0x0A; + + return 0x0A <= ch && ch <= 0x0D; +} + +/* Checks whether the current text position is at the end of a line. */ +static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 ch; + + if (text_pos >= state->text_length) + return TRUE; + + ch = state->char_at(state->text, text_pos); + + if (ch == 0x0A) + /* No line break inside CRLF. */ + return text_pos >= 1 || state->char_at(state->text, text_pos - 1) != + 0x0D; + + return 0x0A <= ch && ch <= 0x0D; +} + +/* Checks whether a character could be Turkic (variants of I/i). For ASCII, it + * won't be. + */ +static BOOL ascii_possible_turkic(Py_UCS4 ch) { + return FALSE; +} + +/* Gets all the cases of a character. */ +static int ascii_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { + int count; + + count = 0; + + codepoints[count++] = ch; + + if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')) + /* It's a letter, so add the other case. */ + codepoints[count++] = ch ^ 0x20; + + return count; +} + +/* Returns a character with its case folded. */ +static Py_UCS4 ascii_simple_case_fold(Py_UCS4 ch) { + if ('A' <= ch && ch <= 'Z') + /* Uppercase folds to lowercase. */ + return ch ^ 0x20; + + return ch; +} + +/* Returns a character with its case folded. */ +static int ascii_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { + if ('A' <= ch && ch <= 'Z') + /* Uppercase folds to lowercase. */ + folded[0] = ch ^ 0x20; + else + folded[0] = ch; + + return 1; +} + +/* Gets all the case variants of Turkic 'I'. The given character will be listed + * first. + */ +static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { + int count; + + count = 0; + + cases[count++] = ch; + + if (ch != 'I') + cases[count++] = 'I'; + + if (ch != 'i') + cases[count++] = 'i'; + + return count; +} + +/* The handlers for ASCII characters. */ +static RE_EncodingTable ascii_encoding = { + ascii_has_property, + ascii_lower, + ascii_upper, + ascii_upper, /* For ASCII, titlecase is the same as uppercase. */ + ascii_at_boundary, + ascii_at_word_start, + ascii_at_word_end, + ascii_at_boundary, /* No special "default word boundary" for ASCII. */ + ascii_at_word_start, /* No special "default start of word" for ASCII. */ + ascii_at_word_end, /* No special "default end of a word" for ASCII. */ + at_boundary_always, /* No special "grapheme boundary" for ASCII. */ + ascii_is_line_sep, + ascii_at_line_start, + ascii_at_line_end, + ascii_possible_turkic, + ascii_all_cases, + ascii_simple_case_fold, + ascii_full_case_fold, + ascii_all_turkic_i, +}; + +/* Locale-specific. */ + +/* Checks whether a character has the given property. */ +static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) { + RE_UINT32 value; + RE_UINT32 v; + + value = property & 0xFFFF; + + if (ch > RE_LOCALE_MAX) + /* Outside the locale range. */ + return value == 0; + + switch (property >> 16) { + case RE_PROP_ALNUM >> 16: + v = isalnum(ch) != 0; + break; + case RE_PROP_ALPHA >> 16: + v = isalpha(ch) != 0; + break; + case RE_PROP_ANY >> 16: + v = 1; + break; + case RE_PROP_ASCII >> 16: + v = ch <= RE_ASCII_MAX; + break; + case RE_PROP_ASSIGNED >> 16: + v = ch <= RE_LOCALE_MAX; + break; + case RE_PROP_BLANK >> 16: + v = ch == '\t' || ch == ' '; + break; + case RE_PROP_GC: + switch (property) { + case RE_PROP_CNTRL: + v = iscntrl(ch) ? value : 0xFFFF; + break; + case RE_PROP_DIGIT: + v = isdigit(ch) ? value : 0xFFFF; + break; + case RE_PROP_GC_LL: + v = islower(ch) ? value : 0xFFFF; + break; + case RE_PROP_GC_LU: + v = isupper(ch) ? value : 0xFFFF; + break; + case RE_PROP_PUNCT: + v = ispunct(ch) ? value : 0xFFFF; + break; + default: + v = 0xFFFF; + break; + } + break; + case RE_PROP_GRAPH >> 16: + v = isgraph(ch) != 0; + break; + case RE_PROP_LOWER >> 16: + v = islower(ch) != 0; + break; + case RE_PROP_PRINT >> 16: + v = isprint(ch) != 0; + break; + case RE_PROP_SPACE >> 16: + v = isspace(ch) != 0; + break; + case RE_PROP_UPPER >> 16: + v = isupper(ch) != 0; + break; + case RE_PROP_WORD >> 16: + v = ch == '_' || isalnum(ch) != 0; + break; + case RE_PROP_XDIGIT >> 16: + v = re_get_hex_digit(ch) != 0; + break; + default: + v = 0; + break; + } + + return v == value; +} + +/* Converts a character to lowercase. */ +static Py_UCS4 locale_lower(Py_UCS4 ch) { + if (ch > RE_LOCALE_MAX) + return ch; + + return tolower(ch); +} + +/* Converts a character to uppercase. */ +static Py_UCS4 locale_upper(Py_UCS4 ch) { + if (ch > RE_LOCALE_MAX) + return ch; + + return toupper(ch); +} + +/* Checks whether the current text position is on a word boundary. */ +static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && locale_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && locale_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return before != after; +} + +/* Checks whether the current text position is at the start of a word. */ +static BOOL locale_at_word_start(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && locale_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && locale_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return !before && after; +} + +/* Checks whether the current text position is at the end of a word. */ +static BOOL locale_at_word_end(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && locale_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && locale_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return before && !after; +} + +/* Checks whether a character could be Turkic (variants of I/i). */ +static BOOL locale_possible_turkic(Py_UCS4 ch) { + return toupper(ch) == 'I' || tolower(ch) == 'i'; +} + +/* Gets all the cases of a character. */ +static int locale_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { + int count; + Py_UCS4 other; + + count = 0; + + codepoints[count++] = ch; + + other = toupper(ch); + if (other != ch) + codepoints[count++] = other; + + other = tolower(ch); + if (other != ch) + codepoints[count++] = other; + + return count; +} + +/* Returns a character with its case folded. */ +static Py_UCS4 locale_simple_case_fold(Py_UCS4 ch) { + if (ch <= RE_LOCALE_MAX) + return tolower(ch); + + return ch; +} + +/* Returns a character with its case folded. */ +static int locale_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { + if (ch <= RE_LOCALE_MAX) + folded[0] = tolower(ch); + else + folded[0] = ch; + + return 1; +} + +/* Gets all the case variants of Turkic 'I'. The given character will be listed + * first. + */ +static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { + int count; + Py_UCS4 other; + + count = 0; + + cases[count++] = ch; + + if (ch != 'I') + cases[count++] = 'I'; + + if (ch != 'i') + cases[count++] = 'i'; + + /* Uppercase 'i' will be either dotted (Turkic) or dotless (non-Turkic). */ + other = toupper('i'); + if (other != ch && other != 'I') + cases[count++] = other; + + /* Lowercase 'I' will be either dotless (Turkic) or dotted (non-Turkic). */ + other = tolower('I'); + if (other != ch && other != 'i') + cases[count++] = other; + + return count; +} + +/* The handlers for locale characters. */ +static RE_EncodingTable locale_encoding = { + locale_has_property, + locale_lower, + locale_upper, + locale_upper, /* For locale, titlecase is the same as uppercase. */ + locale_at_boundary, + locale_at_word_start, + locale_at_word_end, + locale_at_boundary, /* No special "default word boundary" for locale. */ + locale_at_word_start, /* No special "default start of a word" for locale. */ + locale_at_word_end, /* No special "default end of a word" for locale. */ + at_boundary_always, /* No special "grapheme boundary" for locale. */ + ascii_is_line_sep, /* Assume locale line separators are same as ASCII. */ + ascii_at_line_start, /* Assume locale line separators are same as ASCII. */ + ascii_at_line_end, /* Assume locale line separators are same as ASCII. */ + locale_possible_turkic, + locale_all_cases, + locale_simple_case_fold, + locale_full_case_fold, + locale_all_turkic_i, +}; + +/* Unicode-specific. */ + +/* Checks whether a Unicode character has the given property. */ +static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch) { + RE_UINT32 prop; + RE_UINT32 value; + RE_UINT32 v; + + prop = property >> 16; + if (prop >= sizeof(re_get_property) / sizeof(re_get_property[0])) + return FALSE; + + value = property & 0xFFFF; + v = re_get_property[prop](ch); + + if (v == value) + return TRUE; + + if (prop == RE_PROP_GC) { + switch (value) { + case RE_PROP_C: + return (RE_PROP_C_MASK & (1 << v)) != 0; + case RE_PROP_L: + return (RE_PROP_L_MASK & (1 << v)) != 0; + case RE_PROP_M: + return (RE_PROP_M_MASK & (1 << v)) != 0; + case RE_PROP_N: + return (RE_PROP_N_MASK & (1 << v)) != 0; + case RE_PROP_P: + return (RE_PROP_P_MASK & (1 << v)) != 0; + case RE_PROP_S: + return (RE_PROP_S_MASK & (1 << v)) != 0; + case RE_PROP_Z: + return (RE_PROP_Z_MASK & (1 << v)) != 0; + } + } + + return FALSE; +} + +/* Converts a Unicode character to lowercase. */ +static Py_UCS4 unicode_lower(Py_UCS4 ch) { + return Py_UNICODE_TOLOWER((Py_UCS4)ch); +} + +/* Converts a Unicode character to uppercase. */ +static Py_UCS4 unicode_upper(Py_UCS4 ch) { + return Py_UNICODE_TOUPPER((Py_UCS4)ch); +} + +/* Converts a Unicode character to titlecase. */ +static Py_UCS4 unicode_title(Py_UCS4 ch) { + return Py_UNICODE_TOTITLE((Py_UCS4)ch); +} + +/* Checks whether the current text position is on a word boundary. */ +static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return before != after; +} + +/* Checks whether the current text position is at the start of a word. */ +static BOOL unicode_at_word_start(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return !before && after; +} + +/* Checks whether the current text position is at the end of a word. */ +static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) { + BOOL before; + BOOL after; + + before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); + + return before && !after; +} + +/* Checks whether a character is a Unicode vowel. + * + * Only a limited number are treated as vowels. + */ +Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) { + switch (unicode_lower(ch)) { + case 'a': case 0xE0: case 0xE1: case 0xE2: + case 'e': case 0xE8: case 0xE9: case 0xEA: + case 'i': case 0xEC: case 0xED: case 0xEE: + case 'o': case 0xF2: case 0xF3: case 0xF4: + case 'u': case 0xF9: case 0xFA: case 0xFB: + return TRUE; + default: + return FALSE; + } +} + +/* Checks whether the current text position is on a default word boundary. + * + * The rules are defined here: + * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries + */ +static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + int prop; + int prop_m1; + Py_ssize_t pos_m1; + Py_ssize_t pos_m2; + int prop_m2; + Py_ssize_t pos_p0; + int prop_p0; + Py_ssize_t pos_p1; + int prop_p1; + + /* Break at the start and end of the text. */ + if (text_pos <= 0 || text_pos >= state->text_length) + return TRUE; + + char_at = state->char_at; + text = state->text; + + prop = (int)re_get_word_break(char_at(text, text_pos)); + prop_m1 = (int)re_get_word_break(char_at(text, text_pos - 1)); + + /* Don't break within CRLF. */ + if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) + return FALSE; + + /* Otherwise break before and after Newlines (including CR and LF). */ + if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == + RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == + RE_BREAK_LF) + return TRUE; + + /* Get the property of the previous character. */ + pos_m1 = text_pos - 1; + prop_m1 = RE_BREAK_OTHER; + while (pos_m1 >= 0) { + prop_m1 = (int)re_get_word_break(char_at(text, pos_m1)); + if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) + break; + + --pos_m1; + } + + /* Get the property of the preceding character. */ + pos_m2 = pos_m1 - 1; + prop_m2 = RE_BREAK_OTHER; + while (pos_m2 >= 0) { + prop_m2 = (int)re_get_word_break(char_at(text, pos_m2)); + if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT) + break; + + --pos_m2; + } + + /* Get the property of the next character. */ + pos_p0 = text_pos; + prop_p0 = prop; + while (pos_p0 < state->text_length) { + prop_p0 = (int)re_get_word_break(char_at(text, pos_p0)); + if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT) + break; + + ++pos_p0; + } + + /* Get the property of the following character. */ + pos_p1 = pos_p0 + 1; + prop_p1 = RE_BREAK_OTHER; + while (pos_p1 < state->text_length) { + prop_p1 = (int)re_get_word_break(char_at(text, pos_p1)); + if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) + break; + + ++pos_p1; + } + + /* Don't break between most letters. */ + if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && + (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER)) + return FALSE; + + /* Break between apostrophe and vowels (French, Italian). */ + if (pos_m1 >= 0 && char_at(text, pos_m1) == '\'' && + is_unicode_vowel(char_at(text, text_pos))) + return TRUE; + + /* Don't break letters across certain punctuation. */ + if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && + (prop_p0 == RE_BREAK_MIDLETTER || prop_p0 == RE_BREAK_MIDNUMLET || + prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER || + prop_p1 == RE_BREAK_HEBREWLETTER)) + return FALSE; + if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) && + (prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET || + prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER || + prop_p0 == RE_BREAK_HEBREWLETTER)) + return FALSE; + if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE) + return FALSE; + if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE && + prop_p1 == RE_BREAK_HEBREWLETTER) + return FALSE; + if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE && + prop_p0 == RE_BREAK_HEBREWLETTER) + return FALSE; + + /* Don't break within sequences of digits, or digits adjacent to letters + * ("3a", or "A3"). + */ + if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC) + return FALSE; + if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && + prop_p0 == RE_BREAK_NUMERIC) + return FALSE; + if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0 + == RE_BREAK_HEBREWLETTER)) + return FALSE; + + /* Don't break within sequences, such as "3.2" or "3,456.789". */ + if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 + == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 == + RE_BREAK_NUMERIC) + return FALSE; + if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0 + == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 == + RE_BREAK_NUMERIC) + return FALSE; + + /* Don't break between Katakana. */ + if (prop_m1 == RE_BREAK_KATAKANA && prop_p0 == RE_BREAK_KATAKANA) + return FALSE; + + /* Don't break from extenders. */ + if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER || + prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 == + RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET) + return FALSE; + if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER || + prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC || + prop_p0 == RE_BREAK_KATAKANA)) + return FALSE; + + /* Don't break between regional indicator symbols. */ + if (prop_m1 == RE_BREAK_REGIONALINDICATOR && prop_p0 == + RE_BREAK_REGIONALINDICATOR) + return FALSE; + + /* Otherwise, break everywhere (including around ideographs). */ + return TRUE; +} + +/* Checks whether the current text position is at the start/end of a word. */ +Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, + Py_ssize_t text_pos, BOOL at_start) { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + BOOL before; + BOOL after; + Py_UCS4 char_0; + Py_UCS4 char_m1; + int prop; + int prop_m1; + Py_ssize_t pos_m1; + Py_UCS4 char_p1; + Py_ssize_t pos_p1; + int prop_p1; + Py_ssize_t pos_m2; + Py_UCS4 char_m2; + int prop_m2; + + char_at = state->char_at; + text = state->text; + + /* At the start or end of the text. */ + if (text_pos <= 0 || text_pos >= state->text_length) { + before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, + char_at(state->text, text_pos - 1)); + after = text_pos < state->text_length && + unicode_has_property(RE_PROP_WORD, char_at(state->text, text_pos)); + + return before != at_start && after == at_start; + } + + char_0 = char_at(state->text, text_pos); + char_m1 = char_at(state->text, text_pos - 1); + prop = (int)re_get_word_break(char_0); + prop_m1 = (int)re_get_word_break(char_m1); + + /* No break within CRLF. */ + if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) + return FALSE; + + /* Break before and after Newlines (including CR and LF). */ + if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == + RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == + RE_BREAK_LF) { + before = unicode_has_property(RE_PROP_WORD, char_m1); + after = unicode_has_property(RE_PROP_WORD, char_0); + + return before != at_start && after == at_start; + } + + /* No break just before Format or Extend characters. */ + if (prop == RE_BREAK_EXTEND || prop == RE_BREAK_FORMAT) + return FALSE; + + /* Get the property of the previous character. */ + pos_m1 = text_pos - 1; + prop_m1 = RE_BREAK_OTHER; + while (pos_m1 >= 0) { + char_m1 = char_at(text, pos_m1); + prop_m1 = (int)re_get_word_break(char_m1); + if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) + break; + --pos_m1; + } + + /* No break between most letters. */ + if (prop_m1 == RE_BREAK_ALETTER && prop == RE_BREAK_ALETTER) + return FALSE; + + if (pos_m1 >= 0 && char_m1 == '\'' && is_unicode_vowel(char_0)) + return TRUE; + + pos_p1 = text_pos + 1; + prop_p1 = RE_BREAK_OTHER; + while (pos_p1 < state->text_length) { + char_p1 = char_at(text, pos_p1); + prop_p1 = (int)re_get_word_break(char_p1); + if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) + break; + --pos_p1; + } + + /* No break letters across certain punctuation. */ + if (prop_m1 == RE_BREAK_ALETTER && (prop == RE_BREAK_MIDLETTER || prop == + RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_ALETTER) + return FALSE; + + pos_m2 = pos_m1 - 1; + prop_m2 = RE_BREAK_OTHER; + while (pos_m2 >= 0) { + char_m2 = char_at(text, pos_m2); + prop_m2 = (int)re_get_word_break(char_m2); + if (prop_m2 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) + break; + --pos_m2; + } + + if (prop_m2 == RE_BREAK_ALETTER && (prop_m1 == RE_BREAK_MIDLETTER || + prop_m1 == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_ALETTER) + return FALSE; + + /* No break within sequences of digits, or digits adjacent to letters + * ("3a", or "A3"). + */ + if ((prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_ALETTER) && prop == + RE_BREAK_NUMERIC) + return FALSE; + + if (prop_m1 == RE_BREAK_NUMERIC && prop == RE_BREAK_ALETTER) + return FALSE; + + /* No break within sequences, such as "3.2" or "3,456.789". */ + if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 + == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_NUMERIC) + return FALSE; + + if (prop_m1 == RE_BREAK_NUMERIC && (prop == RE_BREAK_MIDNUM || prop == + RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_NUMERIC) + return FALSE; + + /* No break between Katakana. */ + if (prop_m1 == RE_BREAK_KATAKANA && prop == RE_BREAK_KATAKANA) + return FALSE; + + /* No break from extenders. */ + if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_NUMERIC || prop_m1 + == RE_BREAK_KATAKANA || prop_m1 == RE_BREAK_EXTENDNUMLET) && prop == + RE_BREAK_EXTENDNUMLET) + return FALSE; + + if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop == RE_BREAK_ALETTER || prop + == RE_BREAK_NUMERIC || prop == RE_BREAK_KATAKANA)) + return FALSE; + + /* Otherwise, break everywhere (including around ideographs). */ + before = unicode_has_property(RE_PROP_WORD, char_m1); + after = unicode_has_property(RE_PROP_WORD, char_0); + + return before != at_start && after == at_start; +} + +/* Checks whether the current text position is at the start of a word. */ +static BOOL unicode_at_default_word_start(RE_State* state, Py_ssize_t text_pos) + { + return unicode_at_default_word_start_or_end(state, text_pos, TRUE); +} + +/* Checks whether the current text position is at the end of a word. */ +static BOOL unicode_at_default_word_end(RE_State* state, Py_ssize_t text_pos) { + return unicode_at_default_word_start_or_end(state, text_pos, FALSE); +} + +/* Checks whether the current text position is on a grapheme boundary. + * + * The rules are defined here: + * http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries + */ +static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos) + { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + int prop; + int prop_m1; + + /* Break at the start and end of the text. */ + if (text_pos <= 0 || text_pos >= state->text_length) + return TRUE; + + char_at = state->char_at; + text = state->text; + + prop = (int)re_get_grapheme_cluster_break(char_at(text, text_pos)); + prop_m1 = (int)re_get_grapheme_cluster_break(char_at(text, text_pos - 1)); + + /* Don't break within CRLF. */ + if (prop_m1 == RE_GBREAK_CR && prop == RE_GBREAK_LF) + return FALSE; + + /* Otherwise break before and after controls (including CR and LF). */ + if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 == + RE_GBREAK_LF || prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop + == RE_GBREAK_LF) + return TRUE; + + /* Don't break Hangul syllable sequences. */ + if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V + || prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT)) + return FALSE; + if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop == + RE_GBREAK_V || prop == RE_GBREAK_T)) + return FALSE; + if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop == + RE_GBREAK_T)) + return FALSE; + + /* Don't break between regional indicator symbols. */ + if (prop_m1 == RE_GBREAK_REGIONALINDICATOR && prop == + RE_GBREAK_REGIONALINDICATOR) + return FALSE; + + /* Don't break just before Extend characters. */ + if (prop == RE_GBREAK_EXTEND) + return FALSE; + + /* Don't break before SpacingMarks, or after Prepend characters. */ + if (prop == RE_GBREAK_SPACINGMARK) + return FALSE; + + if (prop_m1 == RE_GBREAK_PREPEND) + return FALSE; + + /* Otherwise, break everywhere. */ + return TRUE; +} + +/* Checks whether a character is a line separator. */ +static BOOL unicode_is_line_sep(Py_UCS4 ch) { + return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == + 0x2029; +} + +/* Checks whether the current text position is at the start of a line. */ +static BOOL unicode_at_line_start(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 ch; + + if (text_pos == 0) + return TRUE; + + ch = state->char_at(state->text, text_pos - 1); + + if (ch == 0x0D) + /* No line break inside CRLF. */ + return text_pos >= state->text_length || state->char_at(state->text, + text_pos) != 0x0A; + + return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == + 0x2029; +} + +/* Checks whether the current text position is at the end of a line. */ +static BOOL unicode_at_line_end(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 ch; + + if (text_pos >= state->text_length) + return TRUE; + + ch = state->char_at(state->text, text_pos); + + if (ch == 0x0A) + /* No line break inside CRLF. */ + return text_pos >= 1 || state->char_at(state->text, text_pos - 1) != + 0x0D; + + return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == + 0x2029; +} + +/* Checks whether a character could be Turkic (variants of I/i). */ +static BOOL unicode_possible_turkic(Py_UCS4 ch) { + return ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131; +} + +/* Gets all the cases of a character. */ +static int unicode_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { + return re_get_all_cases(ch, codepoints); +} + +/* Returns a character with its case folded, unless it could be Turkic + * (variants of I/i). + */ +static Py_UCS4 unicode_simple_case_fold(Py_UCS4 ch) { + /* Is it a possible Turkic character? If so, pass it through unchanged. */ + if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) + return ch; + + return (Py_UCS4)re_get_simple_case_folding(ch); +} + +/* Returns a character with its case folded, unless it could be Turkic + * (variants of I/i). + */ +static int unicode_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { + /* Is it a possible Turkic character? If so, pass it through unchanged. */ + if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) { + folded[0] = ch; + return 1; + } + + return re_get_full_case_folding(ch, folded); +} + +/* Gets all the case variants of Turkic 'I'. */ +static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { + int count; + + count = 0; + + cases[count++] = ch; + + if (ch != 'I') + cases[count++] = 'I'; + + if (ch != 'i') + cases[count++] = 'i'; + + if (ch != 0x130) + cases[count++] = 0x130; + + if (ch != 0x131) + cases[count++] = 0x131; + + return count; + +} + +/* The handlers for Unicode characters. */ +static RE_EncodingTable unicode_encoding = { + unicode_has_property, + unicode_lower, + unicode_upper, + unicode_title, + unicode_at_boundary, + unicode_at_word_start, + unicode_at_word_end, + unicode_at_default_boundary, + unicode_at_default_word_start, + unicode_at_default_word_end, + unicode_at_grapheme_boundary, + unicode_is_line_sep, + unicode_at_line_start, + unicode_at_line_end, + unicode_possible_turkic, + unicode_all_cases, + unicode_simple_case_fold, + unicode_full_case_fold, + unicode_all_turkic_i, +}; + +Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name); + +/* Sets the error message. */ +Py_LOCAL_INLINE(void) set_error(int status, PyObject* object) { + TRACE(("<>\n")) + + if (!error_exception) + error_exception = get_object("_" RE_MODULE "_core", "error"); + + switch (status) { + case RE_ERROR_BACKTRACKING: + PyErr_SetString(error_exception, "too much backtracking"); + break; + case RE_ERROR_CONCURRENT: + PyErr_SetString(PyExc_ValueError, "concurrent not int or None"); + break; + case RE_ERROR_GROUP_INDEX_TYPE: + if (object) + PyErr_Format(PyExc_TypeError, + "group indices must be integers or strings, not %.200s", + object->ob_type->tp_name); + else + PyErr_Format(PyExc_TypeError, + "group indices must be integers or strings"); + break; + case RE_ERROR_ILLEGAL: + PyErr_SetString(PyExc_RuntimeError, "invalid RE code"); + break; + case RE_ERROR_INDEX: + PyErr_SetString(PyExc_TypeError, "string indices must be integers"); + break; + case RE_ERROR_INTERRUPTED: + /* An exception has already been raised, so let it fly. */ + break; + case RE_ERROR_INVALID_GROUP_REF: + PyErr_SetString(error_exception, "invalid group reference"); + break; + case RE_ERROR_MEMORY: + PyErr_NoMemory(); + break; + case RE_ERROR_NOT_STRING: + PyErr_Format(PyExc_TypeError, "expected string instance, %.200s found", + object->ob_type->tp_name); + break; + case RE_ERROR_NOT_UNICODE: + PyErr_Format(PyExc_TypeError, + "expected unicode instance, %.200s found", object->ob_type->tp_name); + break; + case RE_ERROR_NO_SUCH_GROUP: + PyErr_SetString(PyExc_IndexError, "no such group"); + break; + case RE_ERROR_REPLACEMENT: + PyErr_SetString(error_exception, "invalid replacement"); + break; + default: + /* Other error codes indicate compiler/engine bugs. */ + PyErr_SetString(PyExc_RuntimeError, + "internal error in regular expression engine"); + break; + } +} + +/* Allocates memory. + * + * Sets the Python error handler and returns NULL if the allocation fails. + */ +Py_LOCAL_INLINE(void*) re_alloc(size_t size) { + void* new_ptr; + + new_ptr = PyMem_Malloc(size); + if (!new_ptr) + set_error(RE_ERROR_MEMORY, NULL); + + return new_ptr; +} + +/* Reallocates memory. + * + * Sets the Python error handler and returns NULL if the reallocation fails. + */ +Py_LOCAL_INLINE(void*) re_realloc(void* ptr, size_t size) { + void* new_ptr; + + new_ptr = PyMem_Realloc(ptr, size); + if (!new_ptr) + set_error(RE_ERROR_MEMORY, NULL); + + return new_ptr; +} + +/* Deallocates memory. */ +Py_LOCAL_INLINE(void) re_dealloc(void* ptr) { + PyMem_Free(ptr); +} + +/* Releases the GIL if multithreading is enabled. */ +Py_LOCAL_INLINE(void) release_GIL(RE_SafeState* safe_state) { + if (safe_state->re_state->is_multithreaded) + safe_state->thread_state = PyEval_SaveThread(); +} + +/* Acquires the GIL if multithreading is enabled. */ +Py_LOCAL_INLINE(void) acquire_GIL(RE_SafeState* safe_state) { + if (safe_state->re_state->is_multithreaded) + PyEval_RestoreThread(safe_state->thread_state); +} + +/* Allocates memory, holding the GIL during the allocation. + * + * Sets the Python error handler and returns NULL if the allocation fails. + */ +Py_LOCAL_INLINE(void*) safe_alloc(RE_SafeState* safe_state, size_t size) { + void* new_ptr; + + acquire_GIL(safe_state); + + new_ptr = re_alloc(size); + + release_GIL(safe_state); + + return new_ptr; +} + +/* Reallocates memory, holding the GIL during the reallocation. + * + * Sets the Python error handler and returns NULL if the reallocation fails. + */ +Py_LOCAL_INLINE(void*) safe_realloc(RE_SafeState* safe_state, void* ptr, size_t + size) { + void* new_ptr; + + acquire_GIL(safe_state); + + new_ptr = re_realloc(ptr, size); + + release_GIL(safe_state); + + return new_ptr; +} + +/* Deallocates memory, holding the GIL during the deallocation. */ +Py_LOCAL_INLINE(void) safe_dealloc(RE_SafeState* safe_state, void* ptr) { + acquire_GIL(safe_state); + + re_dealloc(ptr); + + release_GIL(safe_state); +} + +/* Checks for KeyboardInterrupt, holding the GIL during the check. */ +Py_LOCAL_INLINE(BOOL) safe_check_signals(RE_SafeState* safe_state) { + BOOL result; + + acquire_GIL(safe_state); + + result = (BOOL)PyErr_CheckSignals(); + + release_GIL(safe_state); + + return result; +} + +/* Checks whether a character is in a range. */ +Py_LOCAL_INLINE(BOOL) in_range(Py_UCS4 lower, Py_UCS4 upper, Py_UCS4 ch) { + return lower <= ch && ch <= upper; +} + +/* Checks whether 2 characters are the same, ignoring case. */ +static BOOL same_char_ign(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2) + { + Py_UCS4 cases[RE_MAX_CASES]; + int count; + int i; + + if (ch1 == ch2) + return TRUE; + + count = encoding->all_cases(ch1, cases); + + for (i = 1; i < count; i++) { + if (cases[i] == ch2) + return TRUE; + } + + return FALSE; +} + +/* Checks whether a character has or hasn't the given property, ignoring case. + */ +Py_LOCAL_INLINE(BOOL) has_property_ign(RE_EncodingTable* encoding, RE_CODE + property, Py_UCS4 ch) { + RE_UINT32 prop; + + prop = property >> 16; + + /* We need to do special handling of case-sensitive properties according to + * the 'encoding'. + */ + if (encoding == &unicode_encoding) { + /* We are working with Unicode. */ + if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property + == RE_PROP_GC_LT) { + RE_UINT32 value; + + value = re_get_general_category(ch); + + return value == RE_PROP_LU || value == RE_PROP_LL || value == + RE_PROP_LT; + } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) + return (BOOL)re_get_cased(ch); + + /* The property is case-insensitive. */ + return unicode_has_property(property, ch); + } else if (encoding == &ascii_encoding) { + /* We are working with ASCII. */ + if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property + == RE_PROP_GC_LT) { + RE_UINT32 value; + + value = re_get_general_category(ch); + + return value == RE_PROP_LU || value == RE_PROP_LL || value == + RE_PROP_LT; + } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) + return (BOOL)re_get_cased(ch); + + /* The property is case-insensitive. */ + return ascii_has_property(property, ch); + } else { + /* We are working with Locale. */ + if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property + == RE_PROP_GC_LT) + return (isupper(ch) || islower(ch)) != 0; + else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) + return (isupper(ch) || islower(ch)) != 0; + + /* The property is case-insensitive. */ + return locale_has_property(property, ch); + } +} + +/* Checks whether a character is in a range, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_range_ign(RE_EncodingTable* encoding, Py_UCS4 lower, + Py_UCS4 upper, Py_UCS4 ch) { + Py_UCS4 cases[RE_MAX_CASES]; + int count; + int i; + + count = encoding->all_cases(ch, cases); + + for (i = 0; i < count; i++) { + if (in_range(lower, upper, cases[i])) + return TRUE; + } + + return FALSE; +} + +Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch); + +/* Checks whether a character matches a set member. */ +Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* + member, Py_UCS4 ch) { + switch (member->op) { + case RE_OP_CHARACTER: + /* values are: char_code */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + return ch == member->values[0]; + case RE_OP_PROPERTY: + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + return encoding->has_property(member->values[0], ch); + case RE_OP_RANGE: + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, + member->values[0], member->values[1])) + return in_range(member->values[0], member->values[1], ch); + case RE_OP_SET_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_diff(encoding, member, ch); + case RE_OP_SET_INTER: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_inter(encoding, member, ch); + case RE_OP_SET_SYM_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_sym_diff(encoding, member, ch); + case RE_OP_SET_UNION: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_union(encoding, member, ch); + case RE_OP_STRING: + { + /* values are: char_code, char_code, ... */ + size_t i; + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->value_count)) + + for (i = 0; i < member->value_count; i++) { + if (ch == member->values[i]) + return TRUE; + } + return FALSE; + } + default: + return FALSE; + } +} + +/* Checks whether a character matches a set member, ignoring case. */ +Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* + member, int case_count, Py_UCS4* cases) { + int i; + + for (i = 0; i < case_count; i++) { + switch (member->op) { + case RE_OP_CHARACTER: + /* values are: char_code */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + if (cases[i] == member->values[0]) + return TRUE; + break; + case RE_OP_PROPERTY: + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + if (encoding->has_property(member->values[0], cases[i])) + return TRUE; + break; + case RE_OP_RANGE: + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, + member->values[0], member->values[1])) + if (in_range(member->values[0], member->values[1], cases[i])) + return TRUE; + break; + case RE_OP_SET_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_diff(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_SET_INTER: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_inter(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_SET_SYM_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_sym_diff(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_SET_UNION: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_union(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_STRING: + { + size_t j; + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->value_count)) + + for (j = 0; j < member->value_count; j++) { + if (cases[i] == member->values[j]) + return TRUE; + } + break; + } + default: + return TRUE; + } + } + + return FALSE; +} + +/* Checks whether a character is in a set difference. */ +Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + if (matches_member(encoding, member, ch) != member->match) + return FALSE; + + member = member->next_1.node; + + while (member) { + if (matches_member(encoding, member, ch) == member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set difference, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + if (matches_member_ign(encoding, member, case_count, cases) != + member->match) + return FALSE; + + member = member->next_1.node; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) == + member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set intersection. */ +Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member(encoding, member, ch) != member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set intersection, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) != + member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set symmetric difference. */ +Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + RE_Node* member; + BOOL result; + + member = node->nonstring.next_2.node; + + result = FALSE; + + while (member) { + if (matches_member(encoding, member, ch) == member->match) + result = !result; + + member = member->next_1.node; + } + + return result; +} + +/* Checks whether a character is in a set symmetric difference, ignoring case. + */ +Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + BOOL result; + + member = node->nonstring.next_2.node; + + result = FALSE; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) == + member->match) + result = !result; + + member = member->next_1.node; + } + + return result; +} + +/* Checks whether a character is in a set union. */ +Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member(encoding, member, ch) == member->match) + return TRUE; + + member = member->next_1.node; + } + + return FALSE; +} + +/* Checks whether a character is in a set union, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) == + member->match) + return TRUE; + + member = member->next_1.node; + } + + return FALSE; +} + +/* Checks whether a character is in a set. */ +Py_LOCAL_INLINE(BOOL) in_set(RE_EncodingTable* encoding, RE_Node* node, Py_UCS4 + ch) { + switch (node->op) { + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_REV: + return in_set_diff(encoding, node, ch); + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_REV: + return in_set_inter(encoding, node, ch); + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_REV: + return in_set_sym_diff(encoding, node, ch); + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_REV: + return in_set_union(encoding, node, ch); + } + + return FALSE; +} + +/* Checks whether a character is in a set, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_ign(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + case_count = encoding->all_cases(ch, cases); + + switch (node->op) { + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + return in_set_diff_ign(encoding, node, case_count, cases); + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + return in_set_inter_ign(encoding, node, case_count, cases); + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + return in_set_sym_diff_ign(encoding, node, case_count, cases); + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + return in_set_union_ign(encoding, node, case_count, cases); + } + + return FALSE; +} + +/* Resets a guard list. */ +Py_LOCAL_INLINE(void) reset_guard_list(RE_GuardList* guard_list) { + guard_list->count = 0; + guard_list->last_text_pos = -1; +} + +/* Initialises the state for a match. */ +Py_LOCAL_INLINE(void) init_match(RE_State* state) { + size_t i; + + /* Reset the backtrack. */ + state->current_backtrack_block = &state->backtrack_block; + state->current_backtrack_block->count = 0; + state->current_saved_groups = state->first_saved_groups; + state->backtrack = NULL; + state->search_anchor = state->text_pos; + state->match_pos = state->text_pos; + + /* Reset the guards for the repeats. */ + for (i = 0; i < state->pattern->repeat_count; i++) { + reset_guard_list(&state->repeats[i].body_guard_list); + reset_guard_list(&state->repeats[i].tail_guard_list); + } + + /* Reset the guards for the fuzzy sections. */ + for (i = 0; i < state->pattern->fuzzy_count; i++) { + reset_guard_list(&state->fuzzy_guards[i].body_guard_list); + reset_guard_list(&state->fuzzy_guards[i].tail_guard_list); + } + + for (i = 0; i < state->pattern->true_group_count; i++) { + RE_GroupData* group; + + group = &state->groups[i]; + group->span.start = -1; + group->span.end = -1; + group->capture_count = 0; + group->current_capture = -1; + } + + /* Reset the guards for the group calls. */ + for (i = 0; i < state->pattern->call_ref_info_count; i++) + reset_guard_list(&state->group_call_guard_list[i]); + + /* Clear the counts and cost for matching. */ + memset(state->fuzzy_info.counts, 0, sizeof(state->fuzzy_info.counts)); + state->fuzzy_info.total_cost = 0; + state->total_errors = 0; + state->total_cost = 0; + state->too_few_errors = FALSE; + state->capture_change = 0; + state->iterations = 0; +} + +/* Adds a new backtrack entry. */ +Py_LOCAL_INLINE(BOOL) add_backtrack(RE_SafeState* safe_state, RE_UINT8 op) { + RE_State* state; + RE_BacktrackBlock* current; + + state = safe_state->re_state; + + current = state->current_backtrack_block; + if (current->count >= current->capacity) { + if (!current->next) { + RE_BacktrackBlock* next; + + /* Is there too much backtracking? */ + if (state->backtrack_allocated >= RE_MAX_BACKTRACK_ALLOC) + return FALSE; + + next = (RE_BacktrackBlock*)safe_alloc(safe_state, + sizeof(RE_BacktrackBlock)); + if (!next) + return FALSE; + + next->previous = current; + next->next = NULL; + next->capacity = RE_BACKTRACK_BLOCK_SIZE; + current->next = next; + + state->backtrack_allocated += RE_BACKTRACK_BLOCK_SIZE; + } + + current = current->next; + current->count = 0; + state->current_backtrack_block = current; + } + + state->backtrack = ¤t->items[current->count++]; + state->backtrack->op = op; + + return TRUE; +} + +/* Gets the last backtrack entry. + * + * It'll never be called when there are _no_ entries. + */ +Py_LOCAL_INLINE(RE_BacktrackData*) last_backtrack(RE_State* state) { + RE_BacktrackBlock* current; + + current = state->current_backtrack_block; + state->backtrack = ¤t->items[current->count - 1]; + + return state->backtrack; +} + +/* Discards the last backtrack entry. + * + * It'll never be called to discard the _only_ entry. + */ +Py_LOCAL_INLINE(void) discard_backtrack(RE_State* state) { + RE_BacktrackBlock* current; + + current = state->current_backtrack_block; + --current->count; + if (current->count == 0 && current->previous) + state->current_backtrack_block = current->previous; +} + +/* Copies a repeat guard list. */ +Py_LOCAL_INLINE(BOOL) copy_guard_data(RE_SafeState* safe_state, RE_GuardList* + dst, RE_GuardList* src) { + if (dst->capacity < src->count) { + RE_GuardSpan* new_spans; + + if (!safe_state) + return FALSE; + + dst->capacity = src->count; + new_spans = (RE_GuardSpan*)safe_realloc(safe_state, dst->spans, + dst->capacity * sizeof(RE_GuardSpan)); + if (!new_spans) + return FALSE; + + dst->spans = new_spans; + } + + dst->count = src->count; + memmove(dst->spans, src->spans, dst->count * sizeof(RE_GuardSpan)); + + dst->last_text_pos = -1; + + return TRUE; +} + +/* Copies a repeat. */ +Py_LOCAL_INLINE(BOOL) copy_repeat_data(RE_SafeState* safe_state, RE_RepeatData* + dst, RE_RepeatData* src) { + if (!copy_guard_data(safe_state, &dst->body_guard_list, + &src->body_guard_list) || !copy_guard_data(safe_state, + &dst->tail_guard_list, &src->tail_guard_list)) { + safe_dealloc(safe_state, dst->body_guard_list.spans); + safe_dealloc(safe_state, dst->tail_guard_list.spans); + + return FALSE; + } + + dst->count = src->count; + dst->start = src->start; + dst->capture_change = src->capture_change; + + return TRUE; +} + +/* Pushes a return node onto the group call stack. */ +Py_LOCAL_INLINE(BOOL) push_group_return(RE_SafeState* safe_state, RE_Node* + return_node) { + RE_State* state; + PatternObject* pattern; + RE_GroupCallFrame* frame; + + state = safe_state->re_state; + pattern = state->pattern; + + if (state->current_group_call_frame && + state->current_group_call_frame->next) + /* Advance to the next allocated frame. */ + frame = state->current_group_call_frame->next; + else if (!state->current_group_call_frame && state->first_group_call_frame) + /* Advance to the first allocated frame. */ + frame = state->first_group_call_frame; + else { + /* Create a new frame. */ + frame = (RE_GroupCallFrame*)safe_alloc(safe_state, + sizeof(RE_GroupCallFrame)); + if (!frame) + return FALSE; + + frame->groups = (RE_GroupData*)safe_alloc(safe_state, + pattern->true_group_count * sizeof(RE_GroupData)); + frame->repeats = (RE_RepeatData*)safe_alloc(safe_state, + pattern->repeat_count * sizeof(RE_RepeatData)); + if (!frame->groups || !frame->repeats) { + safe_dealloc(safe_state, frame->groups); + safe_dealloc(safe_state, frame->repeats); + safe_dealloc(safe_state, frame); + + return FALSE; + } + + memset(frame->groups, 0, pattern->true_group_count * + sizeof(RE_GroupData)); + memset(frame->repeats, 0, pattern->repeat_count * + sizeof(RE_RepeatData)); + + frame->previous = state->current_group_call_frame; + frame->next = NULL; + + if (frame->previous) + frame->previous->next = frame; + else + state->first_group_call_frame = frame; + } + + frame->node = return_node; + + /* Push the groups and guards. */ + if (return_node) { + size_t g; + size_t r; + + for (g = 0; g < pattern->true_group_count; g++) { + frame->groups[g].span = state->groups[g].span; + frame->groups[g].current_capture = + state->groups[g].current_capture; + } + + for (r = 0; r < pattern->repeat_count; r++) { + if (!copy_repeat_data(safe_state, &frame->repeats[r], + &state->repeats[r])) + return FALSE; + } + } + + state->current_group_call_frame = frame; + + return TRUE; +} + +/* Pops a return node from the group call stack. */ +Py_LOCAL_INLINE(RE_Node*) pop_group_return(RE_State* state) { + RE_GroupCallFrame* frame; + + frame = state->current_group_call_frame; + + /* Pop the groups and repeats. */ + if (frame->node) { + PatternObject* pattern; + size_t g; + size_t r; + + pattern = state->pattern; + + for (g = 0; g < pattern->true_group_count; g++) { + state->groups[g].span = frame->groups[g].span; + state->groups[g].current_capture = + frame->groups[g].current_capture; + } + + for (r = 0; r < pattern->repeat_count; r++) + copy_repeat_data(NULL, &state->repeats[r], &frame->repeats[r]); + } + + /* Withdraw to previous frame. */ + state->current_group_call_frame = frame->previous; + + return frame->node; +} + +/* Returns the return node from the top of the group call stack. */ +Py_LOCAL_INLINE(RE_Node*) top_group_return(RE_State* state) { + RE_GroupCallFrame* frame; + + frame = state->current_group_call_frame; + + return frame->node; +} + +/* Checks whether the node is a firstset. */ +Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { + if (node->step != 0) + return FALSE; + + switch (node->op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + return TRUE; + default: + return FALSE; + } +} + +/* Locates the start node for testing ahead. */ +Py_LOCAL_INLINE(RE_Node*) locate_test_start(RE_Node* node) { + for (;;) { + switch (node->op) { + case RE_OP_BOUNDARY: + switch (node->next_1.node->op) { + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + return node->next_1.node; + default: + return node; + } + case RE_OP_CALL_REF: + case RE_OP_END_GROUP: + case RE_OP_START_GROUP: + node = node->next_1.node; + break; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + if (node->values[1] == 0) + return node; + node = node->next_1.node; + break; + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + if (node->values[1] == 0) + return node; + return node->nonstring.next_2.node; + case RE_OP_LOOKAROUND: + node = node->next_1.node; + break; + default: + if (is_firstset(node)) { + switch (node->next_1.node->op) { + case RE_OP_END_OF_STRING: + case RE_OP_START_OF_STRING: + return node->next_1.node; + } + } + + return node; + } + } +} + +/* Checks whether a character matches any of a set of case characters. */ +Py_LOCAL_INLINE(BOOL) any_case(Py_UCS4 ch, int case_count, Py_UCS4* cases) { + int i; + + for (i = 0; i < case_count; i++) { + if (ch == cases[i]) + return TRUE; + } + + return FALSE; +} + +/* Matches many ANYs. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, Py_ssize_t + text_pos, Py_ssize_t limit, BOOL match) { + void* text; + + text = state->text; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && (text_ptr[0] != '\n') == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && (text_ptr[0] != '\n') == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && (text_ptr[0] != '\n') == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many ANYs backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, Py_ssize_t + text_pos, Py_ssize_t limit, BOOL match) { + void* text; + + text = state->text; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && (text_ptr[-1] != '\n') == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && (text_ptr[-1] != '\n') == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && (text_ptr[-1] != '\n') == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many ANY_Us. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, Py_ssize_t + text_pos, Py_ssize_t limit, BOOL match) { + BOOL (*is_line_sep)(Py_UCS4 ch); + void* text; + + is_line_sep = state->encoding->is_line_sep; + text = state->text; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && is_line_sep(text_ptr[0]) != match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && is_line_sep(text_ptr[0]) != match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && is_line_sep(text_ptr[0]) != match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many ANY_Us backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, Py_ssize_t + text_pos, Py_ssize_t limit, BOOL match) { + BOOL (*is_line_sep)(Py_UCS4 ch); + void* text; + + is_line_sep = state->encoding->is_line_sep; + text = state->text; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && is_line_sep(text_ptr[-1]) != match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && is_line_sep(text_ptr[-1]) != match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && is_line_sep(text_ptr[-1]) != match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many CHARACTERs. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + Py_UCS4 ch; + + text = state->text; + match = node->match == match; + ch = node->values[0]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many CHARACTERs, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + text = state->text; + match = node->match == match; + case_count = state->encoding->all_cases(node->values[0], cases); + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) + == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) + == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) + == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many CHARACTERs backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + text = state->text; + match = node->match == match; + case_count = state->encoding->all_cases(node->values[0], cases); + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, + cases) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, + cases) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, + cases) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many CHARACTERs backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + Py_UCS4 ch; + + text = state->text; + match = node->match == match; + ch = node->values[0]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many PROPERTYs. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); + RE_CODE property; + + text = state->text; + match = node->match == match; + has_property = state->encoding->has_property; + property = node->values[0]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && has_property(property, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && has_property(property, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && has_property(property, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many PROPERTYs, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + RE_CODE property; + + text = state->text; + match = node->match == match; + encoding = state->encoding; + property = node->values[0]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && has_property_ign(encoding, property, + text_ptr[0]) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && has_property_ign(encoding, property, + text_ptr[0]) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && has_property_ign(encoding, property, + text_ptr[0]) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many PROPERTYs backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + RE_CODE property; + + text = state->text; + match = node->match == match; + encoding = state->encoding; + property = node->values[0]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && has_property_ign(encoding, property, + text_ptr[-1]) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && has_property_ign(encoding, property, + text_ptr[-1]) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && has_property_ign(encoding, property, + text_ptr[-1]) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many PROPERTYs backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); + RE_CODE property; + + text = state->text; + match = node->match == match; + has_property = state->encoding->has_property; + property = node->values[0]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && has_property(property, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && has_property(property, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && has_property(property, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many RANGEs. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + Py_UCS4 lower; + Py_UCS4 upper; + + text = state->text; + match = node->match == match; + lower = node->values[0]; + upper = node->values[1]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && in_range(lower, upper, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && in_range(lower, upper, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && in_range(lower, upper, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many RANGEs, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + Py_UCS4 lower; + Py_UCS4 upper; + + text = state->text; + encoding = state->encoding; + match = node->match == match; + lower = node->values[0]; + upper = node->values[1]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && in_range_ign(encoding, lower, upper, + text_ptr[0]) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && in_range_ign(encoding, lower, upper, + text_ptr[0]) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && in_range_ign(encoding, lower, upper, + text_ptr[0]) == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many RANGEs backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + Py_UCS4 lower; + Py_UCS4 upper; + + text = state->text; + encoding = state->encoding; + match = node->match == match; + lower = node->values[0]; + upper = node->values[1]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && in_range_ign(encoding, lower, upper, + text_ptr[-1]) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && in_range_ign(encoding, lower, upper, + text_ptr[-1]) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && in_range_ign(encoding, lower, upper, + text_ptr[-1]) == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many RANGEs backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + Py_UCS4 lower; + Py_UCS4 upper; + + text = state->text; + match = node->match == match; + lower = node->values[0]; + upper = node->values[1]; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && in_range(lower, upper, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && in_range(lower, upper, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && in_range(lower, upper, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many SETs. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + + text = state->text; + match = node->match == match; + encoding = state->encoding; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]) == + match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many SETs, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + + text = state->text; + match = node->match == match; + encoding = state->encoding; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr < limit_ptr && in_set_ign(encoding, node, text_ptr[0]) + == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr < limit_ptr && in_set_ign(encoding, node, text_ptr[0]) + == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr < limit_ptr && in_set_ign(encoding, node, text_ptr[0]) + == match) + ++text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many SETs backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + + text = state->text; + match = node->match == match; + encoding = state->encoding; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && in_set_ign(encoding, node, text_ptr[-1]) + == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && in_set_ign(encoding, node, text_ptr[-1]) + == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && in_set_ign(encoding, node, text_ptr[-1]) + == match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Matches many SETs backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { + void* text; + RE_EncodingTable* encoding; + + text = state->text; + match = node->match == match; + encoding = state->encoding; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS1*)text; + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS2*)text; + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]) == + match) + --text_ptr; + + text_pos = text_ptr - (Py_UCS4*)text; + break; + } + } + + return text_pos; +} + +/* Counts a repeated character pattern. */ +Py_LOCAL_INLINE(size_t) count_one(RE_State* state, RE_Node* node, Py_ssize_t + text_pos, size_t max_count) { + if (max_count < 1) + return 0; + + switch (node->op) { + case RE_OP_ANY: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_ANY(state, text_pos, text_pos + max_count, TRUE) - + text_pos; + case RE_OP_ANY_ALL: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return max_count; + case RE_OP_ANY_ALL_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return max_count; + case RE_OP_ANY_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_ANY_REV(state, text_pos, text_pos - + max_count, TRUE); + case RE_OP_ANY_U: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_ANY_U(state, text_pos, text_pos + max_count, TRUE) - + text_pos; + case RE_OP_ANY_U_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_ANY_U_REV(state, text_pos, text_pos - + max_count, TRUE); + case RE_OP_CHARACTER: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_CHARACTER(state, node, text_pos, text_pos + + max_count, TRUE) - text_pos; + case RE_OP_CHARACTER_IGN: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_CHARACTER_IGN(state, node, text_pos, text_pos + + max_count, TRUE) - text_pos; + case RE_OP_CHARACTER_IGN_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_CHARACTER_IGN_REV(state, node, text_pos, + text_pos - max_count, TRUE); + case RE_OP_CHARACTER_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_CHARACTER_REV(state, node, text_pos, + text_pos - max_count, TRUE); + case RE_OP_PROPERTY: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_PROPERTY(state, node, text_pos, text_pos + max_count, + TRUE) - text_pos; + case RE_OP_PROPERTY_IGN: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_PROPERTY_IGN(state, node, text_pos, text_pos + + max_count, TRUE) - text_pos; + case RE_OP_PROPERTY_IGN_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_PROPERTY_IGN_REV(state, node, text_pos, + text_pos - max_count, TRUE); + case RE_OP_PROPERTY_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_PROPERTY_REV(state, node, text_pos, + text_pos - max_count, TRUE); + case RE_OP_RANGE: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_RANGE(state, node, text_pos, text_pos + max_count, + TRUE) - text_pos; + case RE_OP_RANGE_IGN: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_RANGE_IGN(state, node, text_pos, text_pos + + max_count, TRUE) - text_pos; + case RE_OP_RANGE_IGN_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_RANGE_IGN_REV(state, node, text_pos, + text_pos - max_count, TRUE); + case RE_OP_RANGE_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_RANGE_REV(state, node, text_pos, text_pos + - max_count, TRUE); + case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_SET(state, node, text_pos, text_pos + max_count, + TRUE) - text_pos; + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + + return match_many_SET_IGN(state, node, text_pos, text_pos + max_count, + TRUE) - text_pos; + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_SET_IGN_REV(state, node, text_pos, + text_pos - max_count, TRUE); + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + + return text_pos - match_many_SET_REV(state, node, text_pos, text_pos - + max_count, TRUE); + } + + return 0; +} + +/* Tries to match a character pattern. */ +Py_LOCAL_INLINE(BOOL) match_one(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + + char_at = state->char_at; + text = state->text; + + switch (node->op) { + case RE_OP_ANY: + return text_pos < state->slice_end && char_at(text, text_pos) != '\n'; + case RE_OP_ANY_ALL: + return text_pos < state->slice_end; + case RE_OP_ANY_ALL_REV: + return text_pos > state->slice_start; + case RE_OP_ANY_REV: + return text_pos > state->slice_start && char_at(text, text_pos - 1) != + '\n'; + case RE_OP_ANY_U: + return text_pos < state->slice_end && + !state->encoding->is_line_sep(char_at(text, text_pos)); + case RE_OP_ANY_U_REV: + return text_pos > state->slice_start && + !state->encoding->is_line_sep(char_at(text, text_pos - 1)); + case RE_OP_CHARACTER: + return text_pos < state->slice_end && (char_at(text, text_pos) == + node->values[0]) == node->match; + case RE_OP_CHARACTER_IGN: + return text_pos < state->slice_end && same_char_ign(state->encoding, + char_at(text, text_pos), node->values[0]) == node->match; + case RE_OP_CHARACTER_IGN_REV: + return text_pos > state->slice_start && same_char_ign(state->encoding, + char_at(text, text_pos - 1), node->values[0]) == node->match; + case RE_OP_CHARACTER_REV: + return text_pos > state->slice_start && (char_at(text, text_pos - 1) == + node->values[0]) == node->match; + case RE_OP_PROPERTY: + return text_pos < state->slice_end && + state->encoding->has_property(node->values[0], char_at(text, + text_pos)) == node->match; + case RE_OP_PROPERTY_IGN: + return text_pos < state->slice_end && has_property_ign(state->encoding, + node->values[0], char_at(text, text_pos)) == node->match; + case RE_OP_PROPERTY_IGN_REV: + return text_pos > state->slice_start && + has_property_ign(state->encoding, node->values[0], char_at(text, + text_pos - 1)) == node->match; + case RE_OP_PROPERTY_REV: + return text_pos > state->slice_start && + state->encoding->has_property(node->values[0], char_at(text, text_pos + - 1)) == node->match; + case RE_OP_RANGE: + return text_pos < state->slice_end && in_range(node->values[0], + node->values[1], char_at(text, text_pos)) == node->match; + case RE_OP_RANGE_IGN: + return text_pos < state->slice_end && in_range_ign(state->encoding, + node->values[0], node->values[1], char_at(text, text_pos)) == + node->match; + case RE_OP_RANGE_IGN_REV: + return text_pos > state->slice_start && in_range_ign(state->encoding, + node->values[0], node->values[1], char_at(text, text_pos - 1)) == + node->match; + case RE_OP_RANGE_REV: + return text_pos > state->slice_start && in_range(node->values[0], + node->values[1], char_at(text, text_pos - 1)) == node->match; + case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + return text_pos < state->slice_end && in_set(state->encoding, node, + char_at(text, text_pos)) == node->match; + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + return text_pos < state->slice_end && in_set_ign(state->encoding, node, + char_at(text, text_pos)) == node->match; + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + return text_pos > state->slice_start && in_set_ign(state->encoding, + node, char_at(text, text_pos - 1)) == node->match; + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + return text_pos > state->slice_start && in_set(state->encoding, node, + char_at(text, text_pos - 1)) == node->match; + } + + return FALSE; +} + +/* Performs a simple string search. */ +Py_LOCAL_INLINE(Py_ssize_t) simple_string_search(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit) { + void* text; + size_t length; + RE_CODE* values; + Py_UCS4 first_char; + + text = state->text; + length = node->value_count; + values = node->values; + first_char = values[0]; + limit -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr <= limit_ptr) { + if (text_ptr[0] == first_char) { + size_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS1*)text; + } + + ++text_ptr; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr <= limit_ptr) { + if (text_ptr[0] == first_char) { + size_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS2*)text; + } + + ++text_ptr; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr <= limit_ptr) { + if (text_ptr[0] == first_char) { + size_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS4*)text; + } + + ++text_ptr; + } + break; + } + } + + return -1; +} + +/* Performs a simple string search, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit) { + void* text; + size_t length; + RE_CODE* values; + RE_EncodingTable* encoding; + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + text = state->text; + length = node->value_count; + values = node->values; + encoding = state->encoding; + case_count = encoding->all_cases(values[0], cases); + limit -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr <= limit_ptr) { + if (any_case(text_ptr[0], case_count, cases)) { + size_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, text_ptr[pos], + values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS1*)text; + } + + ++text_ptr; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr <= limit_ptr) { + if (any_case(text_ptr[0], case_count, cases)) { + size_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, text_ptr[pos], + values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS2*)text; + } + + ++text_ptr; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr <= limit_ptr) { + if (any_case(text_ptr[0], case_count, cases)) { + size_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, text_ptr[pos], + values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS4*)text; + } + + ++text_ptr; + } + break; + } + } + + return -1; +} + +/* Performs a simple string search backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + void* text; + size_t length; + RE_CODE* values; + RE_EncodingTable* encoding; + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + text = state->text; + length = node->value_count; + values = node->values; + encoding = state->encoding; + case_count = encoding->all_cases(values[0], cases); + text_pos -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr >= limit_ptr) { + if (any_case(text_ptr[0], case_count, cases)) { + size_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, text_ptr[pos], + values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS1*)text + length; + } + + --text_ptr; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr >= limit_ptr) { + if (any_case(text_ptr[0], case_count, cases)) { + size_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, text_ptr[pos], + values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS2*)text + length; + } + + --text_ptr; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr >= limit_ptr) { + if (any_case(text_ptr[0], case_count, cases)) { + size_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, text_ptr[pos], + values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS4*)text + length; + } + + --text_ptr; + } + break; + } + } + + return -1; +} + +/* Performs a simple string search backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_rev(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit) { + void* text; + size_t length; + RE_CODE* values; + Py_UCS4 first_char; + + text = state->text; + length = node->value_count; + values = node->values; + first_char = values[0]; + text_pos -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr >= limit_ptr) { + if (text_ptr[0] == first_char) { + size_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS1*)text + length; + } + + --text_ptr; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr >= limit_ptr) { + if (text_ptr[0] == first_char) { + size_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS2*)text + length; + } + + --text_ptr; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr >= limit_ptr) { + if (text_ptr[0] == first_char) { + size_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS4*)text + length; + } + + --text_ptr; + } + break; + } + } + + return -1; +} + +/* Performs a Boyer-Moore fast string search. */ +Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit) { + void* text; + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad_character_offset; + Py_ssize_t* good_suffix_offset; + Py_ssize_t last_pos; + Py_UCS4 check_char; + + text = state->text; + length = (Py_ssize_t)node->value_count; + values = node->values; + good_suffix_offset = node->string.good_suffix_offset; + bad_character_offset = node->string.bad_character_offset; + last_pos = length - 1; + check_char = values[last_pos]; + limit -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr <= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[last_pos]; + if (ch == check_char) { + Py_ssize_t pos; + + pos = last_pos - 1; + while (pos >= 0 && text_ptr[pos] == values[pos]) + --pos; + + if (pos < 0) + return text_ptr - (Py_UCS1*)text; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr <= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[last_pos]; + if (ch == check_char) { + Py_ssize_t pos; + + pos = last_pos - 1; + while (pos >= 0 && text_ptr[pos] == values[pos]) + --pos; + + if (pos < 0) + return text_ptr - (Py_UCS2*)text; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr <= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[last_pos]; + if (ch == check_char) { + Py_ssize_t pos; + + pos = last_pos - 1; + while (pos >= 0 && text_ptr[pos] == values[pos]) + --pos; + + if (pos < 0) + return text_ptr - (Py_UCS4*)text; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + } + + return -1; +} + +/* Performs a Boyer-Moore fast string search, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_EncodingTable* encoding; + void* text; + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad_character_offset; + Py_ssize_t* good_suffix_offset; + Py_ssize_t last_pos; + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + encoding = state->encoding; + text = state->text; + length = (Py_ssize_t)node->value_count; + values = node->values; + good_suffix_offset = node->string.good_suffix_offset; + bad_character_offset = node->string.bad_character_offset; + last_pos = length - 1; + case_count = encoding->all_cases(values[last_pos], cases); + limit -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr <= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[last_pos]; + if (any_case(ch, case_count, cases)) { + Py_ssize_t pos; + + pos = last_pos - 1; + while (pos >= 0 && same_char_ign(encoding, + text_ptr[pos], values[pos])) + --pos; + + if (pos < 0) + return text_ptr - (Py_UCS1*)text; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr <= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[last_pos]; + if (any_case(ch, case_count, cases)) { + Py_ssize_t pos; + + pos = last_pos - 1; + while (pos >= 0 && same_char_ign(encoding, + text_ptr[pos], values[pos])) + --pos; + + if (pos < 0) + return text_ptr - (Py_UCS2*)text; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr <= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[last_pos]; + if (any_case(ch, case_count, cases)) { + Py_ssize_t pos; + + pos = last_pos - 1; + while (pos >= 0 && same_char_ign(encoding, + text_ptr[pos], values[pos])) + --pos; + + if (pos < 0) + return text_ptr - (Py_UCS4*)text; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + } + + return -1; +} + +/* Performs a Boyer-Moore fast string search backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_EncodingTable* encoding; + void* text; + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad_character_offset; + Py_ssize_t* good_suffix_offset; + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + encoding = state->encoding; + text = state->text; + length = (Py_ssize_t)node->value_count; + values = node->values; + good_suffix_offset = node->string.good_suffix_offset; + bad_character_offset = node->string.bad_character_offset; + case_count = encoding->all_cases(values[0], cases); + text_pos -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr >= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[0]; + if (any_case(ch, case_count, cases)) { + Py_ssize_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, + text_ptr[pos], values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS1*)text + length; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr >= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[0]; + if (any_case(ch, case_count, cases)) { + Py_ssize_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, + text_ptr[pos], values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS2*)text + length; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr >= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[0]; + if (any_case(ch, case_count, cases)) { + Py_ssize_t pos; + + pos = 1; + while (pos < length && same_char_ign(encoding, + text_ptr[pos], values[pos])) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS4*)text + length; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + } + + return -1; +} + +/* Performs a Boyer-Moore fast string search backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit) { + void* text; + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad_character_offset; + Py_ssize_t* good_suffix_offset; + Py_UCS4 check_char; + + text = state->text; + length = (Py_ssize_t)node->value_count; + values = node->values; + good_suffix_offset = node->string.good_suffix_offset; + bad_character_offset = node->string.bad_character_offset; + check_char = values[0]; + text_pos -= length; + + switch (state->charsize) { + case 1: + { + Py_UCS1* text_ptr; + Py_UCS1* limit_ptr; + + text_ptr = (Py_UCS1*)text + text_pos; + limit_ptr = (Py_UCS1*)text + limit; + + while (text_ptr >= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[0]; + if (ch == check_char) { + Py_ssize_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS1*)text + length; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 2: + { + Py_UCS2* text_ptr; + Py_UCS2* limit_ptr; + + text_ptr = (Py_UCS2*)text + text_pos; + limit_ptr = (Py_UCS2*)text + limit; + + while (text_ptr >= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[0]; + if (ch == check_char) { + Py_ssize_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS2*)text + length; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + case 4: + { + Py_UCS4* text_ptr; + Py_UCS4* limit_ptr; + + text_ptr = (Py_UCS4*)text + text_pos; + limit_ptr = (Py_UCS4*)text + limit; + + while (text_ptr >= limit_ptr) { + Py_UCS4 ch; + + ch = text_ptr[0]; + if (ch == check_char) { + Py_ssize_t pos; + + pos = 1; + while (pos < length && text_ptr[pos] == values[pos]) + ++pos; + + if (pos >= length) + return text_ptr - (Py_UCS4*)text + length; + + text_ptr += good_suffix_offset[pos]; + } else + text_ptr += bad_character_offset[ch & 0xFF]; + } + break; + } + } + + return -1; +} + +/* Check whether 2 characters are the same. */ +static BOOL same_char(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2) { + return ch1 == ch2; +} + +/* Build the tables for a Boyer-Moore fast string search. */ +Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* + node, BOOL ignore) { + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad; + Py_ssize_t* good; + Py_UCS4 ch; + Py_ssize_t last_pos; + Py_ssize_t pos; + BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); + Py_ssize_t suffix_len; + BOOL saved_start; + Py_ssize_t s; + Py_ssize_t i; + Py_ssize_t s_start; + Py_UCS4 codepoints[RE_MAX_CASES]; + + length = (Py_ssize_t)node->value_count; + + if (length < RE_MIN_FAST_LENGTH) + return TRUE; + + values = node->values; + bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); + good = (Py_ssize_t*)re_alloc(length * sizeof(good[0])); + + if (!bad || !good) { + re_dealloc(bad); + re_dealloc(good); + + return FALSE; + } + + for (ch = 0; ch < 0x100; ch++) + bad[ch] = length; + + last_pos = length - 1; + + for (pos = 0; pos < last_pos; pos++) { + Py_ssize_t offset; + + offset = last_pos - pos; + ch = values[pos]; + if (ignore) { + int count; + int i; + + count = encoding->all_cases(ch, codepoints); + + for (i = 0; i < count; i++) + bad[codepoints[i] & 0xFF] = offset; + } else + bad[ch & 0xFF] = offset; + } + + is_same_char = ignore ? same_char_ign : same_char; + + suffix_len = 2; + pos = length - suffix_len; + saved_start = FALSE; + s = pos - 1; + i = suffix_len - 1; + s_start = s; + while (pos >= 0) { + /* Look for another occurrence of the suffix. */ + while (i > 0) { + /* Have we dropped off the end of the string? */ + if (s + i < 0) + break; + + if (is_same_char(encoding, values[s + i], values[pos + i])) + /* It still matches. */ + --i; + else { + /* Start again further along. */ + --s; + i = suffix_len - 1; + } + } + + if (s >= 0 && is_same_char(encoding, values[s], values[pos])) { + /* We haven't dropped off the end of the string, and the suffix has + * matched this far, so this is a good starting point for the next + * iteration. + */ + --s; + if (!saved_start) { + s_start = s; + saved_start = TRUE; + } + } else { + /* Calculate the suffix offset. */ + good[pos] = pos - s; + + /* Extend the suffix and start searching for _this_ one. */ + --pos; + ++suffix_len; + + /* Where's a good place to start searching? */ + if (saved_start) { + s = s_start; + saved_start = FALSE; + } else + --s; + + /* Can we short-circuit the searching? */ + if (s < 0) + break; + } + + i = suffix_len - 1; + } + + /* Fill-in any remaining entries. */ + while (pos >= 0) { + good[pos] = pos - s; + --pos; + --s; + } + + node->string.bad_character_offset = bad; + node->string.good_suffix_offset = good; + + return TRUE; +} + +/* Build the tables for a Boyer-Moore fast string search backwards. */ +Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, + RE_Node* node, BOOL ignore) { + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad; + Py_ssize_t* good; + Py_UCS4 ch; + Py_ssize_t last_pos; + Py_ssize_t pos; + BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); + Py_ssize_t suffix_len; + BOOL saved_start; + Py_ssize_t s; + Py_ssize_t i; + Py_ssize_t s_start; + Py_UCS4 codepoints[RE_MAX_CASES]; + + length = (Py_ssize_t)node->value_count; + + if (length < RE_MIN_FAST_LENGTH) + return TRUE; + + values = node->values; + bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); + good = (Py_ssize_t*)re_alloc(length * sizeof(good[0])); + + if (!bad || !good) { + re_dealloc(bad); + re_dealloc(good); + + return FALSE; + } + + for (ch = 0; ch < 0x100; ch++) + bad[ch] = -length; + + last_pos = length - 1; + + for (pos = last_pos; pos > 0; pos--) { + Py_ssize_t offset; + + offset = -pos; + ch = values[pos]; + if (ignore) { + int count; + int i; + + count = encoding->all_cases(ch, codepoints); + + for (i = 0; i < count; i++) + bad[codepoints[i] & 0xFF] = offset; + } else + bad[ch & 0xFF] = offset; + } + + is_same_char = ignore ? same_char_ign : same_char; + + suffix_len = 2; + pos = suffix_len - 1; + saved_start = FALSE; + s = pos + 1; + i = suffix_len - 1; + s_start = s; + while (pos < length) { + /* Look for another occurrence of the suffix. */ + while (i > 0) { + /* Have we dropped off the end of the string? */ + if (s - i >= length) + break; + + if (is_same_char(encoding, values[s - i], values[pos - i])) + /* It still matches. */ + --i; + else { + /* Start again further along. */ + ++s; + i = suffix_len - 1; + } + } + + if (s < length && is_same_char(encoding, values[s], values[pos])) { + /* We haven't dropped off the end of the string, and the suffix has + * matched this far, so this is a good starting point for the next + * iteration. + */ + ++s; + if (!saved_start) { + s_start = s; + saved_start = TRUE; + } + } else { + /* Calculate the suffix offset. */ + good[pos] = pos - s; + + /* Extend the suffix and start searching for _this_ one. */ + ++pos; + ++suffix_len; + + /* Where's a good place to start searching? */ + if (saved_start) { + s = s_start; + saved_start = FALSE; + } else + ++s; + + /* Can we short-circuit the searching? */ + if (s >= length) + break; + } + + i = suffix_len - 1; + } + + /* Fill-in any remaining entries. */ + while (pos < length) { + good[pos] = pos - s; + ++pos; + ++s; + } + + node->string.bad_character_offset = bad; + node->string.good_suffix_offset = good; + + return TRUE; +} + +/* Performs a string search. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search(RE_SafeState* safe_state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + /* Can the string fit the available space? */ + if (text_pos + (Py_ssize_t)node->value_count > limit) + return -1; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables(state->encoding, node, FALSE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) + found_pos = fast_string_search(state, node, text_pos, limit); + else + found_pos = simple_string_search(state, node, text_pos, limit); + + return found_pos; +} + +/* Performs a string search, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_fld(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos) { + RE_State* state; + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + RE_CODE* values; + Py_ssize_t start_pos; + int folded_pos; + int folded_len; + Py_ssize_t length; + Py_ssize_t string_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + int case_count; + Py_UCS4 cases[RE_MAX_CASES]; + + state = safe_state->re_state; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + char_at = state->char_at; + text = state->text; + + values = node->values; + start_pos = text_pos; + folded_pos = 0; + folded_len = 0; + length = node->value_count; + string_pos = 0; + + /* We'll special-case the first character of the string. */ + case_count = encoding->all_cases(values[0], cases); + + while (string_pos < length || folded_pos < folded_len) { + if (folded_pos >= folded_len) { +fetch: + if (text_pos >= limit) + return -1; + + folded_len = full_case_fold(char_at(text, text_pos), folded); + folded_pos = 0; + } + + if (string_pos == 0) { + int i; + + for (i = 0; i < case_count; i++) { + if (folded[0] == cases[i]) + goto match; + } + + ++start_pos; + text_pos = start_pos; + goto fetch; + } else if (same_char_ign(encoding, values[string_pos], + folded[folded_pos])) { +match: + ++string_pos; + ++folded_pos; + + if (folded_pos >= folded_len) + ++text_pos; + } else { + ++start_pos; + text_pos = start_pos; + folded_pos = 0; + folded_len = 0; + string_pos = 0; + } + } + + if (new_pos) + *new_pos = text_pos; + + return start_pos; +} + +/* Performs a string search backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_fld_rev(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos) { + RE_State* state; + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + RE_CODE* values; + Py_ssize_t start_pos; + int folded_pos; + int folded_len; + Py_ssize_t length; + Py_ssize_t string_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + int case_count; + Py_UCS4 cases[RE_MAX_CASES]; + + state = safe_state->re_state; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + char_at = state->char_at; + text = state->text; + + values = node->values; + start_pos = text_pos; + folded_pos = 0; + folded_len = 0; + length = node->value_count; + string_pos = length; + + /* We'll special-case the last character of the string. */ + case_count = encoding->all_cases(values[length - 1], cases); + + while (string_pos > 0 || folded_pos > 0) { + if (folded_pos <= 0) { +fetch: + if (text_pos <= limit) + return -1; + + folded_len = full_case_fold(char_at(text, text_pos - 1), folded); + folded_pos = folded_len; + } + + if (string_pos == length) { + int i; + + for (i = 0; i < case_count; i++) { + if (folded[folded_len - 1] == cases[i]) + goto match; + } + + --start_pos; + text_pos = start_pos; + goto fetch; + } else if (same_char_ign(encoding, values[string_pos - 1], + folded[folded_pos - 1])) { +match: + --string_pos; + --folded_pos; + + if (folded_pos <= 0) + --text_pos; + } else { + --start_pos; + text_pos = start_pos; + folded_pos = 0; + folded_len = 0; + string_pos = length; + } + } + + if (new_pos) + *new_pos = text_pos; + + return start_pos; +} + +/* Performs a string search, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_ign(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + /* Can the string fit the available space? */ + if (text_pos + (Py_ssize_t)node->value_count > limit) + return -1; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables(state->encoding, node, TRUE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) + found_pos = fast_string_search_ign(state, node, text_pos, limit); + else + found_pos = simple_string_search_ign(state, node, text_pos, limit); + + return found_pos; +} + +/* Performs a string search backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_ign_rev(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + /* Can the string fit the available space? */ + if (text_pos - (Py_ssize_t)node->value_count < limit) + return -1; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables_rev(state->encoding, node, TRUE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) + found_pos = fast_string_search_ign_rev(state, node, text_pos, limit); + else + found_pos = simple_string_search_ign_rev(state, node, text_pos, limit); + + return found_pos; +} + +/* Performs a string search backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_rev(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + /* Can the string fit the available space? */ + if (text_pos - (Py_ssize_t)node->value_count < limit) + return -1; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables_rev(state->encoding, node, FALSE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) + found_pos = fast_string_search_rev(state, node, text_pos, limit); + else + found_pos = simple_string_search_rev(state, node, text_pos, limit); + + return found_pos; +} + +/* Returns how many characters there could be before full case-folding. */ +Py_LOCAL_INLINE(size_t) possible_unfolded_length(size_t length) { + if (length == 0) + return 0; + + if (length < RE_MAX_FOLDED) + return 1; + + return length / RE_MAX_FOLDED; +} + +/* Tries a match at the current text position. + * + * Returns TRUE and the next node and text position if the match succeeds. + */ +Py_LOCAL_INLINE(BOOL) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t + text_pos, RE_Position* next_position) { + RE_Node* test; + void* text; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + + test = next->test; + + if (test->status & RE_STATUS_FUZZY) { + next_position->node = next->node; + next_position->text_pos = text_pos; + return TRUE; + } + + text = state->text; + char_at = state->char_at; + + switch (test->op) { + case RE_OP_ANY: /* Any character, except a newline. */ + if (text_pos >= state->slice_end || char_at(text, text_pos) == '\n') + return FALSE; + break; + case RE_OP_ANY_ALL: /* Any character at all. */ + if (text_pos >= state->slice_end) + return FALSE; + break; + case RE_OP_ANY_ALL_REV: /* Any character at all. */ + if (text_pos <= state->slice_start) + return FALSE; + break; + case RE_OP_ANY_REV: /* Any character, except a newline. */ + if (text_pos <= state->slice_start || char_at(text, text_pos - 1) == + '\n') + return FALSE; + break; + case RE_OP_ANY_U: /* Any character, except a line separator. */ + if (text_pos >= state->slice_end || + state->encoding->is_line_sep(char_at(text, text_pos))) + return FALSE; + break; + case RE_OP_ANY_U_REV: /* Any character, except a line separator. */ + if (text_pos <= state->slice_start || + state->encoding->is_line_sep(char_at(text, text_pos - 1))) + return FALSE; + break; + case RE_OP_BOUNDARY: /* At a word boundary. */ + if (state->encoding->at_boundary(state, text_pos) != test->match) + return FALSE; + break; + case RE_OP_BRANCH: /* 2-way branch. */ + if (!try_match(state, &test->next_1, text_pos, next_position) && + !try_match(state, &test->nonstring.next_2, text_pos, next_position)) + return FALSE; + break; + case RE_OP_CHARACTER: /* A character literal. */ + if (text_pos >= state->slice_end || (char_at(text, text_pos) == + test->values[0]) != test->match) + return FALSE; + break; + case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ + if (text_pos >= state->slice_end || same_char_ign(state->encoding, + char_at(text, text_pos), test->values[0]) != test->match) + return FALSE; + break; + case RE_OP_CHARACTER_IGN_REV: /* A character literal, ignoring case. */ + if (text_pos <= state->slice_start || same_char_ign(state->encoding, + char_at(text, text_pos - 1), test->values[0]) != test->match) + return FALSE; + break; + case RE_OP_CHARACTER_REV: /* A character literal. */ + if (text_pos <= state->slice_start || (char_at(text, text_pos - 1) == + test->values[0]) != test->match) + return FALSE; + break; + case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ + if (state->encoding->at_default_boundary(state, text_pos) != + test->match) + return FALSE; + break; + case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ + if (state->encoding->at_default_word_end(state, text_pos) != + test->match) + return FALSE; + break; + case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ + if (state->encoding->at_default_word_start(state, text_pos) != + test->match) + return FALSE; + break; + case RE_OP_END_OF_LINE: /* At the end of a line. */ + if (text_pos != state->text_length && char_at(text, text_pos) != '\n') + return FALSE; + break; + case RE_OP_END_OF_LINE_U: /* At the end of a line. */ + if (!state->encoding->at_line_end(state, text_pos)) + return FALSE; + break; + case RE_OP_END_OF_STRING: /* At the end of the string. */ + if (text_pos != state->text_length) + return FALSE; + break; + case RE_OP_END_OF_STRING_LINE: /* At the end of the string or the final newline. */ + if (text_pos != state->text_length && text_pos != state->final_newline) + return FALSE; + break; + case RE_OP_END_OF_STRING_LINE_U: /* At the end of the string or the final newline. */ + if (text_pos != state->text_length && text_pos != + state->final_line_sep) + return FALSE; + break; + case RE_OP_END_OF_WORD: /* At end of a word. */ + if (state->encoding->at_word_end(state, text_pos) != test->match) + return FALSE; + break; + case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ + if (state->encoding->at_grapheme_boundary(state, text_pos) != + test->match) + return FALSE; + break; + case RE_OP_PROPERTY: /* A character property. */ + /* values are: property */ + if (text_pos >= state->slice_end || + state->encoding->has_property(test->values[0], char_at(text, + text_pos)) != test->match) + return FALSE; + break; + case RE_OP_PROPERTY_IGN: /* A character property, ignoring case. */ + /* values are: property */ + if (text_pos >= state->slice_end || !has_property_ign(state->encoding, + test->values[0], char_at(text, text_pos)) != test->match) + return FALSE; + break; + case RE_OP_PROPERTY_IGN_REV: /* A character property, ignoring case. */ + /* values are: property */ + if (text_pos <= state->slice_start || + !has_property_ign(state->encoding, test->values[0], char_at(text, + text_pos - 1)) != test->match) + return FALSE; + break; + case RE_OP_PROPERTY_REV: /* A character property. */ + /* values are: property */ + if (text_pos <= state->slice_start || + state->encoding->has_property(test->values[0], char_at(text, text_pos + - 1)) != test->match) + return FALSE; + break; + case RE_OP_RANGE: /* A range. */ + /* values are: range */ + if (text_pos >= state->slice_end || in_range(test->values[0], + test->values[1], char_at(text, text_pos)) != test->match) + return FALSE; + break; + case RE_OP_RANGE_IGN: /* A range, ignoring case. */ + /* values are: range */ + if (text_pos >= state->slice_end || in_range_ign(state->encoding, + test->values[0], test->values[1], char_at(text, text_pos)) != + test->match) + return FALSE; + break; + case RE_OP_RANGE_IGN_REV: /* A range, ignoring case. */ + /* values are: range */ + if (text_pos <= state->slice_start || in_range_ign(state->encoding, + test->values[0], test->values[1], char_at(text, text_pos - 1)) != + test->match) + return FALSE; + break; + case RE_OP_RANGE_REV: /* A range. */ + /* values are: range */ + if (text_pos <= state->slice_start || in_range(test->values[0], + test->values[1], char_at(text, text_pos - 1)) != test->match) + return FALSE; + break; + case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + if (text_pos != state->search_anchor) + return FALSE; + break; + case RE_OP_SET_DIFF: /* Character set. */ + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + if (text_pos >= state->slice_end || in_set(state->encoding, test, + char_at(text, text_pos)) != test->match) + return FALSE; + break; + case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + if (text_pos >= state->slice_end || in_set_ign(state->encoding, test, + char_at(text, text_pos)) != test->match) + return FALSE; + break; + case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + if (text_pos <= state->slice_start || in_set_ign(state->encoding, test, + char_at(text, text_pos - 1)) != test->match) + return FALSE; + break; + case RE_OP_SET_DIFF_REV: /* Character set. */ + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + if (text_pos <= state->slice_start || in_set(state->encoding, test, + char_at(text, text_pos - 1)) != test->match) + return FALSE; + break; + case RE_OP_START_OF_LINE: /* At the start of a line. */ + if (text_pos != 0 && char_at(text, text_pos - 1) != '\n') + return FALSE; + break; + case RE_OP_START_OF_LINE_U: /* At the start of a line. */ + if (!state->encoding->at_line_start(state, text_pos)) + return FALSE; + break; + case RE_OP_START_OF_STRING: /* At the start of the string. */ + if (text_pos != 0) + return FALSE; + break; + case RE_OP_START_OF_WORD: /* At start of a word. */ + if (state->encoding->at_word_start(state, text_pos) != test->match) + return FALSE; + break; + case RE_OP_STRING: /* A string literal. */ + { + size_t length; + size_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_CODE* values; + size_t i; + + length = test->value_count; + available = state->slice_end - text_pos; + if (length > available) + return FALSE; + + char_at = state->char_at; + values = test->values; + + for (i = 0; i < length; i++) { + if (char_at(text, text_pos + i) != values[i]) + return FALSE; + } + break; + } + case RE_OP_STRING_FLD: /* A string literal, ignoring case. */ + { + size_t length; + Py_ssize_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t pos; + size_t string_pos; + RE_CODE* values; + int folded_len; + int folded_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + + length = test->value_count; + available = state->slice_end - text_pos; + if ((Py_ssize_t)possible_unfolded_length(length) > available) + return FALSE; + + char_at = state->char_at; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + pos = text_pos; + string_pos = 0; + values = test->values; + folded_len = 0; + folded_pos = 0; + + while (string_pos < length) { + if (folded_pos >= folded_len) { + if (pos >= state->slice_end) + return FALSE; + + folded_len = full_case_fold(char_at(text, pos), folded); + folded_pos = 0; + } + + if (!same_char_ign(encoding, folded[folded_pos], + values[string_pos])) + return FALSE; + + ++string_pos; + ++folded_pos; + + if (folded_pos >= folded_len) + ++pos; + } + + if (folded_pos < folded_len) + return FALSE; + + next_position->node = next->match_next; + if (next->match_step == 0) + next_position->text_pos = text_pos; + else + next_position->text_pos = pos; + + return TRUE; + } + case RE_OP_STRING_FLD_REV: /* A string literal, ignoring case. */ + { + size_t length; + Py_ssize_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t pos; + size_t string_pos; + RE_CODE* values; + int folded_len; + int folded_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + + length = test->value_count; + available = text_pos - state->slice_start; + if ((Py_ssize_t)possible_unfolded_length(length) > available) + return FALSE; + + char_at = state->char_at; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + pos = text_pos; + string_pos = length; + values = test->values; + folded_len = 0; + folded_pos = folded_len; + + while (string_pos > 0) { + if (folded_pos <= 0) { + if (pos <= state->slice_start) + return FALSE; + + folded_len = full_case_fold(char_at(text, pos - 1), folded); + folded_pos = folded_len; + } + + if (!same_char_ign(encoding, folded[folded_pos - 1], + values[string_pos - 1])) + return FALSE; + + --string_pos; + --folded_pos; + + if (folded_pos <= 0) + --pos; + } + + if (folded_pos > 0) + return FALSE; + + next_position->node = next->match_next; + if (next->match_step == 0) + next_position->text_pos = text_pos; + else + next_position->text_pos = pos; + + return TRUE; + } + case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ + { + size_t length; + size_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + RE_CODE* values; + size_t i; + + length = test->value_count; + available = state->slice_end - text_pos; + if (length > available) + return FALSE; + + char_at = state->char_at; + encoding = state->encoding; + values = test->values; + + for (i = 0; i < length; i++) { + if (!same_char_ign(encoding, char_at(text, text_pos + i), + values[i])) + return FALSE; + } + break; + } + case RE_OP_STRING_IGN_REV: /* A string literal, ignoring case. */ + { + size_t length; + size_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + RE_CODE* values; + size_t i; + + length = test->value_count; + available = text_pos - state->slice_start; + if (length > available) + return FALSE; + + char_at = state->char_at; + encoding = state->encoding; + values = test->values; + text_pos -= length; + + for (i = 0; i < length; i++) { + if (!same_char_ign(encoding, char_at(text, text_pos + i), + values[i])) + return FALSE; + } + + text_pos += length; + break; + } + case RE_OP_STRING_REV: /* A string literal. */ + { + size_t length; + size_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_CODE* values; + size_t i; + + length = test->value_count; + available = text_pos - state->slice_start; + if (length > available) + return FALSE; + + char_at = state->char_at; + values = test->values; + text_pos -= length; + + for (i = 0; i < length; i++) { + if (char_at(text, text_pos + i) != values[i]) + return FALSE; + } + + text_pos += length; + break; + } + default: + next_position->node = next->node; + next_position->text_pos = text_pos; + return TRUE; + } + + next_position->node = next->match_next; + next_position->text_pos = text_pos + next->match_step; + + return TRUE; +} + +Py_LOCAL_INLINE(BOOL) search_start(RE_SafeState* safe_state, RE_NextNode* next, + RE_Position* new_position, int search_index); + +/* Searches for the start of a match. */ +Py_LOCAL_INLINE(BOOL) search_start(RE_SafeState* safe_state, RE_NextNode* next, + RE_Position* new_position, int search_index) { + RE_State* state; + Py_ssize_t text_pos; + RE_Node* test; + RE_Node* node; + Py_ssize_t start_pos; + Py_ssize_t step; + Py_ssize_t limit; + RE_SearchPosition* info; + + state = safe_state->re_state; + + start_pos = state->text_pos; + TRACE(("<> at %d\n", start_pos)) + + test = next->test; + node = next->node; + + if (state->reverse) { + if (start_pos < state->slice_start) + return FALSE; + + limit = state->slice_start; + step = -1; + } else { + if (start_pos > state->slice_end) + return FALSE; + + limit = state->slice_end; + step = 1; + } + + if (test->status & RE_STATUS_FUZZY) { + /* Don't call 'search_start' again. */ + state->pattern->do_search_start = FALSE; + + state->match_pos = start_pos; + new_position->node = node; + new_position->text_pos = start_pos; + + return TRUE; + } + +again: + if (!state->pattern->is_fuzzy) { + if (state->reverse) { + if (start_pos - (Py_ssize_t)state->min_width < limit) + return FALSE; + } else { + if (start_pos + (Py_ssize_t)state->min_width > limit) + return FALSE; + } + } + + if (search_index < MAX_SEARCH_POSITIONS) { + info = &state->search_positions[search_index]; + if (state->reverse) { + if (info->start_pos >= 0 && info->start_pos >= start_pos && + start_pos >= info->match_pos) { + + state->match_pos = info->match_pos; + + new_position->text_pos = state->match_pos; + new_position->node = node; + + return TRUE; + } + } else { + if (info->start_pos >= 0 && info->start_pos <= start_pos && + start_pos <= info->match_pos) { + + state->match_pos = info->match_pos; + + new_position->text_pos = state->match_pos; + new_position->node = node; + + return TRUE; + } + } + } else + info = NULL; + + switch (test->op) { + case RE_OP_ANY: /* Any character, except a newline. */ + start_pos = match_many_ANY(state, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_ANY_ALL: /* Any character at all. */ + break; + case RE_OP_ANY_ALL_REV: /* Any character at all backwards. */ + break; + case RE_OP_ANY_REV: /* Any character backwards, except a newline. */ + start_pos = match_many_ANY_REV(state, start_pos, limit, FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_ANY_U: /* Any character, except a line separator. */ + start_pos = match_many_ANY_U(state, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_ANY_U_REV: /* Any character backwards, except a line separator. */ + start_pos = match_many_ANY_U_REV(state, start_pos, limit, FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_BOUNDARY: /* At a word boundary. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_boundary)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_boundary = state->encoding->at_boundary; + + for (;;) { + if (at_boundary(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_CHARACTER: /* A character literal. */ + start_pos = match_many_CHARACTER(state, test, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ + start_pos = match_many_CHARACTER_IGN(state, test, start_pos, limit, + FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_CHARACTER_IGN_REV: /* A character literal backwards, ignoring case. */ + start_pos = match_many_CHARACTER_IGN_REV(state, test, start_pos, limit, + FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_CHARACTER_REV: /* A character literal backwards. */ + start_pos = match_many_CHARACTER_REV(state, test, start_pos, limit, + FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_default_boundary = state->encoding->at_default_boundary; + + for (;;) { + if (at_default_boundary(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_default_word_end = state->encoding->at_default_word_end; + + for (;;) { + if (at_default_word_end(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_default_word_start = state->encoding->at_default_word_start; + + for (;;) { + if (at_default_word_start(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_END_OF_LINE: /* At the end of a line. */ + { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t step; + + char_at = state->char_at; + text = state->text; + text_pos = start_pos; + step = state->reverse ? -1 : 1; + + for (;;) { + if (text_pos == state->text_length || char_at(text, text_pos) == + '\n') + break; + if (text_pos == limit) + return FALSE; + text_pos += step; + } + + start_pos = text_pos; + break; + } + case RE_OP_END_OF_STRING: /* At the end of the string. */ + if (state->reverse) { + if (start_pos != state->text_length) + return FALSE; + } else { + if (state->slice_end != state->text_length) + return FALSE; + } + + start_pos = state->text_length; + break; + case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ + if (state->reverse) { + if (start_pos >= state->text_length) + start_pos = state->text_length; + else if (start_pos >= state->final_newline) + start_pos = state->final_newline; + else + return FALSE; + + if (start_pos < state->slice_start) + return FALSE; + } else { + if (start_pos <= state->final_newline) + start_pos = state->final_newline; + else if (start_pos <= state->text_length) + start_pos = state->text_length; + else + return FALSE; + + if (start_pos > state->slice_end) + return FALSE; + } + break; + case RE_OP_END_OF_WORD: /* At end of a word. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_word_end)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_word_end = state->encoding->at_word_end; + + for (;;) { + if (at_word_end(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_boundary)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_boundary = state->encoding->at_grapheme_boundary; + + for (;;) { + if (at_boundary(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_PROPERTY: /* A character property. */ + start_pos = match_many_PROPERTY(state, test, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_PROPERTY_IGN: /* A character property, ignoring case. */ + start_pos = match_many_PROPERTY_IGN(state, test, start_pos, limit, + FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_PROPERTY_IGN_REV: /* A character property backwards, ignoring case. */ + start_pos = match_many_PROPERTY_IGN_REV(state, test, start_pos, limit, + FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_PROPERTY_REV: /* A character property backwards. */ + start_pos = match_many_PROPERTY_REV(state, test, start_pos, limit, + FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_RANGE: /* A range. */ + start_pos = match_many_RANGE(state, test, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_RANGE_IGN: /* A range, ignoring case. */ + start_pos = match_many_RANGE_IGN(state, test, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_RANGE_IGN_REV: /* A range backwards, ignoring case. */ + start_pos = match_many_RANGE_IGN_REV(state, test, start_pos, limit, + FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_RANGE_REV: /* A range backwards. */ + start_pos = match_many_RANGE_REV(state, test, start_pos, limit, FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + if (state->reverse) { + if (start_pos < state->search_anchor) + return FALSE; + } else { + if (start_pos > state->search_anchor) + return FALSE; + } + + start_pos = state->search_anchor; + break; + case RE_OP_SET_DIFF: /* A set. */ + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + start_pos = match_many_SET(state, test, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_SET_DIFF_IGN: /* A set, ignoring case. */ + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + start_pos = match_many_SET_IGN(state, test, start_pos, limit, FALSE); + if (start_pos >= limit) + return FALSE; + break; + case RE_OP_SET_DIFF_IGN_REV: /* A set backwards, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + start_pos = match_many_SET_IGN_REV(state, test, start_pos, limit, + FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_SET_DIFF_REV: /* A set backwards. */ + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + start_pos = match_many_SET_REV(state, test, start_pos, limit, FALSE); + if (start_pos <= limit) + return FALSE; + break; + case RE_OP_START_OF_LINE: /* At the start of a line. */ + { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t step; + Py_ssize_t text_pos; + + char_at = state->char_at; + text = state->text; + step = state->reverse ? -1 : 1; + text_pos = start_pos - 1; + + --limit; + + for (;;) { + if (text_pos < 0 || char_at(text, text_pos) == '\n') + break; + if (text_pos == limit) + return FALSE; + text_pos += step; + } + + ++text_pos; + ++limit; + + start_pos = text_pos; + break; + } + case RE_OP_START_OF_STRING: /* At the start of the string. */ + if (state->reverse) { + if (state->slice_start != 0) + return FALSE; + } else { + if (start_pos != 0) + return FALSE; + } + + start_pos = 0; + break; + case RE_OP_START_OF_WORD: /* At start of a word. */ + { + BOOL match; + Py_ssize_t step; + BOOL (*at_word_start)(RE_State* state, Py_ssize_t start_pos); + + match = test->match; + step = state->reverse ? -1 : 1; + at_word_start = state->encoding->at_word_start; + + for (;;) { + if (at_word_start(state, start_pos) == match) + break; + if (start_pos == limit) + return FALSE; + start_pos += step; + } + break; + } + case RE_OP_STRING: /* A string literal. */ + if (!(test->status & RE_STATUS_REQUIRED) || start_pos != + state->req_pos) { + start_pos = string_search(safe_state, test, start_pos, limit); + if (start_pos < 0) + return FALSE; + } + break; + case RE_OP_STRING_FLD: /* A string literal, ignoring case. */ + { + Py_ssize_t new_pos; + + if ((test->status & RE_STATUS_REQUIRED) && start_pos == state->req_pos) + new_pos = state->req_end; + else + start_pos = string_search_fld(safe_state, test, start_pos, + state->slice_end, &new_pos); + if (start_pos < 0) + return FALSE; + + /* Can we look further ahead? */ + if (test == node) { + if (test->next_1.node && !try_match(state, &test->next_1, new_pos, + new_position)) { + ++start_pos; + + if (state->reverse) { + if (start_pos < state->slice_start) + return FALSE; + } else { + if (start_pos > state->slice_end) + return FALSE; + } + + goto again; + } + + /* It's a possible match. */ + state->match_pos = start_pos; + + return TRUE; + } + break; + } + case RE_OP_STRING_FLD_REV: /* A string literal backwards, ignoring case. */ + { + Py_ssize_t new_pos; + + if ((test->status & RE_STATUS_REQUIRED) && start_pos == state->req_pos) + new_pos = state->req_end; + else + start_pos = string_search_fld_rev(safe_state, test, start_pos, + state->slice_start, &new_pos); + if (start_pos < 0) + return FALSE; + + /* Can we look further ahead? */ + if (test == node) { + if (test->next_1.node && !try_match(state, &test->next_1, new_pos, + new_position)) { + --start_pos; + + if (state->reverse) { + if (start_pos < state->slice_start) + return FALSE; + } else { + if (start_pos > state->slice_end) + return FALSE; + } + + goto again; + } + + /* It's a possible match. */ + state->match_pos = start_pos; + + return TRUE; + } + break; + } + case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ + if (!(test->status & RE_STATUS_REQUIRED) || start_pos != + state->req_pos) { + start_pos = string_search_ign(safe_state, test, start_pos, limit); + if (start_pos < 0) + return FALSE; + } + break; + case RE_OP_STRING_IGN_REV: /* A string literal backwards, ignoring case. */ + if (!(test->status & RE_STATUS_REQUIRED) || start_pos != + state->req_pos) { + start_pos = string_search_ign_rev(safe_state, test, start_pos, + limit); + if (start_pos < 0) + return FALSE; + } + break; + case RE_OP_STRING_REV: /* A string literal backwards. */ + if (!(test->status & RE_STATUS_REQUIRED) || start_pos != + state->req_pos) { + start_pos = string_search_rev(safe_state, test, start_pos, limit); + if (start_pos < 0) + return FALSE; + } + break; + default: + /* Don't call 'search_start' again. */ + state->pattern->do_search_start = FALSE; + + state->match_pos = start_pos; + new_position->node = node; + new_position->text_pos = start_pos; + return TRUE; + } + + text_pos = start_pos; + + /* Can we look further ahead? */ + if (test == node) { + text_pos += test->step; + + if (test->next_1.node && !try_match(state, &test->next_1, text_pos, + new_position)) { + start_pos += step; + + if (state->reverse) { + if (start_pos < state->slice_start) + return FALSE; + } else { + if (start_pos > state->slice_end) + return FALSE; + } + + goto again; + } + } else { + new_position->node = node; + new_position->text_pos = start_pos; + } + + /* It's a possible match. */ + state->match_pos = start_pos; + + if (info) { + info->start_pos = state->text_pos; + info->match_pos = state->match_pos; + } + + return TRUE; +} + +/* Saves a capture group. */ +Py_LOCAL_INLINE(BOOL) save_capture(RE_SafeState* safe_state, size_t + private_index, size_t public_index) { + RE_State* state; + RE_GroupData* private_group; + RE_GroupData* public_group; + + state = safe_state->re_state; + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + private_group = &state->groups[private_index - 1]; + public_group = &state->groups[public_index - 1]; + + /* Will the repeated captures ever be visible? */ + if (!state->visible_captures) { + public_group->captures[0] = private_group->span; + public_group->capture_count = 1; + + return TRUE; + } + + if (public_group->capture_count >= public_group->capture_capacity) { + size_t new_capacity; + RE_GroupSpan* new_captures; + + new_capacity = public_group->capture_capacity * 2; + new_capacity = RE_MAX(new_capacity, RE_INIT_CAPTURE_SIZE); + new_captures = (RE_GroupSpan*)safe_realloc(safe_state, + public_group->captures,new_capacity * sizeof(RE_GroupSpan)); + if (!new_captures) + return FALSE; + + public_group->captures = new_captures; + public_group->capture_capacity = new_capacity; + } + + public_group->captures[public_group->capture_count++] = + private_group->span; + + return TRUE; +} + +/* Unsaves a capture group. */ +Py_LOCAL_INLINE(void) unsave_capture(RE_State* state, size_t private_index, + size_t public_index) { + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + if (state->groups[public_index - 1].capture_count > 0) + --state->groups[public_index - 1].capture_count; +} + +/* Pushes the groups for backtracking. */ +Py_LOCAL_INLINE(BOOL) push_groups(RE_SafeState* safe_state) { + RE_State* state; + size_t group_count; + RE_SavedGroups* current; + size_t g; + + state = safe_state->re_state; + + group_count = state->pattern->true_group_count; + if (group_count == 0) + return TRUE; + + current = state->current_saved_groups; + + if (current && current->next) + current = current->next; + else if (!current && state->first_saved_groups) + current = state->first_saved_groups; + else { + RE_SavedGroups* new_block; + + new_block = (RE_SavedGroups*)safe_alloc(safe_state, + sizeof(RE_SavedGroups)); + if (!new_block) + return FALSE; + + new_block->spans = (RE_GroupSpan*)safe_alloc(safe_state, group_count * + sizeof(RE_GroupSpan)); + new_block->counts = (size_t*)safe_alloc(safe_state, group_count * + sizeof(size_t)); + if (!new_block->spans || !new_block->counts) { + safe_dealloc(safe_state, new_block->spans); + safe_dealloc(safe_state, new_block->counts); + safe_dealloc(safe_state, new_block); + return FALSE; + } + + new_block->previous = current; + new_block->next = NULL; + + if (new_block->previous) + new_block->previous->next = new_block; + else + state->first_saved_groups = new_block; + + current = new_block; + } + + for (g = 0; g < group_count; g++) { + current->spans[g] = state->groups[g].span; + current->counts[g] = state->groups[g].capture_count; + } + + state->current_saved_groups = current; + + return TRUE; +} + +/* Pops the groups for backtracking. */ +Py_LOCAL_INLINE(void) pop_groups(RE_State* state) { + size_t group_count; + RE_SavedGroups* current; + size_t g; + + group_count = state->pattern->true_group_count; + if (group_count == 0) + return; + + current = state->current_saved_groups; + + for (g = 0; g < group_count; g++) { + state->groups[g].span = current->spans[g]; + state->groups[g].capture_count = current->counts[g]; + } + + state->current_saved_groups = current->previous; +} + +/* Drops the groups for backtracking. */ +Py_LOCAL_INLINE(void) drop_groups(RE_State* state) { + if (state->pattern->true_group_count != 0) + state->current_saved_groups = state->current_saved_groups->previous; +} + +/* Pushes the repeats for backtracking. */ +Py_LOCAL_INLINE(BOOL) push_repeats(RE_SafeState* safe_state) { + RE_State* state; + PatternObject* pattern; + Py_ssize_t repeat_count; + RE_SavedRepeats* current; + Py_ssize_t r; + + state = safe_state->re_state; + pattern = state->pattern; + + repeat_count = pattern->repeat_count; + if (repeat_count == 0) + return TRUE; + + current = state->current_saved_repeats; + + if (current && current->next) + current = current->next; + else if (!current && state->first_saved_repeats) + current = state->first_saved_repeats; + else { + RE_SavedRepeats* new_block; + + new_block = (RE_SavedRepeats*)safe_alloc(safe_state, + sizeof(RE_SavedRepeats)); + if (!new_block) + return FALSE; + + memset(new_block, 0, sizeof(RE_SavedRepeats)); + + new_block->repeats = (RE_RepeatData*)safe_alloc(safe_state, + repeat_count * sizeof(RE_RepeatData)); + if (!new_block->repeats) { + safe_dealloc(safe_state, new_block); + return FALSE; + } + + memset(new_block->repeats, 0, repeat_count * sizeof(RE_RepeatData)); + + new_block->previous = current; + new_block->next = NULL; + + if (new_block->previous) + new_block->previous->next = new_block; + else + state->first_saved_repeats = new_block; + + current = new_block; + } + + for (r = 0; r < repeat_count; r++) { + if (!copy_repeat_data(safe_state, ¤t->repeats[r], + &state->repeats[r])) + return FALSE; + } + + state->current_saved_repeats = current; + + return TRUE; +} + +/* Pops the repeats for backtracking. */ +Py_LOCAL_INLINE(void) pop_repeats(RE_State* state) { + PatternObject* pattern; + Py_ssize_t repeat_count; + RE_SavedRepeats* current; + Py_ssize_t r; + + pattern = state->pattern; + + repeat_count = pattern->repeat_count; + if (repeat_count == 0) + return; + + current = state->current_saved_repeats; + + for (r = 0; r < repeat_count; r++) + copy_repeat_data(NULL, &state->repeats[r], ¤t->repeats[r]); + + state->current_saved_repeats = current->previous; +} + +/* Saves state info before a recusive call by 'basic_match'. */ +Py_LOCAL_INLINE(void) save_info(RE_State* state, RE_Info* info) { + info->current_backtrack_block = state->current_backtrack_block; + info->backtrack_count = info->current_backtrack_block->count; + info->current_saved_groups = state->current_saved_groups; + info->must_advance = state->must_advance; + info->current_group_call_frame = state->current_group_call_frame; +} + +/* Restores state info after a recusive call by 'basic_match'. */ +Py_LOCAL_INLINE(void) restore_info(RE_State* state, RE_Info* info) { + state->current_group_call_frame = info->current_group_call_frame; + state->must_advance = info->must_advance; + state->current_saved_groups = info->current_saved_groups; + info->current_backtrack_block->count = info->backtrack_count; + state->current_backtrack_block = info->current_backtrack_block; +} + +/* Inserts a new span in a guard list. */ +Py_LOCAL_INLINE(BOOL) insert_guard_span(RE_SafeState* safe_state, RE_GuardList* + guard_list, Py_ssize_t index) { + size_t n; + + if (guard_list->count >= guard_list->capacity) { + size_t new_capacity; + RE_GuardSpan* new_spans; + + new_capacity = guard_list->capacity * 2; + if (new_capacity == 0) + new_capacity = RE_INIT_GUARDS_BLOCK_SIZE; + new_spans = (RE_GuardSpan*)safe_realloc(safe_state, guard_list->spans, + new_capacity * sizeof(RE_GuardSpan)); + if (!new_spans) + return FALSE; + + guard_list->capacity = new_capacity; + guard_list->spans = new_spans; + } + + n = guard_list->count - index; + if (n > 0) + memmove(guard_list->spans + index + 1, guard_list->spans + index, n * + sizeof(RE_GuardSpan)); + ++guard_list->count; + + return TRUE; +} + +/* Deletes a span in a guard list. */ +Py_LOCAL_INLINE(void) delete_guard_span(RE_GuardList* guard_list, Py_ssize_t + index) { + size_t n; + + n = guard_list->count - index - 1; + if (n > 0) + memmove(guard_list->spans + index, guard_list->spans + index + 1, n * + sizeof(RE_GuardSpan)); + --guard_list->count; +} + +/* Checks whether a position is guarded against further matching. */ +Py_LOCAL_INLINE(BOOL) is_guarded(RE_GuardList* guard_list, Py_ssize_t text_pos) + { + size_t low; + size_t high; + + /* Is this position in the guard list? */ + low = 0; + high = guard_list->count; + while (low < high) { + size_t mid; + RE_GuardSpan* span; + + mid = (low + high) / 2; + span = &guard_list->spans[mid]; + if (text_pos < span->low) + high = mid; + else if (text_pos > span->high) + low = mid + 1; + else + return span->protect; + } + + guard_list->last_text_pos = text_pos; + guard_list->last_low = low; + + return FALSE; +} + +/* Guards a position against further matching. */ +Py_LOCAL_INLINE(BOOL) guard(RE_SafeState* safe_state, RE_GuardList* guard_list, + Py_ssize_t text_pos, BOOL protect) { + size_t low; + size_t high; + + /* Where should be new position be added? */ + if (text_pos == guard_list->last_text_pos) + low = guard_list->last_low; + else { + low = 0; + high = guard_list->count; + while (low < high) { + size_t mid; + RE_GuardSpan* span; + + mid = (low + high) / 2; + span = &guard_list->spans[mid]; + if (text_pos < span->low) + high = mid; + else if (text_pos > span->high) + low = mid + 1; + else + return TRUE; + } + } + + /* Add the position to the guard list. */ + if (low > 0 && guard_list->spans[low - 1].high + 1 == text_pos && + guard_list->spans[low - 1].protect == protect) { + /* The new position is just above this span. */ + if (low < guard_list->count && guard_list->spans[low].low - 1 == + text_pos && guard_list->spans[low].protect == protect) { + /* The new position joins 2 spans */ + guard_list->spans[low - 1].high = guard_list->spans[low].high; + delete_guard_span(guard_list, low); + } else + /* Extend the span. */ + guard_list->spans[low - 1].high = text_pos; + } else if (low < guard_list->count && guard_list->spans[low].low - 1 == + text_pos && guard_list->spans[low].protect == protect) + /* The new position is just below this span. */ + /* Extend the span. */ + guard_list->spans[low].low = text_pos; + else { + /* Insert a new span. */ + if (!insert_guard_span(safe_state, guard_list, low)) + return FALSE; + guard_list->spans[low].low = text_pos; + guard_list->spans[low].high = text_pos; + guard_list->spans[low].protect = protect; + } + + guard_list->last_text_pos = -1; + + return TRUE; +} + +/* Guards a position against further matching for a repeat. */ +Py_LOCAL_INLINE(BOOL) guard_repeat(RE_SafeState* safe_state, size_t index, + Py_ssize_t text_pos, RE_STATUS_T guard_type, BOOL protect) { + RE_State* state; + RE_GuardList* guard_list; + + state = safe_state->re_state; + + /* Is a guard active here? */ + if (!(state->pattern->repeat_info[index].status & guard_type)) + return TRUE; + + /* Which guard list? */ + if (guard_type & RE_STATUS_BODY) + guard_list = &state->repeats[index].body_guard_list; + else + guard_list = &state->repeats[index].tail_guard_list; + + return guard(safe_state, guard_list, text_pos, protect); +} + +/* Checks whether a position is guarded against further matching for a repeat. + */ +Py_LOCAL_INLINE(BOOL) is_repeat_guarded(RE_SafeState* safe_state, size_t index, + Py_ssize_t text_pos, RE_STATUS_T guard_type) { + RE_State* state; + RE_GuardList* guard_list; + + state = safe_state->re_state; + + /* Is a guard active here? */ + if (!(state->pattern->repeat_info[index].status & guard_type)) + return FALSE; + + /* Which guard list? */ + if (guard_type == RE_STATUS_BODY) + guard_list = &state->repeats[index].body_guard_list; + else + guard_list = &state->repeats[index].tail_guard_list; + + return is_guarded(guard_list, text_pos); +} + +/* Resets the guards inside atomic subpatterns and lookarounds. */ +Py_LOCAL_INLINE(void) reset_guards(RE_State* state, RE_CODE* values) { + PatternObject* pattern; + size_t repeat_count; + + pattern = state->pattern; + repeat_count = (size_t)pattern->repeat_count; + + if (values) { + size_t i; + + for (i = 1; i <= values[0]; i++) { + size_t index; + + index = values[i]; + + if (index < repeat_count) { + reset_guard_list(&state->repeats[index].body_guard_list); + reset_guard_list(&state->repeats[index].tail_guard_list); + } else { + index -= repeat_count; + + reset_guard_list(&state->fuzzy_guards[index].body_guard_list); + reset_guard_list(&state->fuzzy_guards[index].tail_guard_list); + } + } + } else { + size_t index; + size_t fuzzy_count; + + for (index = 0; index < repeat_count; index++) { + reset_guard_list(&state->repeats[index].body_guard_list); + reset_guard_list(&state->repeats[index].tail_guard_list); + } + + fuzzy_count = pattern->fuzzy_count; + + for (index = 0; index < fuzzy_count; index++) { + reset_guard_list(&state->fuzzy_guards[index].body_guard_list); + reset_guard_list(&state->fuzzy_guards[index].tail_guard_list); + } + } +} + +/* Builds a Unicode string. */ +Py_LOCAL_INLINE(PyObject*) build_unicode_value(void* buffer, Py_ssize_t len, + Py_ssize_t buffer_charsize) { + return PyUnicode_FromUnicode(buffer, len); +} + +/* Builds a bytestring. Returns NULL if any member is too wide. */ +Py_LOCAL_INLINE(PyObject*) build_bytes_value(void* buffer, Py_ssize_t len, + Py_ssize_t buffer_charsize) +{ + Py_UCS1* byte_buffer; + Py_ssize_t i; + PyObject* result; + + if (buffer_charsize == 1) + return Py_BuildValue("s#", buffer, len); + + byte_buffer = re_alloc(len); + if (!byte_buffer) + return NULL; + + for (i = 0; i < len; i++) { + Py_UCS2 c = ((Py_UCS2*)buffer)[i]; + if (c > 0xFF) + goto too_wide; + + byte_buffer[i] = (Py_UCS1)c; + } + + result = Py_BuildValue("s#", byte_buffer, len); + + re_dealloc(byte_buffer); + + return result; + +too_wide: + re_dealloc(byte_buffer); + + return NULL; +} + +/* Looks for a string in a string set, ignoring case. */ +Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* + string_set, void* buffer, Py_ssize_t index, Py_ssize_t len, Py_ssize_t + buffer_charsize) { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + RE_EncodingTable* encoding; + BOOL (*possible_turkic)(Py_UCS4 ch); + Py_UCS4 codepoints[4]; + + switch (buffer_charsize) { + case 1: + char_at = bytes1_char_at; + set_char_at = bytes1_set_char_at; + break; + case 2: + char_at = bytes2_char_at; + set_char_at = bytes2_set_char_at; + break; + case 4: + char_at = bytes4_char_at; + set_char_at = bytes4_set_char_at; + break; + default: + char_at = bytes1_char_at; + set_char_at = bytes1_set_char_at; + break; + } + + encoding = state->encoding; + possible_turkic = encoding->possible_turkic; + + /* Look for a possible Turkic 'I'. */ + while (index < len && !possible_turkic(char_at(buffer, index))) + ++index; + + if (index < len) { + /* Possible Turkic 'I'. */ + int count; + int i; + + /* Try all the alternatives to the 'I'. */ + count = encoding->all_turkic_i(char_at(buffer, index), codepoints); + + for (i = 0; i < count; i++) { + int status; + + set_char_at(buffer, index, codepoints[i]); + + /* Recurse for the remainder of the string. */ + status = string_set_contains_ign(state, string_set, buffer, index + + 1, len, buffer_charsize); + if (status != 0) + return status; + } + + return 0; + } else { + /* No Turkic 'I'. */ + PyObject* string; + int status; + + if (state->is_unicode) + string = build_unicode_value(buffer, len, buffer_charsize); + else + string = build_bytes_value(buffer, len, buffer_charsize); + if (!string) + return RE_ERROR_MEMORY; + + status = PySet_Contains(string_set, string); + Py_DECREF(string); + + return status; + } +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case, for a forwards or reverse search. + */ +Py_LOCAL_INLINE(int) string_set_match_ign_fwdrev(RE_SafeState* safe_state, + RE_Node* node, BOOL reverse) { + Py_ssize_t index; + Py_ssize_t min_len; + Py_ssize_t max_len; + RE_State* state; + Py_ssize_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t text_pos; + RE_EncodingTable* encoding; + Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + void* folded; + PyObject* string_set; + int status; + Py_ssize_t len; + + index = node->values[0]; + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + state = safe_state->re_state; + + available = reverse ? state->text_pos - state->slice_start : + state->slice_end - state->text_pos; + + if (min_len > available) + /* Too few characters for any match. */ + return 0; + + max_len = RE_MIN(max_len, available); + + char_at = state->char_at; + text = state->text; + text_pos = state->text_pos; + encoding = state->encoding; + simple_case_fold = encoding->simple_case_fold; + + acquire_GIL(safe_state); + + /* The folded string will have the same width as the original string. */ + folded_charsize = state->charsize; + + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + break; + case 2: + set_char_at = bytes2_set_char_at; + break; + case 4: + set_char_at = bytes4_set_char_at; + break; + default: + return 0; + } + + /* Allocate a buffer for the folded string. */ + folded = re_alloc(max_len * folded_charsize); + if (!folded) + goto error; + + /* Fetch the string set. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); + if (!string_set) + goto error; + + status = 0; + + /* Attempt matches for a decreasing length. */ + for (len = max_len; status == 0 && len >= min_len; len--) { + Py_ssize_t offset; + Py_ssize_t inc_len; + int i; + + if (reverse) { + offset = -len; + inc_len = -len; + } else { + offset = 0; + inc_len = len; + } + + for (i = 0; i < len; i++) { + Py_UCS4 ch; + + ch = simple_case_fold(char_at(text, text_pos + offset + i)); + set_char_at(folded, i, ch); + } + + status = string_set_contains_ign(state, string_set, folded, 0, len, + folded_charsize); + + if (status == 1) + /* Advance past the match. */ + state->text_pos += inc_len; + } + + re_dealloc(folded); + + release_GIL(safe_state); + + return status; + +error: + re_dealloc(folded); + + release_GIL(safe_state); + + return RE_ERROR_INTERNAL; +} + +/* Tries to match a string at the current position with a member of a string + * set. + */ +Py_LOCAL_INLINE(int) string_set_match(RE_SafeState* safe_state, RE_Node* node) + { + Py_ssize_t index; + Py_ssize_t min_len; + Py_ssize_t max_len; + RE_State* state; + Py_ssize_t available; + void* (*point_to)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t text_pos; + PyObject* string_set; + int status; + Py_ssize_t len; + + index = node->values[0]; + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + state = safe_state->re_state; + + available = state->slice_end - state->text_pos; + if (min_len > available) + /* Too few characters for any match. */ + return 0; + + max_len = RE_MIN(max_len, available); + + point_to = state->point_to; + text = state->text; + text_pos = state->text_pos; + + acquire_GIL(safe_state); + + /* Fetch the string set. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); + if (!string_set) + goto error; + + status = 0; + + /* Attempt matches for a decreasing length. */ + for (len = max_len; status == 0 && len >= min_len; len--) { + PyObject* string; + + if (state->is_unicode) + string = build_unicode_value(point_to(text, text_pos), len, + state->charsize); + else + string = build_bytes_value(point_to(text, text_pos), len, + state->charsize); + if (!string) + goto error; + + status = PySet_Contains(string_set, string); + Py_DECREF(string); + + if (status == 1) + /* Advance past the match. */ + state->text_pos += len; + } + + release_GIL(safe_state); + + return status; + +error: + release_GIL(safe_state); + + return RE_ERROR_INTERNAL; +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case. + */ +Py_LOCAL_INLINE(int) string_set_match_fld(RE_SafeState* safe_state, RE_Node* + node) { + Py_ssize_t index; + Py_ssize_t min_len; + Py_ssize_t max_len; + RE_State* state; + Py_ssize_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t text_pos; + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t buf_size; + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + void* folded; + PyObject* string_set; + int status; + Py_ssize_t end_fetch; + Py_ssize_t len; + Py_UCS4 codepoints[RE_MAX_FOLDED]; + + index = node->values[0]; + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + state = safe_state->re_state; + + available = state->slice_end - state->text_pos; + if ((Py_ssize_t)possible_unfolded_length(min_len) > available) + /* Too few characters for any match. */ + return 0; + + char_at = state->char_at; + text = state->text; + text_pos = state->text_pos; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + + /* The folded string will have the same width as the original string. */ + folded_charsize = state->charsize; + + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + break; + case 2: + set_char_at = bytes2_set_char_at; + break; + case 4: + set_char_at = bytes4_set_char_at; + break; + default: + return 0; + } + + acquire_GIL(safe_state); + + /* Allocate a buffer for the folded string, plus a little extra. */ + buf_size = max_len + RE_MAX_FOLDED; + folded = re_alloc(buf_size * folded_charsize); + if (!folded) + goto error; + + /* Fetch the string set. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); + if (!string_set) + goto error; + + status = 0; + + /* Attempt matches for a decreasing length. */ + end_fetch = text_pos + max_len; + + for (len = max_len; status == 0 && len >= min_len; len--) { + Py_ssize_t pos; + int folded_len; + + pos = text_pos; + folded_len = 0; + + /* Fetch until we have enough characters. */ + while (pos < end_fetch && folded_len < len) { + int count; + int i; + + count = full_case_fold(char_at(text, pos), codepoints); + + for (i = 0; i < count; i++) { + Py_UCS4 ch; + + ch = codepoints[i]; + set_char_at(folded, folded_len + i, ch); + } + + folded_len += count; + + ++pos; + } + + /* Do we have an acceptable number? */ + if (min_len <= folded_len && folded_len <= len) { + status = string_set_contains_ign(state, string_set, folded, 0, + folded_len, folded_charsize); + + if (status == 1) + /* Advance past the match. */ + state->text_pos = pos; + } + + /* If we got fewer than expected, next time we want still fewer. */ + len = RE_MIN(len, folded_len); + + /* Fetch one fewer next time. */ + end_fetch = pos - 1; + } + + re_dealloc(folded); + + release_GIL(safe_state); + + return status; + +error: + re_dealloc(folded); + + release_GIL(safe_state); + + return RE_ERROR_INTERNAL; +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case. + */ +Py_LOCAL_INLINE(int) string_set_match_fld_rev(RE_SafeState* safe_state, + RE_Node* node) { + Py_ssize_t index; + Py_ssize_t min_len; + Py_ssize_t max_len; + RE_State* state; + Py_ssize_t available; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t text_pos; + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t buf_size; + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + void* (*point_to)(void* text, Py_ssize_t pos); + void* folded; + PyObject* string_set; + int status; + Py_ssize_t end_fetch; + Py_ssize_t len; + Py_UCS4 codepoints[RE_MAX_FOLDED]; + + index = node->values[0]; + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + state = safe_state->re_state; + + available = state->text_pos - state->slice_start; + if ((Py_ssize_t)possible_unfolded_length(min_len) > available) + /* Too few characters for any match. */ + return 0; + + char_at = state->char_at; + text = state->text; + text_pos = state->text_pos; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + + /* The folded string will have the same width as the original string. */ + folded_charsize = state->charsize; + + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + point_to = bytes1_point_to; + break; + case 2: + set_char_at = bytes2_set_char_at; + point_to = bytes2_point_to; + break; + case 4: + set_char_at = bytes4_set_char_at; + point_to = bytes4_point_to; + break; + default: + return 0; + } + + acquire_GIL(safe_state); + + /* Allocate a buffer for the folded string, plus a little extra. */ + buf_size = max_len + RE_MAX_FOLDED; + folded = re_alloc(buf_size * folded_charsize); + if (!folded) + goto error; + + /* Fetch the string set. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); + if (!string_set) + goto error; + + status = 0; + + /* Attempt matches for a decreasing length. */ + end_fetch = text_pos - max_len; + + for (len = max_len; status == 0 && len >= min_len; len--) { + Py_ssize_t pos; + int folded_len; + + pos = text_pos; + folded_len = 0; + + /* Fetch until we have enough characters. */ + while (pos > end_fetch && folded_len < len) { + int count; + int i; + + count = full_case_fold(char_at(text, pos - 1), codepoints); + + folded_len += count; + + for (i = 0; i < count; i++) { + Py_UCS4 ch; + + ch = codepoints[i]; + set_char_at(folded, buf_size - folded_len + i, ch); + } + + --pos; + } + + /* Do we have an acceptable number? */ + if (min_len <= folded_len && folded_len <= len) { + status = string_set_contains_ign(state, string_set, + point_to(folded, buf_size - len), 0, folded_len, + folded_charsize); + + if (status == 1) + /* Advance past the match. */ + state->text_pos = pos; + } + + /* If we got fewer than expected, next time we want still fewer. */ + len = RE_MIN(len, folded_len); + + /* Fetch one fewer next time. */ + end_fetch = pos + 1; + } + + re_dealloc(folded); + + release_GIL(safe_state); + + return status; + +error: + re_dealloc(folded); + + release_GIL(safe_state); + + return RE_ERROR_INTERNAL; +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case, for a forwards search. + */ +Py_LOCAL_INLINE(int) string_set_match_ign(RE_SafeState* safe_state, RE_Node* + node) { + return string_set_match_ign_fwdrev(safe_state, node, FALSE); +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case, for a reverse search. + */ +Py_LOCAL_INLINE(int) string_set_match_ign_rev(RE_SafeState* safe_state, + RE_Node* node) { + return string_set_match_ign_fwdrev(safe_state, node, TRUE); +} + +/* Tries to match a string at the current position with a member of a string + * set. + */ +Py_LOCAL_INLINE(int) string_set_match_rev(RE_SafeState* safe_state, RE_Node* + node) { + Py_ssize_t index; + Py_ssize_t min_len; + Py_ssize_t max_len; + RE_State* state; + Py_ssize_t available; + void* (*point_to)(void* text, Py_ssize_t pos); + void* text; + Py_ssize_t text_pos; + PyObject* string_set; + int status; + Py_ssize_t len; + + index = node->values[0]; + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + state = safe_state->re_state; + + available = state->text_pos - state->slice_start; + if (min_len > available) + /* Too few characters for any match. */ + return 0; + + max_len = RE_MIN(max_len, available); + + point_to = state->point_to; + text = state->text; + text_pos = state->text_pos; + + acquire_GIL(safe_state); + + /* Fetch the string set. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); + if (!string_set) + goto error; + + status = 0; + + /* Attempt matches for a decreasing length. */ + for (len = max_len; status == 0 && len >= min_len; len--) { + PyObject* string; + + if (state->is_unicode) + string = build_unicode_value(point_to(text, text_pos - len), len, + state->charsize); + else + string = build_bytes_value(point_to(text, text_pos - len), len, + state->charsize); + if (!string) + goto error; + + status = PySet_Contains(string_set, string); + Py_DECREF(string); + + if (status == 1) + /* Advance past the match. */ + state->text_pos -= len; + } + + release_GIL(safe_state); + + return status; + +error: + release_GIL(safe_state); + + return RE_ERROR_INTERNAL; +} + +/* Checks whether any additional fuzzy error is permitted. */ +Py_LOCAL_INLINE(BOOL) any_error_permitted(RE_State* state) { + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + return fuzzy_info->total_cost <= values[RE_FUZZY_VAL_MAX_COST] && + fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MAX_ERR] && + state->total_cost <= state->max_cost; +} + +/* Checks whether this additional fuzzy error is permitted. */ +Py_LOCAL_INLINE(BOOL) this_error_permitted(RE_State* state, int fuzzy_type) { + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + return fuzzy_info->total_cost + values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] + <= values[RE_FUZZY_VAL_MAX_COST] && fuzzy_info->counts[fuzzy_type] < + values[RE_FUZZY_VAL_MAX_BASE + fuzzy_type] && state->total_cost + + values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] <= state->max_cost; +} + +Py_LOCAL_INLINE(BOOL) next_fuzzy_match_one(RE_State* state, RE_FuzzyData* data) + { + Py_ssize_t new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_node = data->new_node->next_1.node; + return TRUE; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + new_pos = data->new_text_pos + data->step; + if (data->permit_insertion && state->slice_start <= new_pos && + new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + return TRUE; + } + break; + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? */ + new_pos = data->new_text_pos + data->step; + if (state->slice_start <= new_pos && new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + data->new_node = data->new_node->next_1.node; + return TRUE; + } + break; + } + } + + return FALSE; +} + +/* Tries a fuzzy match of a single-character item. */ +Py_LOCAL_INLINE(BOOL) fuzzy_match_one(RE_SafeState* safe_state, BOOL search, + Py_ssize_t* text_pos, RE_Node** node, int step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *node = NULL; + return TRUE; + } + + data.new_text_pos = *text_pos; + data.new_node = *node; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_one(state, &data)) + goto found; + } + + *node = NULL; + return TRUE; + +found: + if (!add_backtrack(safe_state, (*node)->op)) + return FALSE; + bt_data = state->backtrack; + bt_data->fuzzy_one.position.text_pos = *text_pos; + bt_data->fuzzy_one.position.node = *node; + bt_data->fuzzy_one.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_one.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = data.new_node; + + return TRUE; +} + +/* Retries a fuzzy match of a single-character item. */ +Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_one(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + data.new_text_pos = bt_data->fuzzy_one.position.text_pos; + data.new_node = bt_data->fuzzy_one.position.node; + data.fuzzy_type = bt_data->fuzzy_one.fuzzy_type; + data.step = bt_data->fuzzy_one.step; + + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_one(state, &data)) + goto found; + } + + discard_backtrack(state); + *node = NULL; + return TRUE; + +found: + bt_data->fuzzy_one.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = data.new_node; + + return TRUE; +} + +Py_LOCAL_INLINE(BOOL) next_fuzzy_match_zero(RE_State* state, RE_FuzzyData* + data) { + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_node = data->new_node->next_1.node; + return TRUE; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + if (data->permit_insertion && data->new_text_pos != data->limit) { + data->new_text_pos += data->step; + return TRUE; + } + break; + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? */ + if (data->new_text_pos != data->limit) { + data->new_node = data->new_node->next_1.node; + return TRUE; + } + break; + } + } + + return FALSE; +} + +/* Tries a fuzzy match of a zero-width item. */ +Py_LOCAL_INLINE(BOOL) fuzzy_match_zero(RE_SafeState* safe_state, BOOL search, + Py_ssize_t* text_pos, RE_Node** node) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *node = NULL; + return TRUE; + } + + data.new_text_pos = *text_pos; + data.new_node = *node; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + if (data.new_node->status & RE_STATUS_REVERSE) { + data.step = -1; + data.limit = state->slice_start; + } else { + data.step = 1; + data.limit = state->slice_end; + } + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_zero(state, &data)) + goto found; + } + + *node = NULL; + return TRUE; + +found: + if (!add_backtrack(safe_state, (*node)->op)) + return FALSE; + bt_data = state->backtrack; + bt_data->fuzzy_zero.position.text_pos = *text_pos; + bt_data->fuzzy_zero.position.node = *node; + bt_data->fuzzy_zero.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = data.new_node; + + return TRUE; +} + +/* Retries a fuzzy match of a zero-width item. */ +Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_zero(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + data.new_text_pos = bt_data->fuzzy_zero.position.text_pos; + data.new_node = bt_data->fuzzy_zero.position.node; + data.fuzzy_type = bt_data->fuzzy_zero.fuzzy_type; + + if (data.new_node->status & RE_STATUS_REVERSE) { + data.step = -1; + data.limit = state->slice_start; + } else { + data.step = 1; + data.limit = state->slice_end; + } + + if (data.fuzzy_type >= 0) { + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + } + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_zero(state, &data)) + goto found; + } + + discard_backtrack(state); + *node = NULL; + return TRUE; + +found: + bt_data->fuzzy_zero.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = data.new_node; + + return TRUE; +} + +/* Tries a fuzzy insertion. */ +Py_LOCAL_INLINE(BOOL) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t + text_pos, RE_Node* node) { + RE_State* state; + RE_BacktrackData* bt_data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + + state = safe_state->re_state; + + /* No insertion or deletion. */ + if (!add_backtrack(safe_state, node->op)) + return FALSE; + bt_data = state->backtrack; + bt_data->fuzzy_insert.position.text_pos = text_pos; + bt_data->fuzzy_insert.position.node = node; + bt_data->fuzzy_insert.count = 0; + bt_data->fuzzy_insert.too_few_errors = state->too_few_errors; + bt_data->fuzzy_insert.fuzzy_node = node; /* END_FUZZY node. */ + + /* Check whether there are too few errors. */ + fuzzy_info = &state->fuzzy_info; + + /* The node in this case is the END_FUZZY node. */ + values = node->values; + + if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || + fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || + fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || + fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) + state->too_few_errors = TRUE; + + return TRUE; +} + +/* Retries a fuzzy insertion. */ +Py_LOCAL_INLINE(BOOL) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* + text_pos, RE_Node** node) { + RE_State* state; + RE_FuzzyInfo* fuzzy_info; + RE_BacktrackData* bt_data; + Py_ssize_t new_text_pos; + RE_Node* new_node; + int step; + Py_ssize_t limit; + RE_Node* fuzzy_node; + RE_CODE* values; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + new_text_pos = bt_data->fuzzy_insert.position.text_pos; + new_node = bt_data->fuzzy_insert.position.node; + + if (new_node->status & RE_STATUS_REVERSE) { + step = -1; + limit = state->slice_start; + } else { + step = 1; + limit = state->slice_end; + } + + /* Could the character at text_pos have been inserted? */ + if (!this_error_permitted(state, RE_FUZZY_INS) || new_text_pos == limit) { + Py_ssize_t count; + + count = bt_data->fuzzy_insert.count; + + fuzzy_info->counts[RE_FUZZY_INS] -= count; + fuzzy_info->counts[RE_FUZZY_ERR] -= count; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; + state->total_errors -= count; + state->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; + state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; + + discard_backtrack(state); + *node = NULL; + return TRUE; + } + + ++bt_data->fuzzy_insert.count; + + ++fuzzy_info->counts[RE_FUZZY_INS]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_INS_COST]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_INS_COST]; + + /* Check whether there are too few errors. */ + state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; + fuzzy_node = bt_data->fuzzy_insert.fuzzy_node; /* END_FUZZY node. */ + values = fuzzy_node->values; + if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || + fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || + fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || + fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) + state->too_few_errors = TRUE; + + *text_pos = new_text_pos + step * bt_data->fuzzy_insert.count; + *node = new_node; + + return TRUE; +} + +Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string(RE_State* state, RE_FuzzyData* + data) { + Py_ssize_t new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_string_pos += data->step; + return TRUE; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + new_pos = data->new_text_pos + data->step; + if (data->permit_insertion && state->slice_start <= new_pos && + new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + return TRUE; + } + break; + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? */ + new_pos = data->new_text_pos + data->step; + if (state->slice_start <= new_pos && new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + data->new_string_pos += data->step; + return TRUE; + } + break; + } + } + + return FALSE; +} + +/* Tries a fuzzy match of a string. */ +Py_LOCAL_INLINE(BOOL) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, + Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, BOOL* matched, + int step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *matched = FALSE; + return TRUE; + } + + data.new_text_pos = *text_pos; + data.new_string_pos = *string_pos; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_string(state, &data)) + goto found; + } + + *matched = FALSE; + return TRUE; + +found: + if (!add_backtrack(safe_state, node->op)) + return FALSE; + bt_data = state->backtrack; + bt_data->fuzzy_string.position.text_pos = *text_pos; + bt_data->fuzzy_string.position.node = node; + bt_data->fuzzy_string.string_pos = *string_pos; + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_string.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *string_pos = data.new_string_pos; + *matched = TRUE; + + return TRUE; +} + +/* Retries a fuzzy match of a string. */ +Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, BOOL* + matched) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + RE_Node* new_node; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + data.new_text_pos = bt_data->fuzzy_string.position.text_pos; + new_node = bt_data->fuzzy_string.position.node; + data.new_string_pos = bt_data->fuzzy_string.string_pos; + data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; + data.step = bt_data->fuzzy_string.step; + + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_string(state, &data)) + goto found; + } + + discard_backtrack(state); + *matched = FALSE; + return TRUE; + +found: + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = new_node; + *string_pos = data.new_string_pos; + *matched = TRUE; + + return TRUE; +} + +Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string_fld(RE_State* state, + RE_FuzzyData* data) { + int new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_string_pos += data->step; + return TRUE; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + new_pos = data->new_folded_pos + data->step; + if (data->permit_insertion && 0 <= new_pos && new_pos <= + data->folded_len) { + data->new_folded_pos = new_pos; + return TRUE; + } + break; + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? */ + new_pos = data->new_folded_pos + data->step; + if (0 <= new_pos && new_pos <= data->folded_len) { + data->new_folded_pos = new_pos; + data->new_string_pos += data->step; + return TRUE; + } + break; + } + } + + return FALSE; +} + +/* Tries a fuzzy match of a string, ignoring case. */ +Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, int* + folded_pos, int folded_len, BOOL* matched, int step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + Py_ssize_t new_text_pos; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *matched = FALSE; + return TRUE; + } + + new_text_pos = *text_pos; + data.new_string_pos = *string_pos; + data.new_folded_pos = *folded_pos; + data.folded_len = folded_len; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || new_text_pos != state->search_anchor; + if (step > 0) + data.permit_insertion |= data.new_folded_pos != 0; + else + data.permit_insertion |= data.new_folded_pos != folded_len; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_string_fld(state, &data)) + goto found; + } + + *matched = FALSE; + return TRUE; + +found: + if (!add_backtrack(safe_state, node->op)) + return FALSE; + bt_data = state->backtrack; + bt_data->fuzzy_string.position.text_pos = *text_pos; + bt_data->fuzzy_string.position.node = node; + bt_data->fuzzy_string.string_pos = *string_pos; + bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); + bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_string.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *string_pos = data.new_string_pos; + *folded_pos = data.new_folded_pos; + *matched = TRUE; + + return TRUE; +} + +/* Retries a fuzzy match of a string, ignoring case. */ +Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, + BOOL search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, + int* folded_pos, BOOL* matched) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + Py_ssize_t new_text_pos; + RE_Node* new_node; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + new_text_pos = bt_data->fuzzy_string.position.text_pos; + new_node = bt_data->fuzzy_string.position.node; + data.new_string_pos = bt_data->fuzzy_string.string_pos; + data.new_folded_pos = bt_data->fuzzy_string.folded_pos; + data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; + data.step = bt_data->fuzzy_string.step; + + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || new_text_pos != state->search_anchor; + if (data.step > 0) + data.permit_insertion |= data.new_folded_pos != 0; + else + data.permit_insertion |= data.new_folded_pos != + bt_data->fuzzy_string.folded_len; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_string_fld(state, &data)) + goto found; + } + + discard_backtrack(state); + *matched = FALSE; + return TRUE; + +found: + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *node = new_node; + *string_pos = data.new_string_pos; + *folded_pos = data.new_folded_pos; + *matched = TRUE; + + return TRUE; +} + +Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string_fld2(RE_State* state, + RE_FuzzyData* data) { + int new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_gfolded_pos += data->step; + return TRUE; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + new_pos = data->new_folded_pos + data->step; + if (data->permit_insertion && 0 <= new_pos && new_pos <= + data->folded_len) { + data->new_folded_pos = new_pos; + return TRUE; + } + break; + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? */ + new_pos = data->new_folded_pos + data->step; + if (0 <= new_pos && new_pos <= data->folded_len) { + data->new_folded_pos = new_pos; + data->new_gfolded_pos += data->step; + return TRUE; + } + break; + } + } + + return FALSE; +} + +/* Tries a fuzzy match of a group reference, ignoring case. */ +Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld2(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node* node, int* folded_pos, int folded_len, + Py_ssize_t* group_pos, int* gfolded_pos, int gfolded_len, BOOL* matched, int + step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + Py_ssize_t new_text_pos; + Py_ssize_t new_group_pos; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *matched = FALSE; + return TRUE; + } + + new_text_pos = *text_pos; + data.new_folded_pos = *folded_pos; + data.folded_len = folded_len; + new_group_pos = *group_pos; + data.new_gfolded_pos = *gfolded_pos; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || new_text_pos != state->search_anchor; + if (step > 0) + data.permit_insertion |= data.new_folded_pos != 0; + else + data.permit_insertion |= data.new_folded_pos != folded_len; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_string_fld2(state, &data)) + goto found; + } + + *matched = FALSE; + return TRUE; + +found: + if (!add_backtrack(safe_state, node->op)) + return FALSE; + bt_data = state->backtrack; + bt_data->fuzzy_string.position.text_pos = *text_pos; + bt_data->fuzzy_string.position.node = node; + bt_data->fuzzy_string.string_pos = *group_pos; + bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); + bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; + bt_data->fuzzy_string.gfolded_pos = (RE_INT8)(*gfolded_pos); + bt_data->fuzzy_string.gfolded_len = (RE_INT8)gfolded_len; + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_string.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *group_pos = new_group_pos; + *folded_pos = data.new_folded_pos; + *gfolded_pos = data.new_gfolded_pos; + *matched = TRUE; + + return TRUE; +} + +/* Retries a fuzzy match of a group reference, ignoring case. */ +Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld2(RE_SafeState* safe_state, + BOOL search, Py_ssize_t* text_pos, RE_Node** node, int* folded_pos, + Py_ssize_t* group_pos, int* gfolded_pos, BOOL* matched) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + Py_ssize_t new_text_pos; + Py_ssize_t new_group_pos; + RE_Node* new_node; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + new_text_pos = bt_data->fuzzy_string.position.text_pos; + new_node = bt_data->fuzzy_string.position.node; + new_group_pos = bt_data->fuzzy_string.string_pos; + data.new_folded_pos = bt_data->fuzzy_string.folded_pos; + data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos; + data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; + data.step = bt_data->fuzzy_string.step; + + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || new_text_pos != state->search_anchor || + data.new_folded_pos != bt_data->fuzzy_string.folded_len; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + if (next_fuzzy_match_string_fld2(state, &data)) + goto found; + } + + discard_backtrack(state); + *matched = FALSE; + return TRUE; + +found: + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *node = new_node; + *group_pos = new_group_pos; + *folded_pos = data.new_folded_pos; + *gfolded_pos = data.new_gfolded_pos; + *matched = TRUE; + + return TRUE; +} + +/* Locates the required string, if there's one. */ +Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { + RE_State* state; + PatternObject* pattern; + Py_ssize_t found_pos; + Py_ssize_t end_pos; + + state = safe_state->re_state; + pattern = state->pattern; + + /* We haven't matched the required string yet. */ + state->req_pos = -1; + + if (!pattern->req_string) + /* There isn't a required string, so start matching from the current + * position. + */ + return state->text_pos; + + /* Search for the required string and calculate where to start matching. */ + switch (pattern->req_string->op) { + case RE_OP_STRING: + found_pos = string_search(safe_state, pattern->req_string, + state->text_pos, state->slice_end); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos + pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + case RE_OP_STRING_FLD: + found_pos = string_search_fld(safe_state, pattern->req_string, + state->text_pos, state->slice_end, &end_pos); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = end_pos; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + case RE_OP_STRING_FLD_REV: + found_pos = string_search_fld_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start, &end_pos); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = end_pos; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + case RE_OP_STRING_IGN: + found_pos = string_search_ign(safe_state, pattern->req_string, + state->text_pos, state->slice_end); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos + pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + case RE_OP_STRING_IGN_REV: + found_pos = string_search_ign_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos - pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + case RE_OP_STRING_REV: + found_pos = string_search_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos - pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + } + + /* Start matching from the current position. */ + return state->text_pos; +} + +/* Performs a depth-first match or search from the context. */ +Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, + BOOL search, BOOL recursive_call) { + RE_State* state; + Py_ssize_t slice_start; + Py_ssize_t slice_end; + Py_ssize_t text_pos; + RE_EncodingTable* encoding; + PatternObject* pattern; + Py_ssize_t text_length; + RE_NextNode start_pair; + void* text; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); + Py_ssize_t pattern_step; /* The overall step of the pattern (forwards or backwards). */ + Py_ssize_t string_pos; + BOOL do_search_start; + Py_ssize_t found_pos; + int folded_pos; + int gfolded_pos; + RE_Node* node; + TRACE(("<>\n")) + + state = safe_state->re_state; + + slice_start = state->slice_start; + slice_end = state->slice_end; + text_pos = state->text_pos; + + encoding = state->encoding; + + pattern = state->pattern; + + text_length = state->text_length; + + /* Look beyond any initial group node. */ + start_pair.node = start_node; + if (recursive_call) + start_pair.test = locate_test_start(start_node); + else + start_pair.test = pattern->start_test; + + /* Is the pattern anchored to the start or end of the string? */ + switch (start_pair.test->op) { + case RE_OP_END_OF_STRING: + if (state->reverse) { + /* Searching backwards. */ + if (text_pos != text_length) + return RE_ERROR_FAILURE; + + /* Don't bother to search further because it's anchored. */ + search = FALSE; + } + break; + case RE_OP_START_OF_STRING: + if (!state->reverse) { + /* Searching forwards. */ + if (text_pos != 0) + return RE_ERROR_FAILURE; + + /* Don't bother to search further because it's anchored. */ + search = FALSE; + } + break; + } + + text = state->text; + char_at = state->char_at; + has_property = encoding->has_property; + pattern_step = state->reverse ? -1 : 1; + string_pos = -1; + do_search_start = pattern->do_search_start; + + /* Add a backtrack entry for failure. */ + if (!add_backtrack(safe_state, RE_OP_FAILURE)) + return RE_ERROR_BACKTRACKING; + +start_match: + /* If we're searching, advance along the string until there could be a + * match. + */ + if (pattern->pattern_call_ref >= 0) { + RE_GuardList* guard_list; + + guard_list = &state->group_call_guard_list[pattern->pattern_call_ref]; + guard_list->count = 0; + guard_list->last_text_pos = -1; + } + + /* Locate the required string, if there's one, unless this is a recursive + * call of 'basic_match'. + */ + if (!pattern->req_string || recursive_call) + found_pos = state->text_pos; + else { + found_pos = locate_required_string(safe_state); + if (found_pos < 0) + return RE_ERROR_FAILURE; + } + + if (search) { + state->text_pos = found_pos; + + if (do_search_start) { + RE_Position new_position; + +next_match_1: + /* 'search_start' will clear 'do_search_start' if it can't perform + * a fast search for the next possible match. This enables us to + * avoid the overhead of the call subsequently. + */ + if (!search_start(safe_state, &start_pair, &new_position, 0)) + return RE_ERROR_FAILURE; + + node = new_position.node; + text_pos = new_position.text_pos; + + if (node->op == RE_OP_SUCCESS) { + /* Must the match advance past its start? */ + if (text_pos != state->search_anchor || !state->must_advance) { + state->text_pos = text_pos; + return RE_ERROR_SUCCESS; + } + + state->text_pos = state->match_pos + pattern_step; + goto next_match_1; + } + + /* 'do_search_start' may have been cleared. */ + do_search_start = pattern->do_search_start; + } else { + /* Avoiding 'search_start', which we've found can't perform a fast + * search for the next possible match. + */ + node = start_node; + text_pos = state->text_pos; + +next_match_2: + if (state->reverse) { + if (text_pos < slice_start) + return RE_ERROR_FAILURE; + } else { + if (text_pos > slice_end) + return RE_ERROR_FAILURE; + } + + state->match_pos = text_pos; + + if (node->op == RE_OP_SUCCESS) { + /* Must the match advance past its start? */ + if (text_pos != state->search_anchor || !state->must_advance) { + BOOL success; + + if (state->match_all && !recursive_call) { + /* We want to match all of the slice. */ + if (state->reverse) + success = text_pos == slice_start; + else + success = text_pos == slice_end; + } else + success = TRUE; + + if (success) { + state->text_pos = text_pos; + return RE_ERROR_SUCCESS; + } + } + + text_pos = state->match_pos + pattern_step; + goto next_match_2; + } + } + } else { + /* The start position is anchored to the current position. */ + if (found_pos != state->text_pos) + return RE_ERROR_FAILURE; + + node = start_node; + } + +advance: + /* The main matching loop. */ + for (;;) { + TRACE(("%d|", text_pos)) + + /* Should we abort the matching? */ + ++state->iterations; + + if (state->iterations == 0 && safe_check_signals(safe_state)) + return RE_ERROR_INTERRUPTED; + + switch (node->op) { + case RE_OP_ANY: /* Any character, except a newline. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos < slice_end && char_at(text, text_pos) != '\n') { + ++text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_ALL: /* Any character at all. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos < slice_end) { + ++text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_ALL_REV: /* Any character at all backwards. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos > slice_start) { + --text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_REV: /* Any character backwards, except a newline. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos > slice_start && char_at(text, text_pos - 1) != '\n') + { + --text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_U: /* Any character, except a line separator. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos < slice_end && + !state->encoding->is_line_sep(char_at(text, text_pos))) { + ++text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_U_REV: /* Any character backwards, except a line separator. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos > slice_start && + !state->encoding->is_line_sep(char_at(text, text_pos - 1))) { + --text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ATOMIC: /* Atomic subpattern. */ + { + RE_Info info; + int status; + TRACE(("%s\n", re_op_text[node->op])) + + if (!add_backtrack(safe_state, RE_OP_ATOMIC)) + return RE_ERROR_BACKTRACKING; + state->backtrack->atomic.too_few_errors = state->too_few_errors; + state->backtrack->atomic.capture_change = state->capture_change; + + /* Save the groups. */ + if (!push_groups(safe_state)) + return RE_ERROR_MEMORY; + + save_info(state, &info); + + state->text_pos = text_pos; + state->must_advance = FALSE; + + status = basic_match(safe_state, node->nonstring.next_2.node, + FALSE, TRUE); + if (status < 0) + return status; + + reset_guards(state, node->values); + + restore_info(state, &info); + + if (status != RE_ERROR_SUCCESS) + goto backtrack; + + node = node->next_1.node; + text_pos = state->text_pos; + break; + } + case RE_OP_BOUNDARY: /* At a word boundary. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_boundary(state, text_pos) == node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_BRANCH: /* 2-way branch. */ + { + RE_Position next_position; + TRACE(("%s\n", re_op_text[node->op])) + + if (try_match(state, &node->next_1, text_pos, &next_position)) { + if (!add_backtrack(safe_state, RE_OP_BRANCH)) + return RE_ERROR_BACKTRACKING; + state->backtrack->branch.position.node = + node->nonstring.next_2.node; + state->backtrack->branch.position.text_pos = text_pos; + + node = next_position.node; + text_pos = next_position.text_pos; + } else + node = node->nonstring.next_2.node; + break; + } + case RE_OP_CALL_REF: /* A group call reference. */ + { + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + if (!push_group_return(safe_state, NULL)) + return RE_ERROR_MEMORY; + + if (!add_backtrack(safe_state, RE_OP_CALL_REF)) + return RE_ERROR_BACKTRACKING; + + node = node->next_1.node; + break; + } + case RE_OP_CHARACTER: /* A character literal. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos < slice_end && (char_at(text, text_pos) == + node->values[0]) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos < slice_end && same_char_ign(encoding, char_at(text, + text_pos), node->values[0]) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_CHARACTER_IGN_REV: /* A character literal backwards, ignoring case. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos > slice_start && same_char_ign(encoding, char_at(text, + text_pos - 1), node->values[0]) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_CHARACTER_REV: /* A character literal backwards. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos > slice_start && (char_at(text, text_pos - 1) == + node->values[0]) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_default_boundary(state, text_pos) == node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_default_word_end(state, text_pos) == node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_default_word_start(state, text_pos) == + node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_FUZZY: /* End of fuzzy matching. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (!fuzzy_insert(safe_state, text_pos, node)) + return RE_ERROR_BACKTRACKING; + + /* If there were too few errors, in the fuzzy section, try again. + */ + if (state->too_few_errors) { + state->too_few_errors = FALSE; + goto backtrack; + } + + node = node->next_1.node; + break; + case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + BOOL changed; + BOOL try_body; + RE_Position next_body_position; + BOOL try_tail; + RE_Position next_tail_position; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* The body has matched successfully at this position. */ + if (!guard_repeat(safe_state, index, rp_data->start, + RE_STATUS_BODY, FALSE)) + return RE_ERROR_MEMORY; + + ++rp_data->count; + + /* Have we advanced through the text or has a capture group change? + */ + changed = rp_data->capture_change != state->capture_change || + text_pos != rp_data->start; + + /* The counts are of type size_t, so the format needs to specify + * that. + */ + TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T + "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], + node->values[2], rp_data->count)) + + /* Could the body or tail match? */ + try_body = changed && (rp_data->count < node->values[2] || + ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, + text_pos, RE_STATUS_BODY) && try_match(state, &node->next_1, + text_pos, &next_body_position); + try_tail = (!changed || rp_data->count >= node->values[1]) && + !is_repeat_guarded(safe_state, index, text_pos, RE_STATUS_TAIL) + && try_match(state, &node->nonstring.next_2, text_pos, + &next_tail_position); + if (!try_body && !try_tail) { + /* Neither the body nor the tail could match. */ + --rp_data->count; + goto backtrack; + } + + /* Record info in case we backtrack into the body. */ + if (!add_backtrack(safe_state, RE_OP_BODY_END)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count - 1; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + + if (try_body) { + if (try_tail) { + /* Both the body and the tail could match, but the body + * takes precedence. If the body fails to match then we + * want to try the tail before backtracking further. + */ + + /* Record backtracking info for matching the tail. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_tail_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = text_pos; + } + + /* Record backtracking info in case the body fails to match. */ + if (!add_backtrack(safe_state, RE_OP_BODY_START)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = text_pos; + + rp_data->capture_change = state->capture_change; + rp_data->start = text_pos; + + /* Advance into the body. */ + node = next_body_position.node; + text_pos = next_body_position.text_pos; + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_END_GROUP: /* End of a capture group. */ + { + RE_CODE private_index; + RE_CODE public_index; + RE_GroupData* group; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + */ + private_index = node->values[0]; + public_index = node->values[1]; + group = &state->groups[private_index - 1]; + + if (!add_backtrack(safe_state, RE_OP_END_GROUP)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->group.private_index = private_index; + bt_data->group.public_index = public_index; + bt_data->group.text_pos = group->span.end; + bt_data->group.capture = (BOOL)node->values[2]; + bt_data->group.current_capture = group->current_capture; + + if (pattern->group_info[private_index - 1].referenced && + group->span.end != text_pos) + ++state->capture_change; + group->span.end = text_pos; + + /* Save the capture? */ + if (node->values[2]) { + group->current_capture = group->capture_count; + if (!save_capture(safe_state, private_index, public_index)) + return RE_ERROR_MEMORY; + } + + node = node->next_1.node; + break; + } + case RE_OP_END_LAZY_REPEAT: /* End of a lazy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + BOOL changed; + BOOL try_body; + RE_Position next_body_position; + BOOL try_tail; + RE_Position next_tail_position; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* The body has matched successfully at this position. */ + if (!guard_repeat(safe_state, index, rp_data->start, + RE_STATUS_BODY, FALSE)) + return RE_ERROR_MEMORY; + + ++rp_data->count; + + /* Have we advanced through the text or has a capture group change? + */ + changed = rp_data->capture_change != state->capture_change || + text_pos != rp_data->start; + + /* The counts are of type size_t, so the format needs to specify + * that. + */ + TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T + "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], + node->values[2], rp_data->count)) + + /* Could the body or tail match? */ + try_body = changed && (rp_data->count < node->values[2] || + ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, + text_pos, RE_STATUS_BODY) && try_match(state, &node->next_1, + text_pos, &next_body_position); + try_tail = (!changed || rp_data->count >= node->values[1]) && + try_match(state, &node->nonstring.next_2, text_pos, + &next_tail_position); + if (!try_body && !try_tail) { + /* Neither the body nor the tail could match. */ + --rp_data->count; + goto backtrack; + } + + /* Record info in case we backtrack into the body. */ + if (!add_backtrack(safe_state, RE_OP_BODY_END)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count - 1; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + + if (try_body) { + if (try_tail) { + /* Both the body and the tail could match, but the tail + * takes precedence. If the tail fails to match then we + * want to try the body before backtracking further. + */ + + /* Record backtracking info for matching the body. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_body_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = text_pos; + + /* Advance into the tail. */ + node = next_tail_position.node; + text_pos = next_tail_position.text_pos; + } else { + /* Only the body could match. */ + + /* Record backtracking info in case the body fails to + * match. + */ + if (!add_backtrack(safe_state, RE_OP_BODY_START)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = text_pos; + + rp_data->capture_change = state->capture_change; + rp_data->start = text_pos; + + /* Advance into the body. */ + node = next_body_position.node; + text_pos = next_body_position.text_pos; + } + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_END_OF_LINE: /* At the end of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == text_length || char_at(text, text_pos) == '\n') + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_LINE_U: /* At the end of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (encoding->at_line_end(state, text_pos)) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_STRING: /* At the end of the string. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == text_length) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == text_length || text_pos == state->final_newline) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == text_length || text_pos == state->final_line_sep) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_WORD: /* At end of a word. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_word_end(state, text_pos) == node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_FUZZY: /* Fuzzy matching. */ + { + RE_FuzzyInfo* fuzzy_info; + RE_BacktrackData* bt_data; + TRACE(("%s\n", re_op_text[node->op])) + + fuzzy_info = &state->fuzzy_info; + + /* Save the current fuzzy info. */ + if (!add_backtrack(safe_state, RE_OP_FUZZY)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + memmove(&bt_data->fuzzy.fuzzy_info, fuzzy_info, + sizeof(RE_FuzzyInfo)); + bt_data->fuzzy.index = node->values[0]; + bt_data->fuzzy.text_pos = text_pos; + + /* Initialise the new fuzzy info. */ + memset(fuzzy_info->counts, 0, 4 * sizeof(fuzzy_info->counts[0])); + fuzzy_info->total_cost = 0; + fuzzy_info->node = node; + + node = node->next_1.node; + break; + } + case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_grapheme_boundary(state, text_pos) == node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + RE_BacktrackData* bt_data; + BOOL try_body; + RE_Position next_body_position; + BOOL try_tail; + RE_Position next_tail_position; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* We might need to backtrack into the head, so save the current + * repeat. + */ + if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = text_pos; + + /* Initialise the new repeat. */ + rp_data->count = 0; + rp_data->start = text_pos; + rp_data->capture_change = state->capture_change; + + /* Could the body or tail match? */ + try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, + index, text_pos, RE_STATUS_BODY) && try_match(state, + &node->next_1, text_pos, &next_body_position); + try_tail = node->values[1] == 0 && try_match(state, + &node->nonstring.next_2, text_pos, &next_tail_position); + if (!try_body && !try_tail) + /* Neither the body nor the tail could match. */ + goto backtrack; + + if (try_body) { + if (try_tail) { + /* Both the body and the tail could match, but the body + * takes precedence. If the body fails to match then we + * want to try the tail before backtracking further. + */ + + /* Record backtracking info for matching the tail. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_tail_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = text_pos; + } + + /* Advance into the body. */ + node = next_body_position.node; + text_pos = next_body_position.text_pos; + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + size_t count; + BOOL match; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + if (is_repeat_guarded(safe_state, index, text_pos, RE_STATUS_BODY)) + goto backtrack; + + /* Count how many times the character repeats, up to the maximum. + */ + count = count_one(state, node->nonstring.next_2.node, text_pos, + (size_t)node->values[2]); + + /* Unmatch until it's not guarded. */ + match = FALSE; + for (;;) { + if (count < node->values[1]) + /* The number of repeats is below the minimum. */ + break; + + if (!is_repeat_guarded(safe_state, index, text_pos + + (Py_ssize_t)count * node->step, RE_STATUS_TAIL)) { + /* It's not guarded at this position. */ + match = TRUE; + break; + } + + if (count == 0) + break; + + --count; + } + + if (!match) { + /* The repeat has failed to match at this position. */ + if (!guard_repeat(safe_state, index, text_pos, RE_STATUS_BODY, + TRUE)) + return RE_ERROR_MEMORY; + goto backtrack; + } + + /* Record the backtracking info. */ + if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT_ONE)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position.node = node; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = rp_data->start; + bt_data->repeat.count = rp_data->count; + + rp_data->start = text_pos; + rp_data->count = count; + + /* Advance into the tail. */ + text_pos += (Py_ssize_t)count * node->step; + node = node->next_1.node; + break; + } + case RE_OP_GROUP_CALL: /* Group call. */ + { + RE_CODE index; + size_t g; + size_t r; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + index = node->values[0]; + + /* Save the capture groups and repeat guards. */ + if (!push_group_return(safe_state, node->next_1.node)) + return RE_ERROR_MEMORY; + + /* Clear the capture groups for the group call. They'll be restored + * on return. + */ + for (g = 0; g < state->pattern->true_group_count; g++) { + RE_GroupData* group; + + group = &state->groups[g]; + group->span.start = -1; + group->span.end = -1; + group->current_capture = -1; + } + + /* Clear the repeat guards for the group call. They'll be restored + * on return. + */ + for (r = 0; r < state->pattern->repeat_count; r++) { + RE_RepeatData* repeat; + + repeat = &state->repeats[r]; + repeat->body_guard_list.count = 0; + repeat->body_guard_list.last_text_pos = -1; + repeat->tail_guard_list.count = 0; + repeat->tail_guard_list.last_text_pos = -1; + } + + /* Call a group, skipping its CALL_REF node. */ + node = pattern->call_ref_info[index].node->next_1.node; + + if (!add_backtrack(safe_state, RE_OP_GROUP_CALL)) + return RE_ERROR_BACKTRACKING; + break; + } + case RE_OP_GROUP_EXISTS: /* Capture group exists. */ + { + RE_GroupData* group; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture >= 0) + node = node->next_1.node; + else + node = node->nonstring.next_2.node; + break; + } + case RE_OP_GROUP_RETURN: /* Group return. */ + { + RE_Node* return_node; + RE_BacktrackData* bt_data; + TRACE(("%s\n", re_op_text[node->op])) + + return_node = top_group_return(state); + + if (!add_backtrack(safe_state, RE_OP_GROUP_RETURN)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->group_call.node = return_node; + bt_data->group_call.capture_change = state->capture_change; + + if (return_node) { + /* The group was called. */ + node = return_node; + + /* Save the groups. */ + if (!push_groups(safe_state)) + return RE_ERROR_MEMORY; + + /* Save the repeats. */ + if (!push_repeats(safe_state)) + return RE_ERROR_MEMORY; + } else + /* The group was not called. */ + node = node->next_1.node; + + pop_group_return(state); + break; + } + case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + RE_BacktrackData* bt_data; + BOOL try_body; + RE_Position next_body_position; + BOOL try_tail; + RE_Position next_tail_position; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* We might need to backtrack into the head, so save the current + * repeat. + */ + if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = text_pos; + + /* Initialise the new repeat. */ + rp_data->count = 0; + rp_data->start = text_pos; + rp_data->capture_change = state->capture_change; + + /* Could the body or tail match? */ + try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, + index, text_pos, RE_STATUS_BODY) && try_match(state, + &node->next_1, text_pos, &next_body_position); + try_tail = node->values[1] == 0 && try_match(state, + &node->nonstring.next_2, text_pos, &next_tail_position); + if (!try_body && !try_tail) + /* Neither the body nor the tail could match. */ + goto backtrack; + + if (try_body) { + if (try_tail) { + /* Both the body and the tail could match, but the tail + * takes precedence. If the tail fails to match then we + * want to try the body before backtracking further. + */ + + /* Record backtracking info for matching the tail. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_body_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = text_pos; + + /* Advance into the tail. */ + node = next_tail_position.node; + text_pos = next_tail_position.text_pos; + } else { + /* Advance into the body. */ + node = next_body_position.node; + text_pos = next_body_position.text_pos; + } + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + size_t count; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + if (is_repeat_guarded(safe_state, index, text_pos, RE_STATUS_BODY)) + goto backtrack; + + /* Count how many times the character repeats, up to the minimum. + */ + count = count_one(state, node->nonstring.next_2.node, text_pos, + (size_t)node->values[1]); + + /* Have we matched at least the minimum? */ + if (count < node->values[1]) { + /* The repeat has failed to match at this position. */ + if (!guard_repeat(safe_state, index, text_pos, RE_STATUS_BODY, + TRUE)) + return RE_ERROR_MEMORY; + goto backtrack; + } + + if (count < node->values[2]) { + /* The match is shorter than the maximum, so we might need to + * backtrack the repeat to consume more. + */ + RE_BacktrackData* bt_data; + + /* Get the offset to the repeat values in the context. */ + rp_data = &state->repeats[index]; + if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT_ONE)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position.node = node; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = rp_data->start; + bt_data->repeat.count = rp_data->count; + + rp_data->start = text_pos; + rp_data->count = count; + } + + /* Advance into the tail. */ + text_pos += (Py_ssize_t)count * node->step; + node = node->next_1.node; + break; + } + case RE_OP_LOOKAROUND: /* Lookaround. */ + { + RE_Info info; + size_t capture_change; + BOOL too_few_errors; + int status; + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + /* Save the groups. */ + if (!push_groups(safe_state)) + return RE_ERROR_MEMORY; + + capture_change = state->capture_change; + + /* Save the other info. */ + save_info(state, &info); + + state->slice_start = 0; + state->slice_end = text_length; + state->text_pos = text_pos; + state->must_advance = FALSE; + + too_few_errors = state->too_few_errors; + + status = basic_match(safe_state, node->nonstring.next_2.node, + FALSE, TRUE); + if (status < 0) + return status; + + reset_guards(state, node->values + 1); + + state->slice_end = slice_end; + state->slice_start = slice_start; + + /* Restore the other info. */ + restore_info(state, &info); + + if (node->match) { + /* It's a positive lookaround. */ + if (status == RE_ERROR_SUCCESS) { + /* It succeeded, so the groups and certain flags may have + * changed. + */ + if (!add_backtrack(safe_state, RE_OP_LOOKAROUND)) + return RE_ERROR_BACKTRACKING; + + /* We'll restore the groups and flags on backtracking. */ + state->backtrack->lookaround.too_few_errors = + too_few_errors; + state->backtrack->lookaround.capture_change = + capture_change; + } else { + /* It failed, so the groups and certain flags haven't + * changed. + */ + drop_groups(state); + goto backtrack; + } + } else { + /* It's a negative lookaround. */ + if (status == RE_ERROR_SUCCESS) { + /* It succeeded, so the groups and certain flags may have + * changed. We need to restore them. + */ + pop_groups(state); + state->too_few_errors = too_few_errors; + state->capture_change = capture_change; + goto backtrack; + } else + /* It failed, so the groups and certain flags haven't + * changed. + */ + drop_groups(state); + } + + node = node->next_1.node; + break; + } + case RE_OP_PROPERTY: /* A property. */ + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos < slice_end && has_property(node->values[0], + char_at(text, text_pos)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos < slice_end && has_property_ign(encoding, + node->values[0], char_at(text, text_pos)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_PROPERTY_IGN_REV: /* A property backwards, ignoring case. */ + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos > slice_start && has_property_ign(encoding, + node->values[0], char_at(text, text_pos - 1)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_PROPERTY_REV: /* A property backwards. */ + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (text_pos > slice_start && has_property(node->values[0], + char_at(text, text_pos - 1)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE: /* A range. */ + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (text_pos < slice_end && in_range(node->values[0], + node->values[1], char_at(text, text_pos)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_IGN: /* A range, ignoring case. */ + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (text_pos < slice_end && in_range_ign(encoding, node->values[0], + node->values[1], char_at(text, text_pos)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_IGN_REV: /* A range backwards, ignoring case. */ + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (text_pos > slice_start && in_range_ign(encoding, + node->values[0], node->values[1], char_at(text, text_pos - 1)) == + node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_REV: /* A range backwards. */ + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (text_pos > slice_start && in_range(node->values[0], + node->values[1], char_at(text, text_pos - 1)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_REF_GROUP: /* Reference to a capture group. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + Py_ssize_t available; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + /* Are there enough characters? */ + available = state->slice_end - text_pos; + if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > + available) + goto backtrack; + + if (string_pos < 0) + string_pos = span->start; + + /* Try comparing. */ + while (string_pos < span->end) { + if (text_pos < slice_end && char_at(text, text_pos) == + char_at(text, string_pos)) { + ++string_pos; + ++text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, 1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int folded_len; + int gfolded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + Py_UCS4 gfolded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = span->start; + folded_pos = 0; + folded_len = 0; + gfolded_pos = 0; + gfolded_len = 0; + } else { + folded_len = full_case_fold(char_at(text, text_pos), folded); + gfolded_len = full_case_fold(char_at(text, string_pos), + gfolded); + } + + /* Try comparing. */ + while (string_pos < span->end) { + /* Case-fold at current position in text. */ + if (folded_pos >= folded_len) { + if (text_pos < slice_end) + folded_len = full_case_fold(char_at(text, text_pos), + folded); + else + folded_len = 0; + folded_pos = 0; + } + + /* Case-fold at current position in group. */ + if (gfolded_pos >= gfolded_len) { + gfolded_len = full_case_fold(char_at(text, string_pos), + gfolded); + gfolded_pos = 0; + } + + if (folded_pos < folded_len && folded[folded_pos] == + gfolded[gfolded_pos]) { + ++folded_pos; + ++gfolded_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string_fld2(safe_state, search, &text_pos, + node, &folded_pos, folded_len, &string_pos, &gfolded_pos, + gfolded_len, &matched, 1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + + if (folded_pos >= folded_len && folded_len > 0) + ++text_pos; + + if (gfolded_pos >= gfolded_len) + ++string_pos; + } + + string_pos = -1; + + if (folded_pos < folded_len || gfolded_pos < gfolded_len) + goto backtrack; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int folded_len; + int gfolded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + Py_UCS4 gfolded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = span->end; + folded_pos = 0; + folded_len = 0; + gfolded_pos = 0; + gfolded_len = 0; + } else { + folded_len = full_case_fold(char_at(text, text_pos - 1), + folded); + gfolded_len = full_case_fold(char_at(text, string_pos - 1), + gfolded); + } + + /* Try comparing. */ + while (string_pos > span->start) { + /* Case-fold at current position in text. */ + if (folded_pos <= 0) { + if (text_pos > slice_start) + folded_len = full_case_fold(char_at(text, text_pos - + 1), folded); + else + folded_len = 0; + folded_pos = folded_len; + } + + /* Case-fold at current position in group. */ + if (gfolded_pos <= 0) { + gfolded_len = full_case_fold(char_at(text, string_pos - 1), + gfolded); + gfolded_pos = gfolded_len; + } + + if (folded_pos > 0 && folded[folded_pos - 1] == + gfolded[gfolded_pos - 1]) { + --folded_pos; + --gfolded_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string_fld2(safe_state, search, &text_pos, + node, &folded_pos, folded_len, &string_pos, &gfolded_pos, + gfolded_len, &matched, -1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + + if (folded_pos <= 0 && folded_len > 0) + --text_pos; + + if (gfolded_pos <= 0) + --string_pos; + } + + string_pos = -1; + + if (folded_pos > 0 || gfolded_pos > 0) + goto backtrack; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + Py_ssize_t available; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + /* Are there enough characters? */ + available = state->slice_end - text_pos; + if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > + available) + goto backtrack; + + if (string_pos < 0) + string_pos = span->start; + + /* Try comparing. */ + while (string_pos < span->end) { + if (text_pos < slice_end && same_char_ign(encoding, + char_at(text, text_pos), char_at(text, string_pos))) { + ++string_pos; + ++text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, 1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + Py_ssize_t available; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + /* Are there enough characters? */ + available = text_pos - state->slice_start; + if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > + available) + goto backtrack; + + if (string_pos < 0) + string_pos = span->end; + + /* Try comparing. */ + while (string_pos > span->start) { + if (text_pos > slice_start && same_char_ign(encoding, + char_at(text, text_pos - 1), char_at(text, string_pos - 1))) + { + --string_pos; + --text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, -1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_REV: /* Reference to a capture group. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + Py_ssize_t available; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + /* Are there enough characters? */ + available = text_pos - state->slice_start; + if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > + available) + goto backtrack; + + if (string_pos < 0) + string_pos = span->end; + + /* Try comparing. */ + while (string_pos > span->start) { + if (text_pos > slice_start && char_at(text, text_pos - 1) == + char_at(text, string_pos - 1)) { + --string_pos; + --text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, -1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + if (text_pos == state->search_anchor) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF: /* Character set. */ + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (text_pos < slice_end && in_set(encoding, node, char_at(text, + text_pos)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (text_pos < slice_end && in_set_ign(encoding, node, + char_at(text, text_pos)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (text_pos > slice_start && in_set_ign(encoding, node, + char_at(text, text_pos - 1)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF_REV: /* Character set. */ + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (text_pos > slice_start && in_set(encoding, node, char_at(text, + text_pos - 1)) == node->match) { + text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_GROUP: /* Start of a capture group. */ + { + RE_CODE private_index; + RE_CODE public_index; + RE_GroupData* group; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + */ + private_index = node->values[0]; + public_index = node->values[1]; + group = &state->groups[private_index - 1]; + + if (!add_backtrack(safe_state, RE_OP_START_GROUP)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->group.private_index = private_index; + bt_data->group.public_index = public_index; + bt_data->group.text_pos = group->span.start; + bt_data->group.capture = (BOOL)node->values[2]; + bt_data->group.current_capture = group->current_capture; + + if (pattern->group_info[private_index - 1].referenced && + group->span.start != text_pos) + ++state->capture_change; + group->span.start = text_pos; + + /* Save the capture? */ + if (node->values[2]) { + group->current_capture = group->capture_count; + if (!save_capture(safe_state, private_index, public_index)) + return RE_ERROR_MEMORY; + } + + node = node->next_1.node; + break; + } + case RE_OP_START_OF_LINE: /* At the start of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == 0 || char_at(text, text_pos - 1) == '\n') + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_OF_LINE_U: /* At the start of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (encoding->at_line_start(state, text_pos)) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_OF_STRING: /* At the start of the string. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == 0) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_OF_WORD: /* At start of a word. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (encoding->at_word_start(state, text_pos) == node->match) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_STRING: /* A string literal. */ + { + Py_ssize_t length; + Py_ssize_t available; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == + state->req_pos && string_pos < 0) + text_pos = state->req_end; + else { + /* Are there enough characters to match? */ + length = node->value_count; + available = slice_end - text_pos; + if (length > available && !(node->status & RE_STATUS_FUZZY)) + goto backtrack; + + if (string_pos < 0) + string_pos = 0; + + values = node->values; + + /* Try comparing. */ + while (string_pos < length) { + if (text_pos < slice_end && char_at(text, text_pos) == + values[string_pos]) { + ++string_pos; + ++text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, 1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_FLD: /* A string literal, ignoring case. */ + { + Py_ssize_t length; + Py_ssize_t available; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + RE_CODE* values; + int folded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == + state->req_pos && string_pos < 0) + text_pos = state->req_end; + else { + /* Are there enough characters to match? */ + length = node->value_count; + available = slice_end - text_pos; + if ((Py_ssize_t)possible_unfolded_length(length) > available && + !(node->status & RE_STATUS_FUZZY)) + goto backtrack; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = 0; + folded_pos = 0; + folded_len = 0; + } else { + folded_len = full_case_fold(char_at(text, text_pos), + folded); + if (folded_pos >= folded_len) { + if (text_pos >= slice_end) + goto backtrack; + + ++text_pos; + folded_pos = 0; + folded_len = 0; + } + } + + values = node->values; + + /* Try comparing. */ + while (string_pos < length) { + if (folded_pos >= folded_len) { + folded_len = full_case_fold(char_at(text, text_pos), + folded); + folded_pos = 0; + } + + if (same_char_ign(encoding, folded[folded_pos], + values[string_pos])) { + ++string_pos; + ++folded_pos; + + if (folded_pos >= folded_len) + ++text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string_fld(safe_state, search, + &text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, 1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos >= folded_len) + ++text_pos; + } else { + string_pos = -1; + goto backtrack; + } + } + + if (node->status & RE_STATUS_FUZZY) { + while (folded_pos < folded_len) { + BOOL matched; + + if (!fuzzy_match_string_fld(safe_state, search, + &text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, 1)) + return RE_ERROR_BACKTRACKING; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos >= folded_len) + ++text_pos; + } + } + + if (folded_pos < folded_len) { + string_pos = -1; + goto backtrack; + } + + string_pos = -1; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_FLD_REV: /* A string literal, ignoring case. */ + { + Py_ssize_t length; + Py_ssize_t available; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + RE_CODE* values; + int folded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == + state->req_pos && string_pos < 0) + text_pos = state->req_end; + else { + /* Are there enough characters to match? */ + length = node->value_count; + available = text_pos - slice_start; + if ((Py_ssize_t)possible_unfolded_length(length) > available && + !(node->status & RE_STATUS_FUZZY)) + goto backtrack; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = length; + folded_pos = 0; + folded_len = 0; + } else { + folded_len = full_case_fold(char_at(text, text_pos - 1), + folded); + if (folded_pos <= 0) { + if (text_pos <= slice_start) + goto backtrack; + + --text_pos; + folded_pos = 0; + folded_len = 0; + } + } + + values = node->values; + + /* Try comparing. */ + while (string_pos > 0) { + if (folded_pos <= 0) { + folded_len = full_case_fold(char_at(text, text_pos - + 1), folded); + folded_pos = folded_len; + } + + if (same_char_ign(encoding, folded[folded_pos - 1], + values[string_pos - 1])) { + --string_pos; + --folded_pos; + + if (folded_pos <= 0) + --text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string_fld(safe_state, search, + &text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, -1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos <= 0) + --text_pos; + } else { + string_pos = -1; + goto backtrack; + } + } + + if (node->status & RE_STATUS_FUZZY) { + while (folded_pos > 0) { + BOOL matched; + + if (!fuzzy_match_string_fld(safe_state, search, + &text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, -1)) + return RE_ERROR_BACKTRACKING; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos <= 0) + --text_pos; + } + } + + if (folded_pos > 0) { + string_pos = -1; + goto backtrack; + } + + string_pos = -1; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ + { + Py_ssize_t length; + Py_ssize_t available; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == + state->req_pos && string_pos < 0) + text_pos = state->req_end; + else { + /* Are there enough characters to match? */ + length = node->value_count; + available = slice_end - text_pos; + if (length > available && !(node->status & RE_STATUS_FUZZY)) + goto backtrack; + + if (string_pos < 0) + string_pos = 0; + + values = node->values; + + /* Try comparing. */ + while (string_pos < length) { + if (text_pos < slice_end && same_char_ign(encoding, + char_at(text, text_pos), values[string_pos])) { + ++string_pos; + ++text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, 1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_IGN_REV: /* A string literal, ignoring case. */ + { + Py_ssize_t length; + Py_ssize_t available; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == + state->req_pos && string_pos < 0) + text_pos = state->req_end; + else { + /* Are there enough characters to match? */ + length = node->value_count; + available = text_pos - slice_start; + if (length > available && !(node->status & RE_STATUS_FUZZY)) + goto backtrack; + + if (string_pos < 0) + string_pos = length; + + values = node->values; + + /* Try comparing. */ + while (string_pos > 0) { + if (text_pos > slice_start && same_char_ign(encoding, + char_at(text, text_pos - 1), values[string_pos - 1])) { + --string_pos; + --text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, -1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_REV: /* A string literal. */ + { + Py_ssize_t length; + Py_ssize_t available; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == + state->req_pos && string_pos < 0) + text_pos = state->req_end; + else { + /* Are there enough characters to match? */ + length = node->value_count; + available = text_pos - slice_start; + if (length > available && !(node->status & RE_STATUS_FUZZY)) + goto backtrack; + + if (string_pos < 0) + string_pos = length; + + values = node->values; + + /* Try comparing. */ + while (string_pos > 0) { + if (text_pos > slice_start && char_at(text, text_pos - 1) + == values[string_pos - 1]) { + --string_pos; + --text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + if (!fuzzy_match_string(safe_state, search, &text_pos, + node, &string_pos, &matched, -1)) + return RE_ERROR_BACKTRACKING; + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET: /* Member of a string set. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + state->text_pos = text_pos; + status = string_set_match(safe_state, node); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + text_pos = state->text_pos; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_FLD: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + state->text_pos = text_pos; + status = string_set_match_fld(safe_state, node); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + text_pos = state->text_pos; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_FLD_REV: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + state->text_pos = text_pos; + status = string_set_match_fld_rev(safe_state, node); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + text_pos = state->text_pos; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_IGN: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + state->text_pos = text_pos; + status = string_set_match_ign(safe_state, node); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + text_pos = state->text_pos; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_IGN_REV: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + state->text_pos = text_pos; + status = string_set_match_ign_rev(safe_state, node); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + text_pos = state->text_pos; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_REV: /* Member of a string set. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + state->text_pos = text_pos; + status = string_set_match_rev(safe_state, node); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + text_pos = state->text_pos; + node = node->next_1.node; + break; + } + case RE_OP_SUCCESS: /* Success. */ + /* Must the match advance past its start? */ + TRACE(("%s\n", re_op_text[node->op])) + + if (text_pos == state->search_anchor && state->must_advance) + goto backtrack; + + if (state->match_all && !recursive_call) { + /* We want to match all of the slice. */ + if (state->reverse) { + if (text_pos != slice_start) + goto backtrack; + } else { + if (text_pos != slice_end) + goto backtrack; + } + } + + state->text_pos = text_pos; + return RE_ERROR_SUCCESS; + default: /* Illegal opcode! */ + TRACE(("UNKNOWN OP %d\n", node->op)) + return RE_ERROR_ILLEGAL; + } + } + +backtrack: + for (;;) { + RE_BacktrackData* bt_data; + TRACE(("BACKTRACK ")) + + /* Should we abort the matching? */ + ++state->iterations; + + if (state->iterations == 0 && safe_check_signals(safe_state)) + return RE_ERROR_INTERRUPTED; + + bt_data = last_backtrack(state); + + switch (bt_data->op) { + case RE_OP_ANY: /* Any character, except a newline. */ + case RE_OP_ANY_ALL: /* Any character at all. */ + case RE_OP_ANY_ALL_REV: /* Any character at all. */ + case RE_OP_ANY_REV: /* Any character, except a newline. */ + case RE_OP_ANY_U: /* Any character, except a line separator. */ + case RE_OP_ANY_U_REV: /* Any character, except a line separator. */ + case RE_OP_CHARACTER: /* A character literal. */ + case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ + case RE_OP_CHARACTER_IGN_REV: /* A character literal, ignoring case. */ + case RE_OP_CHARACTER_REV: /* A character literal. */ + case RE_OP_PROPERTY: /* A property. */ + case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ + case RE_OP_PROPERTY_IGN_REV: /* A property, ignoring case. */ + case RE_OP_PROPERTY_REV: /* A property. */ + case RE_OP_RANGE: /* A range. */ + case RE_OP_RANGE_IGN: /* A range, ignoring case. */ + case RE_OP_RANGE_IGN_REV: /* A range, ignoring case. */ + case RE_OP_RANGE_REV: /* A range. */ + case RE_OP_SET_DIFF: /* Character set. */ + case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_DIFF_REV: /* Character set. */ + case RE_OP_SET_INTER: /* Character set. */ + case RE_OP_SET_INTER_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_REV: /* Character set. */ + case RE_OP_SET_SYM_DIFF: /* Character set. */ + case RE_OP_SET_SYM_DIFF_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_SYM_DIFF_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_SYM_DIFF_REV: /* Character set. */ + case RE_OP_SET_UNION: /* Character set. */ + case RE_OP_SET_UNION_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_UNION_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_UNION_REV: /* Character set. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + if (!retry_fuzzy_match_one(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (node) + goto advance; + break; + case RE_OP_ATOMIC: /* Atomic subpattern. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Restore the groups and certain flags and then backtrack. */ + pop_groups(state); + state->too_few_errors = bt_data->atomic.too_few_errors; + state->capture_change = bt_data->atomic.capture_change; + discard_backtrack(state); + break; + case RE_OP_BODY_END: + { + RE_RepeatData* rp_data; + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* We're backtracking into the body. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Restore the repeat info. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + discard_backtrack(state); + break; + } + case RE_OP_BODY_START: + { + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* The body may have failed to match at this position. */ + if (!guard_repeat(safe_state, bt_data->repeat.index, + bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) + return RE_ERROR_MEMORY; + + discard_backtrack(state); + break; + } + case RE_OP_BOUNDARY: /* At a word boundary. */ + case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ + case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ + case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ + case RE_OP_END_OF_LINE: /* At the end of a line. */ + case RE_OP_END_OF_LINE_U: /* At the end of a line. */ + case RE_OP_END_OF_STRING: /* At the end of the string. */ + case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ + case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ + case RE_OP_END_OF_WORD: /* At end of a word. */ + case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ + case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + case RE_OP_START_OF_LINE: /* At the start of a line. */ + case RE_OP_START_OF_LINE_U: /* At the start of a line. */ + case RE_OP_START_OF_STRING: /* At the start of the string. */ + case RE_OP_START_OF_WORD: /* At start of a word. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + if (!retry_fuzzy_match_zero(safe_state, search, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + if (node) + goto advance; + break; + case RE_OP_BRANCH: /* 2-way branch. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + node = bt_data->branch.position.node; + text_pos = bt_data->branch.position.text_pos; + discard_backtrack(state); + goto advance; + case RE_OP_CALL_REF: /* A group call ref. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + pop_group_return(state); + discard_backtrack(state); + break; + case RE_OP_END_FUZZY: /* End of fuzzy matching. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* We need to retry the fuzzy match. */ + if (!retry_fuzzy_insert(safe_state, &text_pos, &node)) + return RE_ERROR_BACKTRACKING; + + /* If there were too few errors, in the fuzzy section, try again. + */ + if (state->too_few_errors) { + state->too_few_errors = FALSE; + goto backtrack; + } + + if (node) { + node = node->next_1.node; + goto advance; + } + break; + case RE_OP_END_GROUP: /* End of a capture group. */ + { + RE_CODE private_index; + RE_GroupData* group; + TRACE(("%s %d\n", re_op_text[bt_data->op], + bt_data->group.public_index)) + + private_index = bt_data->group.private_index; + group = &state->groups[private_index - 1]; + + /* Unsave the capture? */ + if (bt_data->group.capture) + unsave_capture(state, bt_data->group.private_index, + bt_data->group.public_index); + + if (pattern->group_info[private_index - 1].referenced && + group->span.end != bt_data->group.text_pos) + --state->capture_change; + group->span.end = bt_data->group.text_pos; + group->current_capture = bt_data->group.current_capture; + + discard_backtrack(state); + break; + } + case RE_OP_FAILURE: + { + Py_ssize_t end_pos; + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Do we have to advance? */ + if (!search) + return RE_ERROR_FAILURE; + + /* Can we advance? */ + text_pos = state->match_pos; + end_pos = state->reverse ? slice_start : slice_end; + if (text_pos == end_pos) + return RE_ERROR_FAILURE; + + /* Skip over any repeated leading characters. */ + switch (start_node->op) { + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + { + size_t count; + + /* How many characters did the repeat actually match? */ + count = count_one(state, start_node->nonstring.next_2.node, + text_pos, (size_t)start_node->values[2]); + + /* If it's fewer than the maximum then skip over those + * characters. + */ + if (count < start_node->values[2]) + text_pos += (Py_ssize_t)count * pattern_step; + break; + } + } + + /* Advance and try to match again. */ + state->text_pos = text_pos + pattern_step; + + goto start_match; + } + case RE_OP_FUZZY: /* Fuzzy matching. */ + { + RE_FuzzyInfo* fuzzy_info; + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Restore the previous fuzzy info. */ + fuzzy_info = &state->fuzzy_info; + memmove(fuzzy_info, &bt_data->fuzzy.fuzzy_info, + sizeof(RE_FuzzyInfo)); + + discard_backtrack(state); + break; + } + case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ + case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ + { + RE_RepeatData* rp_data; + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* The repeat failed to match. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* The body may have failed to match at this position. */ + if (!guard_repeat(safe_state, bt_data->repeat.index, + bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) + return RE_ERROR_MEMORY; + + /* Restore the previous repeat. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + discard_backtrack(state); + break; + } + case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ + { + RE_RepeatData* rp_data; + size_t count; + Py_ssize_t step; + Py_ssize_t pos; + Py_ssize_t limit; + RE_Node* test; + BOOL match; + BOOL m; + size_t index; + TRACE(("%s\n", re_op_text[bt_data->op])) + + node = bt_data->repeat.position.node; + + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Unmatch one character at a time until the tail could match or we + * have reached the minimum. + */ + text_pos = rp_data->start; + + count = rp_data->count; + step = node->step; + pos = text_pos + (Py_ssize_t)count * step; + limit = text_pos + (Py_ssize_t)node->values[1] * step; + + /* The tail failed to match at this position. */ + if (!guard_repeat(safe_state, bt_data->repeat.index, pos, + RE_STATUS_TAIL, TRUE)) + return RE_ERROR_MEMORY; + + if (count == node->values[1]) { + /* We've backtracked the repeat as far as we can. */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + break; + } + + test = node->next_1.test; + + m = test->match; + index = node->values[0]; + + match = FALSE; + + if (test->status & RE_STATUS_FUZZY) { + for (;;) { + RE_Position next_position; + + pos -= step; + + if (try_match(state, &node->next_1, pos, &next_position) && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + } else { + /* A repeated single-character match is often followed by a + * literal, so checking specially for it can be a good + * optimisation when working with long strings. + */ + switch (test->op) { + case RE_OP_CHARACTER: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MAX(limit, slice_start + 1); + + for (;;) { + if (pos <= limit) + break; + + --pos; + + if ((char_at(text, pos) == ch) == m && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_IGN: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MAX(limit, slice_start + 1); + + for (;;) { + if (pos <= limit) + break; + + --pos; + + if (same_char_ign(encoding, char_at(text, pos), ch) == + m && !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_IGN_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MIN(limit, slice_end - 1); + + for (;;) { + if (pos >= limit) + break; + + ++pos; + + if (same_char_ign(encoding, char_at(text, pos - 1), ch) + == m && !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MIN(limit, slice_end - 1); + + for (;;) { + if (pos >= limit) + break; + + ++pos; + + if ((char_at(text, pos - 1) == ch) == m && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = RE_MIN(pos, slice_end - length); + + for (;;) { + Py_ssize_t found; + + if (pos < limit) + break; + + found = string_search_rev(safe_state, test, pos + + length, limit); + if (found < 0) + break; + + pos = found - length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + --pos; + } + break; + } + case RE_OP_STRING_FLD: + { + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + size_t folded_length; + size_t i; + Py_UCS4 folded[RE_MAX_FOLDED]; + + full_case_fold = encoding->full_case_fold; + + folded_length = 0; + for (i = 0; i < test->value_count; i++) + folded_length += full_case_fold(test->values[i], + folded); + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = RE_MIN(pos, slice_end - (Py_ssize_t)folded_length); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + + if (pos < limit) + break; + + found = string_search_fld_rev(safe_state, test, pos + + folded_length, limit, &new_pos); + if (found < 0) + break; + + pos = found - folded_length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + --pos; + } + break; + } + case RE_OP_STRING_FLD_REV: + { + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + size_t folded_length; + size_t i; + Py_UCS4 folded[RE_MAX_FOLDED]; + + full_case_fold = encoding->full_case_fold; + + folded_length = 0; + for (i = 0; i < test->value_count; i++) + folded_length += full_case_fold(test->values[i], + folded); + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = RE_MAX(pos, slice_start + (Py_ssize_t)folded_length); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + + if (pos > limit) + break; + + found = string_search_fld(safe_state, test, pos - + folded_length, limit, &new_pos); + if (found < 0) + break; + + pos = found + folded_length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + ++pos; + } + break; + } + case RE_OP_STRING_IGN: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = RE_MIN(pos, slice_end - length); + + for (;;) { + Py_ssize_t found; + + if (pos < limit) + break; + + found = string_search_ign_rev(safe_state, test, pos + + length, limit); + if (found < 0) + break; + + pos = found - length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + --pos; + } + break; + } + case RE_OP_STRING_IGN_REV: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = RE_MAX(pos, slice_start + length); + + for (;;) { + Py_ssize_t found; + + if (pos > limit) + break; + + found = string_search_ign(safe_state, test, pos - + length, limit); + if (found < 0) + break; + + pos = found + length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + ++pos; + } + break; + } + case RE_OP_STRING_REV: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = RE_MAX(pos, slice_start + length); + + for (;;) { + Py_ssize_t found; + + if (pos > limit) + break; + + found = string_search(safe_state, test, pos - length, + limit); + if (found < 0) + break; + + pos = found + length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + ++pos; + } + break; + } + default: + for (;;) { + RE_Position next_position; + + pos -= step; + + if (try_match(state, &node->next_1, pos, + &next_position) && !is_repeat_guarded(safe_state, + index, pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + break; + } + } + + if (match) { + count = abs_ssize_t(pos - text_pos); + + /* The tail could match. */ + if (count > node->values[1]) + /* The match is longer than the minimum, so we might need + * to backtrack the repeat again to consume less. + */ + rp_data->count = count; + else { + /* We've reached or passed the minimum, so we won't need to + * backtrack the repeat again. + */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + + /* Have we passed the minimum? */ + if (count < node->values[1]) + goto backtrack; + } + + node = node->next_1.node; + text_pos = pos; + goto advance; + } else { + /* We've backtracked the repeat as far as we can. */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + } + break; + } + case RE_OP_GROUP_CALL: /* Group call. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + pop_group_return(state); + discard_backtrack(state); + break; + case RE_OP_GROUP_RETURN: /* Group return. */ + { + RE_Node* return_node; + TRACE(("%s\n", re_op_text[bt_data->op])) + + return_node = bt_data->group_call.node; + + push_group_return(safe_state, return_node); + + if (return_node) { + /* Restore the groups. */ + pop_groups(state); + state->capture_change = bt_data->group_call.capture_change; + + /* Restore the repeats. */ + pop_repeats(state); + } + + discard_backtrack(state); + break; + } + case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */ + { + RE_RepeatData* rp_data; + size_t count; + Py_ssize_t step; + Py_ssize_t pos; + size_t available; + size_t max_count; + Py_ssize_t limit; + RE_Node* repeated; + RE_Node* test; + BOOL match; + BOOL m; + size_t index; + TRACE(("%s\n", re_op_text[bt_data->op])) + + node = bt_data->repeat.position.node; + + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Match one character at a time until the tail could match or we + * have reached the maximum. + */ + text_pos = rp_data->start; + count = rp_data->count; + + step = node->step; + pos = text_pos + (Py_ssize_t)count * step; + available = step > 0 ? slice_end - text_pos : text_pos - + slice_start; + max_count = RE_MIN(node->values[2], available); + limit = text_pos + (Py_ssize_t)max_count * step; + + repeated = node->nonstring.next_2.node; + + test = node->next_1.test; + + m = test->match; + index = node->values[0]; + + match = FALSE; + + if (test->status & RE_STATUS_FUZZY) { + for (;;) { + RE_Position next_position; + + if (!match_one(state, repeated, pos)) + break; + + pos += step; + + if (try_match(state, &node->next_1, pos, &next_position) && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + } else { + /* A repeated single-character match is often followed by a + * literal, so checking specially for it can be a good + * optimisation when working with long strings. + */ + switch (test->op) { + case RE_OP_CHARACTER: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MIN(limit, slice_end - 1); + + for (;;) { + if (pos >= limit) + break; + + if (!match_one(state, repeated, pos)) + break; + + ++pos; + + if ((char_at(text, pos) == ch) == m && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_IGN: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MIN(limit, slice_end - 1); + + for (;;) { + if (pos >= limit) + break; + + if (!match_one(state, repeated, pos)) + break; + + ++pos; + + if (same_char_ign(encoding, char_at(text, pos), ch) == + m && !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_IGN_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MAX(limit, slice_start + 1); + + for (;;) { + if (pos <= limit) + break; + + if (!match_one(state, repeated, pos)) + break; + + --pos; + + if (same_char_ign(encoding, char_at(text, pos - 1), ch) + == m && !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = RE_MAX(limit, slice_start + 1); + + for (;;) { + if (pos <= limit) + break; + + if (!match_one(state, repeated, pos)) + break; + + --pos; + + if ((char_at(text, pos - 1) == ch) == m && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = RE_MIN(limit, slice_end - length); + + for (;;) { + Py_ssize_t found; + + if (pos >= limit) + break; + + /* Look for the tail string. */ + found = string_search(safe_state, test, pos + 1, limit + + length); + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found && match_one(state, repeated, + pos)) + ++pos; + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_FLD: + { + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = RE_MIN(limit, slice_end); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + + if (pos >= limit) + break; + + /* Look for the tail string. */ + found = string_search_fld(safe_state, test, pos + 1, + limit, &new_pos); + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found && match_one(state, repeated, + pos)) + ++pos; + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_FLD_REV: + { + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = RE_MIN(limit, slice_start); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + + if (pos <= limit) + break; + + /* Look for the tail string. */ + found = string_search_fld_rev(safe_state, test, pos - + 1, limit, &new_pos); + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found && match_one(state, repeated, + pos)) + --pos; + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_IGN: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = RE_MIN(limit, slice_end - length); + + for (;;) { + Py_ssize_t found; + + if (pos >= limit) + break; + + /* Look for the tail string. */ + found = string_search_ign(safe_state, test, pos + 1, + limit + length); + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found && match_one(state, repeated, + pos)) + ++pos; + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_IGN_REV: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = RE_MAX(limit, slice_start + length); + + for (;;) { + Py_ssize_t found; + + if (pos <= limit) + break; + + /* Look for the tail string. */ + found = string_search_ign_rev(safe_state, test, pos - + 1, limit - length); + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found && match_one(state, repeated, + pos)) + --pos; + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_REV: + { + Py_ssize_t length; + + length = test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = RE_MAX(limit, slice_start + length); + + for (;;) { + Py_ssize_t found; + + if (pos <= limit) + break; + + /* Look for the tail string. */ + found = string_search_rev(safe_state, test, pos - 1, + limit - length); + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found && match_one(state, repeated, + pos)) + --pos; + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + default: + for (;;) { + RE_Position next_position; + + if (!match_one(state, repeated, pos)) + break; + + pos += step; + + if (try_match(state, &node->next_1, pos, + &next_position) && !is_repeat_guarded(safe_state, + index, pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + break; + } + } + + if (match) { + /* The tail could match. */ + count = abs_ssize_t(pos - text_pos); + text_pos = pos; + + if (count < max_count) { + /* The match is shorter than the maximum, so we might need + * to backtrack the repeat again to consume more. + */ + rp_data->count = count; + } else { + /* We've reached or passed the maximum, so we won't need to + * backtrack the repeat again. + */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + + /* Have we passed the maximum? */ + if (count > max_count) + goto backtrack; + } + + node = node->next_1.node; + goto advance; + } else { + /* The tail couldn't match. */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + } + break; + } + case RE_OP_LOOKAROUND: /* Lookaround. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Restore the groups and certain flags and then backtrack. */ + pop_groups(state); + state->too_few_errors = bt_data->lookaround.too_few_errors; + state->capture_change = bt_data->lookaround.capture_change; + discard_backtrack(state); + break; + case RE_OP_MATCH_BODY: + { + RE_RepeatData* rp_data; + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* We want to match the body. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Restore the repeat info. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + /* Record backtracking info in case the body fails to match. */ + bt_data->op = RE_OP_BODY_START; + + /* Advance into the body. */ + node = bt_data->repeat.position.node; + text_pos = bt_data->repeat.position.text_pos; + goto advance; + } + case RE_OP_MATCH_TAIL: + { + RE_RepeatData* rp_data; + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* We want to match the tail. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Restore the repeat info. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + /* Advance into the tail. */ + node = bt_data->repeat.position.node; + text_pos = bt_data->repeat.position.text_pos; + + discard_backtrack(state); + goto advance; + } + case RE_OP_REF_GROUP: /* Reference to a capture group. */ + case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ + case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group backwards, ignoring case. */ + case RE_OP_REF_GROUP_REV: /* Reference to a capture group backwards. */ + case RE_OP_STRING: /* A string literal. */ + case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ + case RE_OP_STRING_IGN_REV: /* A string literal backwards, ignoring case. */ + case RE_OP_STRING_REV: /* A string literal backwards. */ + { + BOOL matched; + TRACE(("%s\n", re_op_text[bt_data->op])) + + if (!retry_fuzzy_match_string(safe_state, search, &text_pos, &node, + &string_pos, &matched)) + return RE_ERROR_BACKTRACKING; + if (matched) + goto advance; + string_pos = -1; + break; + } + case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ + case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group backwards, ignoring case. */ + { + BOOL matched; + TRACE(("%s\n", re_op_text[bt_data->op])) + + if (!retry_fuzzy_match_string_fld2(safe_state, search, &text_pos, + &node, &folded_pos, &string_pos, &gfolded_pos, &matched)) + return RE_ERROR_BACKTRACKING; + if (matched) + goto advance; + string_pos = -1; + break; + } + case RE_OP_START_GROUP: /* Start of a capture group. */ + { + RE_CODE private_index; + RE_GroupData* group; + TRACE(("%s %d\n", re_op_text[bt_data->op], + bt_data->group.public_index)) + + private_index = bt_data->group.private_index; + group = &state->groups[private_index - 1]; + + /* Unsave the capture? */ + if (bt_data->group.capture) + unsave_capture(state, bt_data->group.private_index, + bt_data->group.public_index); + + if (pattern->group_info[private_index - 1].referenced && + group->span.start != bt_data->group.text_pos) + --state->capture_change; + group->span.start = bt_data->group.text_pos; + group->current_capture = bt_data->group.current_capture; + + discard_backtrack(state); + break; + } + case RE_OP_STRING_FLD: /* A string literal, ignoring case. */ + case RE_OP_STRING_FLD_REV: /* A string literal backwards, ignoring case. */ + { + BOOL matched; + TRACE(("%s\n", re_op_text[bt_data->op])) + + if (!retry_fuzzy_match_string_fld(safe_state, search, &text_pos, + &node, &string_pos, &folded_pos, &matched)) + return RE_ERROR_BACKTRACKING; + if (matched) + goto advance; + string_pos = -1; + break; + } + default: + TRACE(("UNKNOWN OP %d\n", bt_data->op)) + return RE_ERROR_ILLEGAL; + } + } +} + +/* Saves group data for fuzzy matching. */ +Py_LOCAL_INLINE(RE_GroupData*) save_groups(RE_SafeState* safe_state, + RE_GroupData* saved_groups) { + RE_State* state; + PatternObject* pattern; + size_t g; + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + state = safe_state->re_state; + pattern = state->pattern; + + if (!saved_groups) { + saved_groups = (RE_GroupData*)re_alloc(pattern->true_group_count * + sizeof(RE_GroupData)); + if (!saved_groups) + goto error; + + memset(saved_groups, 0, pattern->true_group_count * + sizeof(RE_GroupData)); + } + + for (g = 0; g < pattern->true_group_count; g++) { + RE_GroupData* orig; + RE_GroupData* copy; + + orig = &state->groups[g]; + copy = &saved_groups[g]; + + copy->span = orig->span; + + if (orig->capture_count > copy->capture_capacity) { + RE_GroupSpan* cap_copy; + + cap_copy = (RE_GroupSpan*)re_realloc(copy->captures, + orig->capture_count * sizeof(RE_GroupSpan)); + if (!cap_copy) + goto error; + + copy->capture_capacity = orig->capture_count; + copy->captures = cap_copy; + } + + copy->capture_count = orig->capture_count; + Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * + sizeof(RE_GroupSpan)); + } + + /* Release the GIL. */ + release_GIL(safe_state); + + return saved_groups; + +error: + if (saved_groups) { + for (g = 0; g < pattern->true_group_count; g++) + re_dealloc(saved_groups[g].captures); + + re_dealloc(saved_groups); + } + + /* Release the GIL. */ + release_GIL(safe_state); + + return NULL; +} + +/* Restores group data for fuzzy matching. */ +Py_LOCAL_INLINE(void) restore_groups(RE_SafeState* safe_state, RE_GroupData* + saved_groups) { + RE_State* state; + PatternObject* pattern; + size_t g; + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + state = safe_state->re_state; + pattern = state->pattern; + + for (g = 0; g < pattern->true_group_count; g++) + re_dealloc(state->groups[g].captures); + + Py_MEMCPY(state->groups, saved_groups, pattern->true_group_count * + sizeof(RE_GroupData)); + + re_dealloc(saved_groups); + + /* Release the GIL. */ + release_GIL(safe_state); +} + +/* Discards group data for fuzzy matching. */ +Py_LOCAL_INLINE(void) discard_groups(RE_SafeState* safe_state, RE_GroupData* + saved_groups) { + RE_State* state; + PatternObject* pattern; + size_t g; + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + state = safe_state->re_state; + pattern = state->pattern; + + for (g = 0; g < pattern->true_group_count; g++) + re_dealloc(saved_groups[g].captures); + + re_dealloc(saved_groups); + + /* Release the GIL. */ + release_GIL(safe_state); +} + +/* Performs a match or search from the current text position. + * + * The state can sometimes be shared across threads. In such instances there's + * a lock (mutex) on it. The lock is held for the duration of matching. + */ +Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { + RE_State* state; + PatternObject* pattern; + size_t available; + BOOL get_best; + BOOL enhance_match; + BOOL must_advance; + RE_GroupData* best_groups; + Py_ssize_t best_match_pos; + Py_ssize_t best_text_pos = 0; /* Initialise to stop compiler warning. */ + int status; + Py_ssize_t slice_start; + Py_ssize_t slice_end; + TRACE(("<>\n")) + + state = safe_state->re_state; + pattern = state->pattern; + + /* Release the GIL. */ + release_GIL(safe_state); + + /* Is there enough to search? */ + if (state->reverse) { + if (state->text_pos < state->slice_start) { + acquire_GIL(safe_state); + return FALSE; + } + + available = state->text_pos - state->slice_start; + } else { + if (state->text_pos > state->slice_end) { + acquire_GIL(safe_state); + return FALSE; + } + + available = state->slice_end - state->text_pos; + } + + get_best = (pattern->flags & RE_FLAG_BESTMATCH) != 0; + enhance_match = (pattern->flags & RE_FLAG_ENHANCEMATCH) != 0 && !get_best; + + /* The maximum permitted cost. */ + state->max_cost = pattern->is_fuzzy ? RE_UNLIMITED : 0; + + best_groups = NULL; + + best_match_pos = state->text_pos; + must_advance = state->must_advance; + + slice_start = state->slice_start; + slice_end = state->slice_end; + + for (;;) { + /* If there's a better match, it won't start earlier in the string than + * the current best match, so there's no need to start earlier than + * that match. + */ + state->text_pos = best_match_pos; + state->must_advance = must_advance; + + /* Initialise the state. */ + init_match(state); + + status = RE_ERROR_SUCCESS; + if (state->max_cost == 0) { + /* An exact match. */ + if (available < state->min_width || (available == 0 && + state->must_advance)) + status = RE_ERROR_FAILURE; + } + + if (status == RE_ERROR_SUCCESS) + status = basic_match(safe_state, state->pattern->start_node, + search, FALSE); + + /* Has an error occurred? */ + if (status < 0) + break; + + if (status == RE_ERROR_FAILURE || (status == RE_ERROR_SUCCESS && + state->total_cost == 0)) + break; + + if (!get_best && !enhance_match) + break; + + if (!get_best && state->text_pos == state->match_pos) + /* We want the first match. The match is already zero-width, so the + * cost can't get any lower (because the fit can't get any better). + */ + break; + + if (best_groups) { + BOOL same; + size_t g; + + /* Did we get the same match as the best so far? */ + same = state->match_pos == best_match_pos && state->text_pos == + best_text_pos; + for (g = 0; same && g < pattern->public_group_count; g++) { + same = state->groups[g].span.start == best_groups[g].span.start + && state->groups[g].span.end == best_groups[g].span.end; + } + + if (same) + break; + } + + /* Save the best result so far. */ + best_groups = save_groups(safe_state, best_groups); + if (!best_groups) { + status = RE_ERROR_MEMORY; + break; + } + + best_match_pos = state->match_pos; + best_text_pos = state->text_pos; + + if (state->max_cost == 0) + break; + + /* Reduce the maximum permitted cost and try again. */ + state->max_cost = state->total_cost - 1; + + if (enhance_match) { + if (state->reverse) { + state->slice_start = state->text_pos; + state->slice_end = state->match_pos; + } else { + state->slice_start = state->match_pos; + state->slice_end = state->text_pos; + } + } + } + + state->slice_start = slice_start; + state->slice_end = slice_end; + + if (best_groups) { + if (status == RE_ERROR_SUCCESS && state->total_cost == 0) + /* We have a perfect match, so the previous best match. */ + discard_groups(safe_state, best_groups); + else { + /* Restore the previous best match. */ + status = RE_ERROR_SUCCESS; + + state->match_pos = best_match_pos; + state->text_pos = best_text_pos; + + restore_groups(safe_state, best_groups); + } + } + + if (status == RE_ERROR_SUCCESS) { + Py_ssize_t max_end_index; + PatternObject* pattern; + RE_GroupInfo* group_info; + size_t g; + + /* Store the results. */ + state->lastindex = -1; + state->lastgroup = -1; + max_end_index = -1; + + /* Store the capture groups. */ + pattern = state->pattern; + group_info = pattern->group_info; + for (g = 0; g < pattern->public_group_count; g++) { + RE_GroupSpan* span; + + span = &state->groups[g].span; + /* The string positions are of type Py_ssize_t, so the format needs + * to specify that. + */ + TRACE(("group %d from %" PY_FORMAT_SIZE_T "d to %" PY_FORMAT_SIZE_T + "d\n", g + 1, span->start, span->end)) + + if (span->start >= 0 && span->end >= 0 && group_info[g].end_index > + max_end_index) { + max_end_index = group_info[g].end_index; + state->lastindex = g + 1; + if (group_info[g].has_name) + state->lastgroup = g + 1; + } + } + } + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + if (status < 0 && !PyErr_Occurred()) + set_error(status, NULL); + + return status; +} + +/* Gets a string from a Python object. + * + * If the function returns true and str_info->should_release is true then it's + * the responsibility of the caller to release the buffer when it's no longer + * needed. + */ +Py_LOCAL_INLINE(BOOL) get_string(PyObject* string, RE_StringInfo* str_info) { + /* Given a Python object, return a data pointer, a length (in characters), + * and a character size. Return FALSE if the object is not a string (or not + * compatible). + */ + PyBufferProcs* buffer; + Py_ssize_t bytes; + Py_ssize_t size; + + /* Unicode objects do not support the buffer API. So, get the data directly + * instead. + */ + if (PyUnicode_Check(string)) { + /* Unicode strings doesn't always support the buffer interface. */ + str_info->characters = (void*)PyUnicode_AS_DATA(string); + str_info->length = PyUnicode_GET_SIZE(string); + str_info->charsize = sizeof(Py_UNICODE); + str_info->is_unicode = TRUE; + str_info->should_release = FALSE; + return TRUE; + } + + /* Get pointer to string buffer. */ +#if PY_VERSION_HEX >= 0x02060000 + buffer = Py_TYPE(string)->tp_as_buffer; + str_info->view.len = -1; +#else + buffer = string->ob_type->tp_as_buffer; +#endif + + if (!buffer) { + PyErr_SetString(PyExc_TypeError, "expected string or buffer"); + return FALSE; + } + +#if PY_VERSION_HEX >= 0x02060000 + if (buffer->bf_getbuffer && (*buffer->bf_getbuffer)(string, + &str_info->view, PyBUF_SIMPLE) >= 0) + /* It's a new-style buffer. */ + str_info->should_release = TRUE; + else +#endif + if (buffer->bf_getreadbuffer && buffer->bf_getsegcount && + buffer->bf_getsegcount(string, NULL) == 1) + /* It's an old-style buffer. */ + str_info->should_release = FALSE; + else { + PyErr_SetString(PyExc_TypeError, "expected string or buffer"); + return FALSE; + } + + /* Determine buffer size. */ +#if PY_VERSION_HEX >= 0x02060000 + if (str_info->should_release) { + /* It's a new-style buffer. */ + bytes = str_info->view.len; + str_info->characters = str_info->view.buf; + + if (str_info->characters == NULL) { + PyBuffer_Release(&str_info->view); + PyErr_SetString(PyExc_ValueError, "buffer is NULL"); + return FALSE; + } + } else +#endif + /* It's an old-style buffer. */ + bytes = buffer->bf_getreadbuffer(string, 0, &str_info->characters); + + if (bytes < 0) { +#if PY_VERSION_HEX >= 0x02060000 + if (str_info->should_release) + PyBuffer_Release(&str_info->view); +#endif + PyErr_SetString(PyExc_TypeError, "buffer has negative size"); + return FALSE; + } + + /* Determine character size. */ + size = PyObject_Size(string); + + if (PyString_Check(string) || bytes == size) + str_info->charsize = 1; + else { +#if PY_VERSION_HEX >= 0x02060000 + if (str_info->should_release) + PyBuffer_Release(&str_info->view); +#endif + PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); + return FALSE; + } + + str_info->length = size; + str_info->is_unicode = FALSE; + + return TRUE; +} + +/* Deallocates the groups storage. */ +Py_LOCAL_INLINE(void) dealloc_groups(RE_GroupData* groups, size_t group_count) + { + size_t g; + + if (!groups) + return; + + for (g = 0; g < group_count; g++) + re_dealloc(groups[g].captures); + + re_dealloc(groups); +} + +/* Initialises a state object. */ +Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, + PyObject* string, RE_StringInfo* str_info, Py_ssize_t start, Py_ssize_t end, + BOOL overlapped, int concurrent, BOOL use_lock, BOOL visible_captures, BOOL + match_all) { + Py_ssize_t final_pos; + int i; + + state->groups = NULL; + state->repeats = NULL; + state->visible_captures = visible_captures; + state->match_all = match_all; + state->backtrack_block.previous = NULL; + state->backtrack_block.next = NULL; + state->backtrack_block.capacity = RE_BACKTRACK_BLOCK_SIZE; + state->backtrack_allocated = RE_BACKTRACK_BLOCK_SIZE; + state->first_saved_groups = NULL; + state->current_saved_groups = NULL; + state->first_saved_repeats = NULL; + state->current_saved_repeats = NULL; + state->lock = NULL; + state->fuzzy_guards = NULL; + state->first_group_call_frame = NULL; + state->current_group_call_frame = NULL; + state->group_call_guard_list = NULL; + + /* The call guards used by recursive patterns. */ + if (pattern->call_ref_info_count > 0) { + state->group_call_guard_list = + (RE_GuardList*)re_alloc(pattern->call_ref_info_count * + sizeof(RE_GuardList)); + if (!state->group_call_guard_list) + goto error; + memset(state->group_call_guard_list, 0, pattern->call_ref_info_count * + sizeof(RE_GuardList)); + } + + /* The capture groups. */ + if (pattern->true_group_count) { + size_t g; + + if (pattern->groups_storage) { + state->groups = pattern->groups_storage; + pattern->groups_storage = NULL; + } else { + state->groups = (RE_GroupData*)re_alloc(pattern->true_group_count * + sizeof(RE_GroupData)); + if (!state->groups) + goto error; + memset(state->groups, 0, pattern->true_group_count * + sizeof(RE_GroupData)); + + for (g = 0; g < pattern->true_group_count; g++) { + RE_GroupSpan* captures; + + captures = (RE_GroupSpan*)re_alloc(sizeof(RE_GroupSpan)); + if (!captures) { + size_t i; + + for (i = 0; i < g; i++) + re_dealloc(state->groups[i].captures); + + goto error; + } + + state->groups[g].captures = captures; + state->groups[g].capture_capacity = 1; + } + } + } + + /* Adjust boundaries. */ + if (start < 0) + start += str_info->length; + if (start < 0) + start = 0; + else if (start > str_info->length) + start = str_info->length; + + if (end < 0) + end += str_info->length; + if (end < 0) + end = 0; + else if (end > str_info->length) + end = str_info->length; + + state->overlapped = overlapped; + state->min_width = pattern->min_width; + + /* Initialise the getters and setters for the character size. */ + state->charsize = str_info->charsize; + state->is_unicode = str_info->is_unicode; + +#if PY_VERSION_HEX >= 0x02060000 + /* Are we using a buffer object? If so, we need to copy the info. */ + state->should_release = str_info->should_release; + if (state->should_release) + state->view = str_info->view; + +#endif + switch (state->charsize) { + case 1: + state->char_at = bytes1_char_at; + state->set_char_at = bytes1_set_char_at; + state->point_to = bytes1_point_to; + break; + case 2: + state->char_at = bytes2_char_at; + state->set_char_at = bytes2_set_char_at; + state->point_to = bytes2_point_to; + break; + case 4: + state->char_at = bytes4_char_at; + state->set_char_at = bytes4_set_char_at; + state->point_to = bytes4_point_to; + break; + default: + goto error; + } + + state->encoding = pattern->encoding; + + /* The state object contains a reference to the string and also a pointer + * to its contents. + * + * The documentation says that the end of the slice behaves like the end of + * the string. + */ + state->text = str_info->characters; + state->text_length = end; + + state->reverse = (pattern->flags & RE_FLAG_REVERSE) != 0; + + state->slice_start = start; + state->slice_end = state->text_length; + state->text_pos = state->reverse ? state->slice_end : state->slice_start; + + /* Point to the final newline and line separator if it's at the end of the + * string, otherwise just -1. + */ + state->final_newline = -1; + state->final_line_sep = -1; + final_pos = state->text_length - 1; + if (final_pos >= 0) { + Py_UCS4 ch; + + ch = state->char_at(state->text, final_pos); + if (ch == 0x0A) { + /* The string ends with LF. */ + state->final_newline = final_pos; + state->final_line_sep = final_pos; + + /* Does the string end with CR/LF? */ + --final_pos; + if (final_pos >= 0 && state->char_at(state->text, final_pos) == + 0x0D) + state->final_line_sep = final_pos; + } else { + /* The string doesn't end with LF, but it could be another kind of + * line separator. + */ + if (state->encoding->is_line_sep(ch)) + state->final_line_sep = final_pos; + } + } + + /* If the 'new' behaviour is enabled then split correctly on zero-width + * matches. + */ + state->zero_width = (pattern->flags & RE_FLAG_VERSION1) != 0; + state->must_advance = FALSE; + + state->pattern = pattern; + state->string = string; + + if (pattern->repeat_count) { + if (pattern->repeats_storage) { + state->repeats = pattern->repeats_storage; + pattern->repeats_storage = NULL; + } else { + state->repeats = (RE_RepeatData*)re_alloc(pattern->repeat_count * + sizeof(RE_RepeatData)); + if (!state->repeats) + goto error; + memset(state->repeats, 0, pattern->repeat_count * + sizeof(RE_RepeatData)); + } + } + + if (pattern->fuzzy_count) { + state->fuzzy_guards = (RE_FuzzyGuards*)re_alloc(pattern->fuzzy_count * + sizeof(RE_FuzzyGuards)); + if (!state->fuzzy_guards) + goto error; + memset(state->fuzzy_guards, 0, pattern->fuzzy_count * + sizeof(RE_FuzzyGuards)); + } + + Py_INCREF(state->pattern); + Py_INCREF(state->string); + + /* Multithreading is allowed during matching when explicitly enabled or on + * immutable strings. + */ + switch (concurrent) { + case RE_CONC_NO: + state->is_multithreaded = FALSE; + break; + case RE_CONC_YES: + state->is_multithreaded = TRUE; + break; + default: + state->is_multithreaded = PyUnicode_Check(string) || + PyString_Check(string); + break; + } + + /* A state struct can sometimes be shared across threads. In such + * instances, if multithreading is enabled we need to protect the state + * with a lock (mutex) during matching. + */ + if (state->is_multithreaded && use_lock) + state->lock = PyThread_allocate_lock(); + + for (i = 0; i < MAX_SEARCH_POSITIONS; i++) + state->search_positions[i].start_pos = -1; + + return TRUE; + +error: + re_dealloc(state->group_call_guard_list); + re_dealloc(state->repeats); + dealloc_groups(state->groups, pattern->true_group_count); + re_dealloc(state->fuzzy_guards); + state->repeats = NULL; + state->groups = NULL; + state->fuzzy_guards = NULL; + return FALSE; +} + +#if PY_VERSION_HEX >= 0x02060000 +/* Releases the string's buffer, if necessary. */ +Py_LOCAL_INLINE(void) release_buffer(RE_StringInfo* str_info) { + if (str_info->should_release) + PyBuffer_Release(&str_info->view); +} + +#endif +/* Initialises a state object. */ +Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, + PyObject* string, Py_ssize_t start, Py_ssize_t end, BOOL overlapped, int + concurrent, BOOL use_lock, BOOL visible_captures, BOOL match_all) { + RE_StringInfo str_info; + + /* Get the string to search or match. */ + if (!get_string(string, &str_info)) + return FALSE; + + /* If we fail to initialise the state then we need to release the buffer if + * the string is a buffer object. + */ + if (!state_init_2(state, pattern, string, &str_info, start, end, + overlapped, concurrent, use_lock, visible_captures, match_all)) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return FALSE; + } + + /* The state has been initialised successfully, so now the state has the + * responsibility of releasing the buffer if the string is a buffer object. + */ + return TRUE; +} + +/* Deallocates repeat data. */ +Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, Py_ssize_t + repeat_count) { + Py_ssize_t i; + + if (!repeats) + return; + + for (i = 0; i < repeat_count; i++) { + re_dealloc(repeats[i].body_guard_list.spans); + re_dealloc(repeats[i].tail_guard_list.spans); + } + + re_dealloc(repeats); +} + +/* Deallocates fuzzy guards. */ +Py_LOCAL_INLINE(void) dealloc_fuzzy_guards(RE_FuzzyGuards* guards, Py_ssize_t + fuzzy_count) { + Py_ssize_t i; + + if (!guards) + return; + + for (i = 0; i < fuzzy_count; i++) { + re_dealloc(guards[i].body_guard_list.spans); + re_dealloc(guards[i].tail_guard_list.spans); + } + + re_dealloc(guards); +} + +/* Finalises a state object, discarding its contents. */ +Py_LOCAL_INLINE(void) state_fini(RE_State* state) { + RE_BacktrackBlock* current; + PatternObject* pattern; + RE_SavedGroups* saved_groups; + RE_SavedRepeats* saved_repeats; + RE_GroupCallFrame* frame; + size_t i; + + /* Discard the lock (mutex) if there's one. */ + if (state->lock) + PyThread_free_lock(state->lock); + + /* Deallocate the backtrack blocks. */ + current = state->backtrack_block.next; + while (current) { + RE_BacktrackBlock* next; + + next = current->next; + re_dealloc(current); + state->backtrack_allocated -= RE_BACKTRACK_BLOCK_SIZE; + current = next; + } + + pattern = state->pattern; + + saved_groups = state->first_saved_groups; + while (saved_groups) { + RE_SavedGroups* next; + + next = saved_groups->next; + re_dealloc(saved_groups->spans); + re_dealloc(saved_groups->counts); + re_dealloc(saved_groups); + saved_groups = next; + } + + saved_repeats = state->first_saved_repeats; + while (saved_repeats) { + RE_SavedRepeats* next; + + next = saved_repeats->next; + + dealloc_repeats(saved_repeats->repeats, pattern->repeat_count); + + re_dealloc(saved_repeats); + saved_repeats = next; + } + + if (pattern->groups_storage) + dealloc_groups(state->groups, pattern->true_group_count); + else + pattern->groups_storage = state->groups; + + if (pattern->repeats_storage) + dealloc_repeats(state->repeats, pattern->repeat_count); + else + pattern->repeats_storage = state->repeats; + + frame = state->first_group_call_frame; + while (frame) { + RE_GroupCallFrame* next; + + next = frame->next; + + dealloc_groups(frame->groups, pattern->true_group_count); + dealloc_repeats(frame->repeats, pattern->repeat_count); + + re_dealloc(frame); + frame = next; + } + + for (i = 0; i < pattern->call_ref_info_count; i++) + re_dealloc(state->group_call_guard_list[i].spans); + + if (state->group_call_guard_list) + re_dealloc(state->group_call_guard_list); + + if (state->fuzzy_guards) + dealloc_fuzzy_guards(state->fuzzy_guards, pattern->fuzzy_count); + + Py_DECREF(state->pattern); + Py_DECREF(state->string); +#if PY_VERSION_HEX >= 0x02060000 + + if (state->should_release) + PyBuffer_Release(&state->view); +#endif +} + +/* Converts a string index to an integer. + * + * If the index is None then the default will be returned. + */ +Py_LOCAL_INLINE(Py_ssize_t) as_string_index(PyObject* obj, Py_ssize_t def) { + Py_ssize_t value; + + if (obj == Py_None) + return def; + + value = PyInt_AsSsize_t(obj); + if (value != -1 || !PyErr_Occurred()) + return value; + + PyErr_Clear(); + + value = PyLong_AsLong(obj); + if (value != -1 || !PyErr_Occurred()) + return value; + + set_error(RE_ERROR_INDEX, NULL); + return 0; +} + +/* Deallocates a MatchObject. */ +static void match_dealloc(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + Py_XDECREF(self->string); + Py_XDECREF(self->substring); + Py_DECREF(self->pattern); + if (self->groups) + re_dealloc(self->groups); + Py_XDECREF(self->regs); + PyObject_DEL(self); +} + +/* Gets a slice from a string. */ +Py_LOCAL_INLINE(PyObject*) get_slice(PyObject* string, Py_ssize_t start, + Py_ssize_t end) { + return PySequence_GetSlice(string, start, end); +} + +/* Gets a MatchObject's group by integer index. */ +Py_LOCAL_INLINE(PyObject*) match_get_group_by_index(MatchObject* self, + Py_ssize_t index, PyObject* def) { + RE_GroupSpan* span; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) + return get_slice(self->substring, self->match_start - + self->substring_offset, self->match_end - self->substring_offset); + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + span = &self->groups[index - 1].span; + + if (span->start < 0 || span->end < 0) { + /* Return default value if the string or group is undefined. */ + Py_INCREF(def); + return def; + } + + return get_slice(self->substring, span->start - self->substring_offset, + span->end - self->substring_offset); +} + +/* Gets a MatchObject's start by integer index. */ +static PyObject* match_get_start_by_index(MatchObject* self, Py_ssize_t index) + { + RE_GroupSpan* span; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) + return Py_BuildValue("n", self->match_start); + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + span = &self->groups[index - 1].span; + return Py_BuildValue("n", span->start); +} + +/* Gets a MatchObject's starts by integer index. */ +static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) + { + RE_GroupData* group; + PyObject* result; + PyObject* item; + size_t i; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) { + result = PyList_New(1); + if (!result) + return NULL; + + item = Py_BuildValue("n", self->match_start); + if (!item) + goto error; + PyList_SET_ITEM(result, 0, item); + + return result; + } + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + group = &self->groups[index - 1]; + + result = PyList_New(group->capture_count); + if (!result) + return NULL; + + for (i = 0; i < group->capture_count; i++) { + item = Py_BuildValue("n", group->captures[i].start); + if (!item) + goto error; + PyList_SET_ITEM(result, i, item); + } + + return result; + +error: + Py_DECREF(result); + return NULL; +} + +/* Gets a MatchObject's end by integer index. */ +static PyObject* match_get_end_by_index(MatchObject* self, Py_ssize_t index) { + RE_GroupSpan* span; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) + return Py_BuildValue("n", self->match_end); + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + span = &self->groups[index - 1].span; + return Py_BuildValue("n", span->end); +} + +/* Gets a MatchObject's ends by integer index. */ +static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { + RE_GroupData* group; + PyObject* result; + PyObject* item; + size_t i; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) { + result = PyList_New(1); + if (!result) + return NULL; + + item = Py_BuildValue("n", self->match_end); + if (!item) + goto error; + PyList_SET_ITEM(result, 0, item); + + return result; + } + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + group = &self->groups[index - 1]; + + result = PyList_New(group->capture_count); + if (!result) + return NULL; + + for (i = 0; i < group->capture_count; i++) { + item = Py_BuildValue("n", group->captures[i].end); + if (!item) + goto error; + PyList_SET_ITEM(result, i, item); + } + + return result; + +error: + Py_DECREF(result); + return NULL; +} + +/* Gets a MatchObject's span by integer index. */ +static PyObject* match_get_span_by_index(MatchObject* self, Py_ssize_t index) { + RE_GroupSpan* span; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) + return Py_BuildValue("nn", self->match_start, self->match_end); + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + span = &self->groups[index - 1].span; + return Py_BuildValue("nn", span->start, span->end); +} + +/* Gets a MatchObject's spans by integer index. */ +static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) + { + RE_GroupData* group; + PyObject* result; + PyObject* item; + size_t i; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) { + result = PyList_New(1); + if (!result) + return NULL; + + item = Py_BuildValue("nn", self->match_start, self->match_end); + if (!item) + goto error; + PyList_SET_ITEM(result, 0, item); + + return result; + } + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + group = &self->groups[index - 1]; + + result = PyList_New(group->capture_count); + if (!result) + return NULL; + + for (i = 0; i < group->capture_count; i++) { + item = Py_BuildValue("nn", group->captures[i].start, + group->captures[i].end); + if (!item) + goto error; + PyList_SET_ITEM(result, i, item); + } + + return result; + +error: + Py_DECREF(result); + return NULL; +} + +/* Gets a MatchObject's captures by integer index. */ +static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t + index) { + RE_GroupData* group; + PyObject* result; + PyObject* slice; + size_t i; + + if (index < 0 || index > (Py_ssize_t)self->group_count) { + /* Raise error if we were given a bad group number. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } + + if (index == 0) { + result = PyList_New(1); + if (!result) + return NULL; + + slice = get_slice(self->substring, self->match_start - + self->substring_offset, self->match_end - self->substring_offset); + if (!slice) + goto error; + PyList_SET_ITEM(result, 0, slice); + + return result; + } + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + group = &self->groups[index - 1]; + + result = PyList_New(group->capture_count); + if (!result) + return NULL; + + for (i = 0; i < group->capture_count; i++) { + slice = get_slice(self->substring, group->captures[i].start - + self->substring_offset, group->captures[i].end - + self->substring_offset); + if (!slice) + goto error; + PyList_SET_ITEM(result, i, slice); + } + + return result; + +error: + Py_DECREF(result); + return NULL; +} + +/* Converts a group index to an integer. */ +Py_LOCAL_INLINE(Py_ssize_t) as_group_index(PyObject* obj) { + Py_ssize_t value; + + value = PyInt_AsSsize_t(obj); + if (value != -1 || !PyErr_Occurred()) + return value; + + PyErr_Clear(); + + value = PyLong_AsLong(obj); + if (value != -1 || !PyErr_Occurred()) + return value; + + set_error(RE_ERROR_INDEX, NULL); + return -1; +} + +/* Gets a MatchObject's group index. + * + * The supplied index can be an integer or a string (group name) object. + */ +Py_LOCAL_INLINE(Py_ssize_t) match_get_group_index(MatchObject* self, PyObject* + index, BOOL allow_neg) { + Py_ssize_t group; + + /* Is the index an integer? */ + group = as_group_index(index); + if (group != -1 || !PyErr_Occurred()) { + Py_ssize_t min_group = 0; + + /* Adjust negative indices where valid and allowed. */ + if (group < 0 && allow_neg) { + group += (Py_ssize_t)self->group_count + 1; + min_group = 1; + } + + if (min_group <= group && group <= (Py_ssize_t)self->group_count) + return group; + + return -1; + } + + /* The index might be a group name. */ + if (self->pattern->groupindex) { + /* Look up the name. */ + PyErr_Clear(); + + index = PyObject_GetItem(self->pattern->groupindex, index); + if (index) { + /* Check that we have an integer. */ + group = as_group_index(index); + Py_DECREF(index); + if (group != -1 || !PyErr_Occurred()) + return group; + } + } + + PyErr_Clear(); + return -1; +} + +/* Gets a MatchObject's group by object index. */ +Py_LOCAL_INLINE(PyObject*) match_get_group(MatchObject* self, PyObject* index, + PyObject* def, BOOL allow_neg) { + /* Check that the index is an integer or a string. */ + if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || + PyString_Check(index)) + return match_get_group_by_index(self, match_get_group_index(self, + index, allow_neg), def); + + set_error(RE_ERROR_GROUP_INDEX_TYPE, index); + return NULL; +} + +/* Gets info from a MatchObject by object index. */ +static PyObject* get_by_arg(MatchObject* self, PyObject* index, + RE_GetByIndexFunc get_by_index) { + /* Check that the index is an integer or a string. */ + if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || + PyString_Check(index)) + return get_by_index(self, match_get_group_index(self, index, FALSE)); + + set_error(RE_ERROR_GROUP_INDEX_TYPE, index); + return NULL; +} + +/* MatchObject's 'group' method. */ +static PyObject* match_group(MatchObject* self, PyObject* args) { + Py_ssize_t size; + PyObject* result; + Py_ssize_t i; + + size = PyTuple_GET_SIZE(args); + + switch (size) { + case 0: + /* group() */ + result = match_get_group_by_index(self, 0, Py_None); + break; + case 1: + /* group(x) */ + result = match_get_group(self, PyTuple_GET_ITEM(args, 0), Py_None, + FALSE); + break; + default: + /* group(x, y, z, ...) */ + /* Fetch multiple items. */ + result = PyTuple_New(size); + if (!result) + return NULL; + for (i = 0; i < size; i++) { + PyObject* item = match_get_group(self, PyTuple_GET_ITEM(args, i), + Py_None, FALSE); + if (!item) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, i, item); + } + break; + } + return result; +} + +/* Generic method for getting info from a MatchObject. */ +static PyObject* get_from_match(MatchObject* self, PyObject* args, + RE_GetByIndexFunc get_by_index) { + Py_ssize_t size; + PyObject* result; + Py_ssize_t i; + + size = PyTuple_GET_SIZE(args); + + switch (size) { + case 0: + /* get() */ + result = get_by_index(self, 0); + break; + case 1: + /* get(x) */ + result = get_by_arg(self, PyTuple_GET_ITEM(args, 0), get_by_index); + break; + default: + /* get(x, y, z, ...) */ + /* Fetch multiple items. */ + result = PyTuple_New(size); + if (!result) + return NULL; + for (i = 0; i < size; i++) { + PyObject* item = get_by_arg(self, PyTuple_GET_ITEM(args, i), + get_by_index); + if (!item) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, i, item); + } + break; + } + return result; +} + +/* MatchObject's 'start' method. */ +static PyObject* match_start(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_start_by_index); +} + +/* MatchObject's 'starts' method. */ +static PyObject* match_starts(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_starts_by_index); +} + +/* MatchObject's 'end' method. */ +static PyObject* match_end(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_end_by_index); +} + +/* MatchObject's 'ends' method. */ +static PyObject* match_ends(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_ends_by_index); +} + +/* MatchObject's 'span' method. */ +static PyObject* match_span(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_span_by_index); +} + +/* MatchObject's 'spans' method. */ +static PyObject* match_spans(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_spans_by_index); +} + +/* MatchObject's 'captures' method. */ +static PyObject* match_captures(MatchObject* self, PyObject* args) { + return get_from_match(self, args, match_get_captures_by_index); +} + +/* MatchObject's 'groups' method. */ +static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* + kwargs) { + PyObject* result; + size_t g; + + PyObject* def = Py_None; + static char* kwlist[] = { "default", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groups", kwlist, &def)) + return NULL; + + result = PyTuple_New(self->group_count); + if (!result) + return NULL; + + /* Group 0 is the entire matched portion of the string. */ + for (g = 0; g < self->group_count; g++) { + PyObject* item; + item = match_get_group_by_index(self, g + 1, def); + if (!item) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, g, item); + } + + return result; +} + +/* MatchObject's 'groupdict' method. */ +static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject* + kwargs) { + PyObject* result; + PyObject* keys; + Py_ssize_t g; + + PyObject* def = Py_None; + static char* kwlist[] = { "default", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groupdict", kwlist, + &def)) + return NULL; + + result = PyDict_New(); + if (!result || !self->pattern->groupindex) + return result; + + keys = PyMapping_Keys(self->pattern->groupindex); + if (!keys) + goto failed; + + for (g = 0; g < PyList_GET_SIZE(keys); g++) { + PyObject* key; + PyObject* value; + int status; + + key = PyList_GET_ITEM(keys, g); + if (!key) + goto failed; + value = match_get_group(self, key, def, FALSE); + if (!value) { + Py_DECREF(key); + goto failed; + } + status = PyDict_SetItem(result, key, value); + Py_DECREF(value); + if (status < 0) + goto failed; + } + + Py_DECREF(keys); + + return result; + +failed: + Py_XDECREF(keys); + Py_DECREF(result); + return NULL; +} + +/* MatchObject's 'capturesdict' method. */ +static PyObject* match_capturesdict(MatchObject* self) { + PyObject* result; + PyObject* keys; + Py_ssize_t g; + + result = PyDict_New(); + if (!result || !self->pattern->groupindex) + return result; + + keys = PyMapping_Keys(self->pattern->groupindex); + if (!keys) + goto failed; + + for (g = 0; g < PyList_GET_SIZE(keys); g++) { + PyObject* key; + Py_ssize_t group; + PyObject* captures; + int status; + + key = PyList_GET_ITEM(keys, g); + if (!key) + goto failed; + group = match_get_group_index(self, key, FALSE); + if (group < 0) { + Py_DECREF(key); + goto failed; + } + captures = match_get_captures_by_index(self, group); + if (!captures) { + Py_DECREF(key); + goto failed; + } + status = PyDict_SetItem(result, key, captures); + Py_DECREF(captures); + if (status < 0) + goto failed; + } + + Py_DECREF(keys); + + return result; + +failed: + Py_XDECREF(keys); + Py_DECREF(result); + return NULL; +} + +/* Gets a Python object by name from a named module. */ +Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name) { + PyObject* module; + PyObject* object; + + module = PyImport_ImportModule(module_name); + if (!module) + return NULL; + + object = PyObject_GetAttrString(module, object_name); + Py_DECREF(module); + + return object; +} + +/* Calls a function in a module. */ +Py_LOCAL_INLINE(PyObject*) call(char* module_name, char* function_name, + PyObject* args) { + PyObject* function; + PyObject* result; + + if (!args) + return NULL; + + function = get_object(module_name, function_name); + if (!function) + return NULL; + + result = PyObject_CallObject(function, args); + Py_DECREF(function); + Py_DECREF(args); + + return result; +} + +/* Gets a replacement item from the replacement list. + * + * The replacement item could be a string literal or a group. + */ +Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* + item, Py_ssize_t group_count) { + Py_ssize_t index; + + if (PyUnicode_Check(item) || PyString_Check(item)) { + /* It's a literal, which can be added directly to the list. */ + Py_INCREF(item); + return item; + } + + /* Is it a group reference? */ + index = as_group_index(item); + if (index == -1 && PyErr_Occurred()) { + /* Not a group either! */ + set_error(RE_ERROR_REPLACEMENT, NULL); + return NULL; + } + + if (index == 0) { + /* The entire matched portion of the string. */ + return get_slice(self->substring, self->match_start - + self->substring_offset, self->match_end - self->substring_offset); + } else if (index >= 1 && index <= group_count) { + /* A group. If it didn't match then return None instead. */ + RE_GroupData* group; + + group = &self->groups[index - 1]; + + if (group->capture_count > 0) + return get_slice(self->substring, group->span.start - + self->substring_offset, group->span.end - + self->substring_offset); + else { + Py_INCREF(Py_None); + return Py_None; + } + } else { + /* No such group. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } +} + +/* Adds an item to be joined. */ +Py_LOCAL_INLINE(int) add_item(JoinInfo* join_info, PyObject* item) { + PyObject* new_item; + int status; + + if (join_info->is_unicode) { + if (PyUnicode_Check(item)) { + new_item = item; + Py_INCREF(new_item); + } else { + new_item = PyUnicode_FromObject(item); + if (!new_item) { + set_error(RE_ERROR_NOT_UNICODE, item); + return RE_ERROR_NOT_UNICODE; + } + } + } else { + if (PyString_Check(item)) { + new_item = item; + Py_INCREF(new_item); + } else { + new_item = PyUnicode_FromObject(item); + if (!new_item) { + set_error(RE_ERROR_NOT_STRING, item); + return RE_ERROR_NOT_STRING; + } + } + } + + /* If the list already exists then just add the item to it. */ + if (join_info->list) { + status = PyList_Append(join_info->list, new_item); + if (status < 0) + goto error; + + Py_DECREF(new_item); + return status; + } + + /* If we already have an item then we now have 2(!) and we need to put them + * into a list. + */ + if (join_info->item) { + join_info->list = PyList_New(2); + if (!join_info->list) { + status = RE_ERROR_MEMORY; + goto error; + } + + PyList_SET_ITEM(join_info->list, 0, join_info->item); + join_info->item = NULL; + + PyList_SET_ITEM(join_info->list, 1, new_item); + return 0; + } + + /* This is the first item. */ + join_info->item = new_item; + + return 0; + +error: + Py_DECREF(new_item); + Py_XDECREF(join_info->list); + Py_XDECREF(join_info->item); + set_error(status, NULL); + return status; +} + +/* Joins together a list of strings for pattern_subx. */ +Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) { + /* If the list already exists then just do the join. */ + if (join_info->list) { + PyObject* joiner; + PyObject* result; + + if (join_info->reversed) + /* The list needs to be reversed before being joined. */ + PyList_Reverse(join_info->list); + + if (join_info->is_unicode) + { + /* Concatenate the Unicode strings. */ + joiner = PyUnicode_FromUnicode(NULL, 0); + if (!joiner) { + Py_DECREF(join_info->list); + return NULL; + } + + result = PyUnicode_Join(joiner, join_info->list); + } else { + joiner = PyString_FromString(""); + if (!joiner) { + Py_DECREF(join_info->list); + return NULL; + } + + /* Concatenate the bytestrings. */ + result = _PyString_Join(joiner, join_info->list); + } + + Py_DECREF(joiner); + Py_DECREF(join_info->list); + + return result; + } + + /* If we have only 1 item, so we'll just return it. */ + if (join_info->item) + return join_info->item; + + /* There are no items, so return an empty string. */ + if (join_info->is_unicode) + return PyUnicode_FromUnicode(NULL, 0); + else + return PyString_FromString(""); +} + +/* Checks whether a string replacement is a literal. + * + * To keep it simple we'll say that a literal is a string which can be used + * as-is. + * + * Returns its length if it is a literal, otherwise -1. + */ +Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement, + char special_char) { + RE_StringInfo str_info; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + Py_ssize_t pos; + + if (!get_string(str_replacement, &str_info)) + return -1; + + switch (str_info.charsize) { + case 1: + char_at = bytes1_char_at; + break; + case 2: + char_at = bytes2_char_at; + break; + case 4: + char_at = bytes4_char_at; + break; + default: +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); +#endif + return -1; + } + + for (pos = 0; pos < str_info.length; pos++) { + if (char_at(str_info.characters, pos) == special_char) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return -1; + } + } + +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return str_info.length; +} + +/* MatchObject's 'expand' method. */ +static PyObject* match_expand(MatchObject* self, PyObject* str_template) { + Py_ssize_t literal_length; + PyObject* replacement; + JoinInfo join_info; + Py_ssize_t size; + Py_ssize_t i; + + /* Is the template just a literal? */ + literal_length = check_replacement_string(str_template, '\\'); + if (literal_length >= 0) { + /* It's a literal. */ + Py_INCREF(str_template); + return str_template; + } + + /* Hand the template to the template compiler. */ + replacement = call(RE_MODULE, "_compile_replacement_helper", + PyTuple_Pack(2, self->pattern, str_template)); + if (!replacement) + return NULL; + + join_info.list = NULL; + join_info.item = NULL; + join_info.reversed = FALSE; + join_info.is_unicode = PyUnicode_Check(self->string); + + /* Add each part of the template to the list. */ + size = PyList_GET_SIZE(replacement); + for (i = 0; i < size; i++) { + PyObject* item; + PyObject* str_item; + + item = PyList_GET_ITEM(replacement, i); + str_item = get_match_replacement(self, item, self->group_count); + if (!str_item) + goto error; + + /* Add to the list. */ + if (str_item == Py_None) + Py_DECREF(str_item); + else { + int status; + + status = add_item(&join_info, str_item); + Py_DECREF(str_item); + if (status < 0) + goto error; + } + } + + Py_DECREF(replacement); + + /* Convert the list to a single string (also cleans up join_info). */ + return join_list_info(&join_info); + +error: + Py_XDECREF(join_info.list); + Py_XDECREF(join_info.item); + Py_DECREF(replacement); + return NULL; +} + +#if PY_VERSION_HEX >= 0x02060000 +/* Gets a MatchObject's group dictionary. */ +static PyObject* match_get_group_dict(MatchObject* self) { + PyObject* result; + PyObject* keys; + Py_ssize_t g; + + result = PyDict_New(); + if (!result || !self->pattern->groupindex) + return result; + + keys = PyMapping_Keys(self->pattern->groupindex); + if (!keys) + goto failed; + + for (g = 0; g < PyList_GET_SIZE(keys); g++) { + int status; + PyObject* key; + PyObject* value; + key = PyList_GET_ITEM(keys, g); + if (!key) + goto failed; + value = match_get_group(self, key, Py_None, FALSE); + if (!value) { + Py_DECREF(key); + goto failed; + } + status = PyDict_SetItem(result, key, value); + Py_DECREF(value); + if (status < 0) + goto failed; + } + + Py_DECREF(keys); + + return result; + +failed: + Py_XDECREF(keys); + Py_DECREF(result); + return NULL; +} + +/* MatchObject's 'expandf' method. */ +static PyObject* match_expandf(MatchObject* self, PyObject* str_template) { + PyObject* format_func; + PyObject* args = NULL; + size_t g; + PyObject* kwargs = NULL; + PyObject* result; + + format_func = PyObject_GetAttrString(str_template, "format"); + if (!format_func) + return NULL; + + args = PyTuple_New(self->group_count + 1); + if (!args) + goto error; + + for (g = 0; g < self->group_count + 1; g++) + PyTuple_SetItem(args, g, match_get_group_by_index(self, g, Py_None)); + + kwargs = match_get_group_dict(self); + if (!kwargs) + goto error; + + result = PyObject_Call(format_func, args, kwargs); + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(format_func); + + return result; + +error: + Py_XDECREF(args); + Py_DECREF(format_func); + return NULL; +} + +#endif +Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self); + +/* MatchObject's '__copy__' method. */ +static PyObject* match_copy(MatchObject* self, PyObject *unused) { + return make_match_copy(self); +} + +/* MatchObject's '__deepcopy__' method. */ +static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) { + return make_match_copy(self); +} + +/* MatchObject's 'regs' attribute. */ +static PyObject* match_regs(MatchObject* self) { + PyObject* regs; + PyObject* item; + size_t g; + + regs = PyTuple_New(self->group_count + 1); + if (!regs) + return NULL; + + item = Py_BuildValue("nn", self->match_start, self->match_end); + if (!item) { + Py_DECREF(regs); + return NULL; + } + PyTuple_SET_ITEM(regs, 0, item); + + for (g = 0; g < self->group_count; g++) { + RE_GroupSpan* span; + + span = &self->groups[g].span; + item = Py_BuildValue("nn", span->start, span->end); + if (!item) { + Py_DECREF(regs); + return NULL; + } + PyTuple_SET_ITEM(regs, g + 1, item); + } + + Py_INCREF(regs); + self->regs = regs; + + return regs; +} + +/* MatchObject's slice method. */ +Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* + slice) { + Py_ssize_t start; + Py_ssize_t end; + Py_ssize_t step; + Py_ssize_t slice_length; + + if (PySlice_GetIndicesEx((PySliceObject*)slice, self->group_count + 1, + &start, &end, &step, &slice_length) < 0) + return NULL; + + if (slice_length <= 0) + return PyTuple_New(0); + else { + PyObject* result; + Py_ssize_t cur; + Py_ssize_t i; + + result = PyTuple_New(slice_length); + if (!result) + return NULL; + + cur = start; + for (i = 0; i < slice_length; i++) { + PyTuple_SetItem(result, i, match_get_group_by_index(self, cur, + Py_None)); + cur += step; + } + + return result; + } +} + +/* MatchObject's length method. */ +static Py_ssize_t match_length(MatchObject* self) { + return self->group_count + 1; +} + +/* MatchObject's '__getitem__' method. */ +static PyObject* match_getitem(MatchObject* self, PyObject* item) { + if (PySlice_Check(item)) + return match_get_group_slice(self, item); + + return match_get_group(self, item, Py_None, TRUE); +} + +/* Determines the portion of the target string which is covered by the group + * captures. + */ +Py_LOCAL_INLINE(void) determine_target_substring(MatchObject* match, + Py_ssize_t* slice_start, Py_ssize_t* slice_end) { + Py_ssize_t start; + Py_ssize_t end; + size_t g; + + start = match->pos; + end = match->endpos; + + for (g = 0; g < match->group_count; g++) { + RE_GroupSpan* span; + size_t c; + + span = &match->groups[g].span; + if (span->start >= 0 && span->start < start) + start = span->start; + if (span->end >= 0 && span->end > end) + end = span->end; + + for (c = 0; c < match->groups[g].capture_count; c++) { + RE_GroupSpan* span; + + span = match->groups[g].captures; + if (span->start >= 0 && span->start < start) + start = span->start; + if (span->end >= 0 && span->end > end) + end = span->end; + } + } + + *slice_start = start; + *slice_end = end; +} + +/* MatchObject's 'detach_string' method. */ +static PyObject* match_detach_string(MatchObject* self, PyObject* unused) { + if (self->string) { + Py_ssize_t start; + Py_ssize_t end; + PyObject* substring; + + determine_target_substring(self, &start, &end); + + substring = get_slice(self->string, start, end); + if (substring) { + Py_XDECREF(self->substring); + self->substring = substring; + self->substring_offset = start; + + Py_XDECREF(self->string); + self->string = NULL; + } + } + + Py_INCREF(Py_None); + return Py_None; +} + +/* The documentation of a MatchObject. */ +PyDoc_STRVAR(match_group_doc, + "group([group1, ...]) --> string or tuple of strings.\n\ + Return one or more subgroups of the match. If there is a single argument,\n\ + the result is a single string, or None if the group did not contribute to\n\ + the match; if there are multiple arguments, the result is a tuple with one\n\ + item per argument; if there are no arguments, the whole match is returned.\n\ + Group 0 is the whole match."); + +PyDoc_STRVAR(match_start_doc, + "start([group1, ...]) --> int or tuple of ints.\n\ + Return the index of the start of one or more subgroups of the match. If\n\ + there is a single argument, the result is an index, or -1 if the group did\n\ + not contribute to the match; if there are multiple arguments, the result is\n\ + a tuple with one item per argument; if there are no arguments, the index of\n\ + the start of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_end_doc, + "end([group1, ...]) --> int or tuple of ints.\n\ + Return the index of the end of one or more subgroups of the match. If there\n\ + is a single argument, the result is an index, or -1 if the group did not\n\ + contribute to the match; if there are multiple arguments, the result is a\n\ + tuple with one item per argument; if there are no arguments, the index of\n\ + the end of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_span_doc, + "span([group1, ...]) --> 2-tuple of int or tuple of 2-tuple of ints.\n\ + Return the span (a 2-tuple of the indices of the start and end) of one or\n\ + more subgroups of the match. If there is a single argument, the result is a\n\ + span, or (-1, -1) if the group did not contribute to the match; if there are\n\ + multiple arguments, the result is a tuple with one item per argument; if\n\ + there are no arguments, the span of the whole match is returned. Group 0 is\n\ + the whole match."); + +PyDoc_STRVAR(match_groups_doc, + "groups(default=None) --> tuple of strings.\n\ + Return a tuple containing all the subgroups of the match. The argument is\n\ + the default for groups that did not participate in the match."); + +PyDoc_STRVAR(match_groupdict_doc, + "groupdict(default=None) --> dict.\n\ + Return a dictionary containing all the named subgroups of the match, keyed\n\ + by the subgroup name. The argument is the value to be given for groups that\n\ + did not participate in the match."); + +PyDoc_STRVAR(match_capturesdict_doc, + "capturesdict() --> dict.\n\ + Return a dictionary containing the captures of all the named subgroups of the\n\ + match, keyed by the subgroup name."); + +PyDoc_STRVAR(match_expand_doc, + "expand(template) --> string.\n\ + Return the string obtained by doing backslash substitution on the template,\n\ + as done by the sub() method."); + +#if PY_VERSION_HEX >= 0x02060000 +PyDoc_STRVAR(match_expandf_doc, + "expandf(format) --> string.\n\ + Return the string obtained by using the format, as done by the subf()\n\ + method."); + +#endif +PyDoc_STRVAR(match_captures_doc, + "captures([group1, ...]) --> list of strings or tuple of list of strings.\n\ + Return the captures of one or more subgroups of the match. If there is a\n\ + single argument, the result is a list of strings; if there are multiple\n\ + arguments, the result is a tuple of lists with one item per argument; if\n\ + there are no arguments, the captures of the whole match is returned. Group\n\ + 0 is the whole match."); + +PyDoc_STRVAR(match_starts_doc, + "starts([group1, ...]) --> list of ints or tuple of list of ints.\n\ + Return the indices of the starts of the captures of one or more subgroups of\n\ + the match. If there is a single argument, the result is a list of indices;\n\ + if there are multiple arguments, the result is a tuple of lists with one\n\ + item per argument; if there are no arguments, the indices of the starts of\n\ + the captures of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_ends_doc, + "ends([group1, ...]) --> list of ints or tuple of list of ints.\n\ + Return the indices of the ends of the captures of one or more subgroups of\n\ + the match. If there is a single argument, the result is a list of indices;\n\ + if there are multiple arguments, the result is a tuple of lists with one\n\ + item per argument; if there are no arguments, the indices of the ends of the\n\ + captures of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_spans_doc, + "spans([group1, ...]) --> list of 2-tuple of ints or tuple of list of 2-tuple of ints.\n\ + Return the spans (a 2-tuple of the indices of the start and end) of the\n\ + captures of one or more subgroups of the match. If there is a single\n\ + argument, the result is a list of spans; if there are multiple arguments,\n\ + the result is a tuple of lists with one item per argument; if there are no\n\ + arguments, the spans of the captures of the whole match is returned. Group\n\ + 0 is the whole match."); + +PyDoc_STRVAR(match_detach_string_doc, + "detach_string()\n\ + Detaches the target string from the match object. The 'string' attribute\n\ + will become None."); + +/* MatchObject's methods. */ +static PyMethodDef match_methods[] = { + {"group", (PyCFunction)match_group, METH_VARARGS, match_group_doc}, + {"start", (PyCFunction)match_start, METH_VARARGS, match_start_doc}, + {"end", (PyCFunction)match_end, METH_VARARGS, match_end_doc}, + {"span", (PyCFunction)match_span, METH_VARARGS, match_span_doc}, + {"groups", (PyCFunction)match_groups, METH_VARARGS|METH_KEYWORDS, + match_groups_doc}, + {"groupdict", (PyCFunction)match_groupdict, METH_VARARGS|METH_KEYWORDS, + match_groupdict_doc}, + {"capturesdict", (PyCFunction)match_capturesdict, METH_NOARGS, + match_capturesdict_doc}, + {"expand", (PyCFunction)match_expand, METH_O, match_expand_doc}, +#if PY_VERSION_HEX >= 0x02060000 + {"expandf", (PyCFunction)match_expandf, METH_O, match_expandf_doc}, +#endif + {"captures", (PyCFunction)match_captures, METH_VARARGS, + match_captures_doc}, + {"starts", (PyCFunction)match_starts, METH_VARARGS, match_starts_doc}, + {"ends", (PyCFunction)match_ends, METH_VARARGS, match_ends_doc}, + {"spans", (PyCFunction)match_spans, METH_VARARGS, match_spans_doc}, + {"detach_string", (PyCFunction)match_detach_string, METH_NOARGS, + match_detach_string_doc}, + {"__copy__", (PyCFunction)match_copy, METH_NOARGS}, + {"__deepcopy__", (PyCFunction)match_deepcopy, METH_O}, + {"__getitem__", (PyCFunction)match_getitem, METH_O|METH_COEXIST}, + {NULL, NULL} +}; + +PyDoc_STRVAR(match_doc, "Match object"); + +/* MatchObject's 'lastindex' attribute. */ +static PyObject* match_lastindex(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + if (self->lastindex >= 0) + return Py_BuildValue("n", self->lastindex); + + Py_INCREF(Py_None); + return Py_None; +} + +/* MatchObject's 'lastgroup' attribute. */ +static PyObject* match_lastgroup(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + if (self->pattern->indexgroup && self->lastgroup >= 0) { + PyObject* index = Py_BuildValue("n", self->lastgroup); + PyObject* result = PyDict_GetItem(self->pattern->indexgroup, index); + Py_DECREF(index); + if (result) { + Py_INCREF(result); + return result; + } + PyErr_Clear(); + } + + Py_INCREF(Py_None); + return Py_None; +} + +/* MatchObject's 'string' attribute. */ +static PyObject* match_string(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + if (self->string) { + Py_INCREF(self->string); + return self->string; + } else { + Py_INCREF(Py_None); + return Py_None; + } +} + +static PyGetSetDef match_getset[] = { + {"lastindex", (getter)match_lastindex, (setter)NULL, + "The group number of the last matched capturing group, or None."}, + {"lastgroup", (getter)match_lastgroup, (setter)NULL, + "The name of the last matched capturing group, or None."}, + {"regs", (getter)match_regs, (setter)NULL, + "A tuple of the spans of the capturing groups."}, + {"string", (getter)match_string, (setter)NULL, + "The string that was searched, or None if it has been detached."}, + {NULL} /* Sentinel */ +}; + +static PyMemberDef match_members[] = { + {"re", T_OBJECT, offsetof(MatchObject, pattern), READONLY, + "The regex object that produced this match object."}, + {"pos", T_PYSSIZET, offsetof(MatchObject, pos), READONLY, + "The position at which the regex engine starting searching."}, + {"endpos", T_PYSSIZET, offsetof(MatchObject, endpos), READONLY, + "The final position beyond which the regex engine won't search."}, + {NULL} /* Sentinel */ +}; + +static PyMappingMethods match_as_mapping = { + (lenfunc)match_length, /* mp_length */ + (binaryfunc)match_getitem, /* mp_subscript */ + 0, /* mp_ass_subscript */ +}; + +static PyTypeObject Match_Type = { + PyObject_HEAD_INIT(NULL) + 0, + "_" RE_MODULE "." "Match", + sizeof(MatchObject) +}; + +/* Copies the groups. */ +Py_LOCAL_INLINE(RE_GroupData*) copy_groups(RE_GroupData* groups, Py_ssize_t + group_count) { + Py_ssize_t span_count; + Py_ssize_t g; + RE_GroupData* groups_copy; + RE_GroupSpan* spans_copy; + Py_ssize_t offset; + + /* Calculate the total size of the group info. */ + span_count = 0; + for (g = 0; g < group_count; g++) + span_count += groups[g].capture_count; + + /* Allocate the storage for the group info in a single block. */ + groups_copy = (RE_GroupData*)re_alloc(group_count * sizeof(RE_GroupData) + + span_count * sizeof(RE_GroupSpan)); + if (!groups_copy) + return NULL; + + /* The storage for the spans comes after the other group info. */ + spans_copy = (RE_GroupSpan*)&groups_copy[group_count]; + + /* There's no need to initialise the spans info. */ + memset(groups_copy, 0, group_count * sizeof(RE_GroupData)); + + offset = 0; + for (g = 0; g < group_count; g++) { + RE_GroupData* orig; + RE_GroupData* copy; + + orig = &groups[g]; + copy = &groups_copy[g]; + copy->span = orig->span; + + copy->captures = &spans_copy[offset]; + offset += orig->capture_count; + + if (orig->capture_count > 0) { + Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * + sizeof(RE_GroupSpan)); + copy->capture_capacity = orig->capture_count; + copy->capture_count = orig->capture_count; + } + } + + return groups_copy; +} + +/* Makes a copy of a MatchObject. */ +Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) { + MatchObject* match; + + if (!self->string) { + /* The target string has been detached, so the MatchObject is now + * immutable. + */ + Py_INCREF(self); + return (PyObject*)self; + } + + /* Create a MatchObject. */ + match = PyObject_NEW(MatchObject, &Match_Type); + if (!match) + return NULL; + + match->string = self->string; + match->substring = self->substring; + match->substring_offset = self->substring_offset; + match->pattern = self->pattern; + match->regs = self->regs; + Py_INCREF(match->string); + Py_INCREF(match->substring); + Py_INCREF(match->pattern); + + /* Copy the groups to the MatchObject. */ + if (self->group_count > 0) { + match->groups = copy_groups(self->groups, self->group_count); + if (!match->groups) { + Py_DECREF(match); + return NULL; + } + } else + match->groups = NULL; + + match->group_count = self->group_count; + + match->pos = self->pos; + match->endpos = self->endpos; + + match->match_start = self->match_start; + match->match_end = self->match_end; + + match->lastindex = match->lastindex; + match->lastgroup = match->lastgroup; + + return (PyObject*)match; +} + +/* Creates a new MatchObject. */ +Py_LOCAL_INLINE(PyObject*) pattern_new_match(PatternObject* pattern, RE_State* + state, int status) { + /* Create MatchObject (from state object). */ + if (status > 0) { + MatchObject* match; + + /* Create a MatchObject. */ + match = PyObject_NEW(MatchObject, &Match_Type); + if (!match) + return NULL; + + match->string = state->string; + match->substring = state->string; + match->substring_offset = 0; + match->pattern = pattern; + match->regs = NULL; + Py_INCREF(match->string); + Py_INCREF(match->substring); + Py_INCREF(match->pattern); + + /* Copy the groups to the MatchObject. */ + if (pattern->public_group_count > 0) { + match->groups = copy_groups(state->groups, + pattern->public_group_count); + if (!match->groups) { + Py_DECREF(match); + return NULL; + } + } else + match->groups = NULL; + + match->group_count = pattern->public_group_count; + + match->pos = state->slice_start; + match->endpos = state->slice_end; + + if (state->reverse) { + match->match_start = state->text_pos; + match->match_end = state->match_pos; + } else { + match->match_start = state->match_pos; + match->match_end = state->text_pos; + } + + match->lastindex = state->lastindex; + match->lastgroup = state->lastgroup; + + return (PyObject*)match; + } else if (status == 0) { + /* No match. */ + Py_INCREF(Py_None); + return Py_None; + } else { + /* Internal error. */ + set_error(status, NULL); + return NULL; + } +} + +/* Gets the text of a capture group from a state. */ +Py_LOCAL_INLINE(PyObject*) state_get_group(RE_State* state, Py_ssize_t index, + PyObject* string, BOOL empty) { + RE_GroupData* group; + Py_ssize_t start; + Py_ssize_t end; + + group = &state->groups[index - 1]; + + if (string != Py_None && index >= 1 && index <= + (Py_ssize_t)state->pattern->public_group_count && group->capture_count > + 0) { + start = group->span.start; + end = group->span.end; + } else { + if (empty) + /* Want an empty string. */ + start = end = 0; + else { + Py_INCREF(Py_None); + return Py_None; + } + } + + return get_slice(string, start, end); +} + +/* Acquires the lock (mutex) on the state if there's one. + * + * It also increments the owner's refcount just to ensure that it won't be + * destroyed by another thread. + */ +Py_LOCAL_INLINE(void) acquire_state_lock(PyObject* owner, RE_SafeState* + safe_state) { + RE_State* state; + + state = safe_state->re_state; + + if (state->lock) { + /* In order to avoid deadlock we need to release the GIL while trying + * to acquire the lock. + */ + Py_INCREF(owner); + if (!PyThread_acquire_lock(state->lock, 0)) { + release_GIL(safe_state); + PyThread_acquire_lock(state->lock, 1); + acquire_GIL(safe_state); + } + } +} + +/* Releases the lock (mutex) on the state if there's one. + * + * It also decrements the owner's refcount, which was incremented when the lock + * was acquired. + */ +Py_LOCAL_INLINE(void) release_state_lock(PyObject* owner, RE_SafeState* + safe_state) { + RE_State* state; + + state = safe_state->re_state; + + if (state->lock) { + PyThread_release_lock(state->lock); + Py_DECREF(owner); + } +} + +/* Implements the functionality of ScanObject's search and match methods. */ +static PyObject* scanner_search_or_match(ScannerObject* self, BOOL search) { + RE_State* state; + RE_SafeState safe_state; + PyObject* match; + + state = &self->state; + + /* Initialise the "safe state" structure. */ + safe_state.re_state = state; + safe_state.thread_state = NULL; + + /* Acquire the state lock in case we're sharing the scanner object across + * threads. + */ + acquire_state_lock((PyObject*)self, &safe_state); + + if (self->status == 0) { + /* No match. */ + release_state_lock((PyObject*)self, &safe_state); + Py_INCREF(Py_None); + return Py_None; + } else if (self->status < 0) { + /* Internal error. */ + release_state_lock((PyObject*)self, &safe_state); + set_error(self->status, NULL); + return NULL; + } + + /* Look for another match. */ + self->status = do_match(&safe_state, search); + if (self->status < 0) { + /* Internal error. */ + release_state_lock((PyObject*)self, &safe_state); + return NULL; + } + + /* Create the match object. */ + match = pattern_new_match(self->pattern, state, self->status); + + if (search && state->overlapped) { + /* Advance one character. */ + Py_ssize_t step; + + step = state->reverse ? -1 : 1; + state->text_pos = state->match_pos + step; + state->must_advance = FALSE; + } else + /* Continue from where we left off, but don't allow 2 contiguous + * zero-width matches. + */ + state->must_advance = state->text_pos == state->match_pos; + + /* Release the state lock. */ + release_state_lock((PyObject*)self, &safe_state); + + return match; +} + +/* ScannerObject's 'match' method. */ +static PyObject* scanner_match(ScannerObject* self, PyObject* unused) { + return scanner_search_or_match(self, FALSE); +} + +/* ScannerObject's 'search' method. */ +static PyObject* scanner_search(ScannerObject* self, PyObject *unused) { + return scanner_search_or_match(self, TRUE); +} + +/* ScannerObject's 'next' method. */ +static PyObject* scanner_next(PyObject* self) { + PyObject* match; + + match = scanner_search((ScannerObject*)self, NULL); + + if (match == Py_None) { + /* No match. */ + Py_DECREF(Py_None); + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + return match; +} + +/* Returns an iterator for a ScannerObject. + * + * The iterator is actually the ScannerObject itself. + */ +static PyObject* scanner_iter(PyObject* self) { + Py_INCREF(self); + return self; +} + +/* Gets the next result from a scanner iterator. */ +static PyObject* scanner_iternext(PyObject* self) { + PyObject* match; + + match = scanner_search((ScannerObject*)self, NULL); + + if (match == Py_None) { + /* No match. */ + Py_DECREF(match); + return NULL; + } + + return match; +} + +/* Makes a copy of a ScannerObject. + * + * It actually doesn't make a copy, just returns the original object. + */ +Py_LOCAL_INLINE(PyObject*) make_scanner_copy(ScannerObject* self) { + Py_INCREF(self); + return (PyObject*)self; +} + +/* ScannerObject's '__copy__' method. */ +static PyObject* scanner_copy(ScannerObject* self, PyObject *unused) { + return make_scanner_copy(self); +} + +/* ScannerObject's '__deepcopy__' method. */ +static PyObject* scanner_deepcopy(ScannerObject* self, PyObject* memo) { + return make_scanner_copy(self); +} + +/* The documentation of a ScannerObject. */ +PyDoc_STRVAR(scanner_match_doc, + "match() --> MatchObject or None.\n\ + Match at the current position in the string."); + +PyDoc_STRVAR(scanner_search_doc, + "search() --> MatchObject or None.\n\ + Search from the current position in the string."); + +/* ScannerObject's methods. */ +static PyMethodDef scanner_methods[] = { + {"next", (PyCFunction)scanner_next, METH_NOARGS}, + {"match", (PyCFunction)scanner_match, METH_NOARGS, scanner_match_doc}, + {"search", (PyCFunction)scanner_search, METH_NOARGS, scanner_search_doc}, + {"__copy__", (PyCFunction)scanner_copy, METH_NOARGS}, + {"__deepcopy__", (PyCFunction)scanner_deepcopy, METH_O}, + {NULL, NULL} +}; + +PyDoc_STRVAR(scanner_doc, "Scanner object"); + +/* Deallocates a ScannerObject. */ +static void scanner_dealloc(PyObject* self_) { + ScannerObject* self; + + self = (ScannerObject*)self_; + + state_fini(&self->state); + Py_DECREF(self->pattern); + PyObject_DEL(self); +} + +static PyMemberDef scanner_members[] = { + {"pattern", T_OBJECT, offsetof(ScannerObject, pattern), READONLY, + "The regex object that produced this scanner object."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject Scanner_Type = { + PyObject_HEAD_INIT(NULL) + 0, + "_" RE_MODULE "." "Scanner", + sizeof(ScannerObject) +}; + +/* Decodes a 'concurrent' argument. */ +Py_LOCAL_INLINE(int) decode_concurrent(PyObject* concurrent) { + Py_ssize_t value; + + if (concurrent == Py_None) + return RE_CONC_DEFAULT; + + value = PyLong_AsLong(concurrent); + if (value == -1 && PyErr_Occurred()) { + set_error(RE_ERROR_CONCURRENT, NULL); + return -1; + } + + return value ? RE_CONC_YES : RE_CONC_NO; +} + +/* Creates a new ScannerObject. */ +static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, + PyObject* kwargs) { + /* Create search state object. */ + ScannerObject* self; + Py_ssize_t start; + Py_ssize_t end; + int conc; + + PyObject* string; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + Py_ssize_t overlapped = FALSE; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "string", "pos", "endpos", "overlapped", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnO:scanner", kwlist, + &string, &pos, &endpos, &overlapped, &concurrent)) + return NULL; + + start = as_string_index(pos, 0); + if (start == -1 && PyErr_Occurred()) + return NULL; + + end = as_string_index(endpos, PY_SSIZE_T_MAX); + if (end == -1 && PyErr_Occurred()) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + /* Create a scanner object. */ + self = PyObject_NEW(ScannerObject, &Scanner_Type); + if (!self) + return NULL; + + self->pattern = pattern; + Py_INCREF(self->pattern); + + /* The MatchObject, and therefore repeated captures, will be visible. */ + if (!state_init(&self->state, pattern, string, start, end, overlapped != 0, + conc, TRUE, TRUE, FALSE)) { + PyObject_DEL(self); + return NULL; + } + + self->status = 1; + + return (PyObject*) self; +} + +/* Performs the split for the SplitterObject. */ +static PyObject* next_split_part(SplitterObject* self) { + RE_State* state; + RE_SafeState safe_state; + PyObject* result = NULL; /* Initialise to stop compiler warning. */ + + state = &self->state; + + /* Initialise the "safe state" structure. */ + safe_state.re_state = state; + safe_state.thread_state = NULL; + + /* Acquire the state lock in case we're sharing the splitter object across + * threads. + */ + acquire_state_lock((PyObject*)self, &safe_state); + + if (self->status == 0) { + /* Finished. */ + release_state_lock((PyObject*)self, &safe_state); + result = Py_False; + Py_INCREF(result); + return result; + } else if (self->status < 0) { + /* Internal error. */ + release_state_lock((PyObject*)self, &safe_state); + set_error(self->status, NULL); + return NULL; + } + + if (self->index == 0) { + if (self->split_count < self->maxsplit) { + Py_ssize_t step; + Py_ssize_t end_pos; + + if (state->reverse) { + step = -1; + end_pos = state->slice_start; + } else { + step = 1; + end_pos = state->slice_end; + } + +retry: + self->status = do_match(&safe_state, TRUE); + if (self->status < 0) + goto error; + + if (self->status == RE_ERROR_SUCCESS) { + if (!state->zero_width) { + /* The current behaviour is to advance one character if the + * split was zero-width. Unfortunately, this can give an + * incorrect result. GvR wants this behaviour to be + * retained so as not to break any existing software which + * might rely on it. The correct behaviour is enabled by + * setting the 'new' flag. + */ + if (state->text_pos == state->match_pos) { + if (self->last_pos == end_pos) + goto no_match; + + /* Advance one character. */ + state->text_pos += step; + state->must_advance = FALSE; + goto retry; + } + } + + ++self->split_count; + + /* Get segment before this match. */ + if (state->reverse) + result = get_slice(state->string, state->match_pos, + self->last_pos); + else + result = get_slice(state->string, self->last_pos, + state->match_pos); + if (!result) + goto error; + + self->last_pos = state->text_pos; + + /* The correct behaviour is to reject a zero-width match just + * after a split point. The current behaviour is to advance one + * character if the match was zero-width. Unfortunately, this + * can give an incorrect result. GvR wants this behaviour to be + * retained so as not to break any existing software which + * might rely on it. The correct behaviour is enabled by + * setting the 'new' flag. + */ + if (state->zero_width) + /* Continue from where we left off, but don't allow a + * contiguous zero-width match. + */ + state->must_advance = TRUE; + else { + if (state->text_pos == state->match_pos) + /* Advance one character. */ + state->text_pos += step; + + state->must_advance = FALSE; + } + } + } else + goto no_match; + + if (self->status == RE_ERROR_FAILURE) { +no_match: + /* Get segment following last match (even if empty). */ + if (state->reverse) + result = get_slice(state->string, 0, self->last_pos); + else + result = get_slice(state->string, self->last_pos, + state->text_length); + if (!result) + goto error; + } + } else { + /* Add group. */ + result = state_get_group(state, self->index, state->string, FALSE); + if (!result) + goto error; + } + + ++self->index; + if (self->index > (Py_ssize_t)state->pattern->public_group_count) + self->index = 0; + + /* Release the state lock. */ + release_state_lock((PyObject*)self, &safe_state); + + return result; + +error: + /* Release the state lock. */ + release_state_lock((PyObject*)self, &safe_state); + + return NULL; +} + +/* SplitterObject's 'split' method. */ +static PyObject* splitter_split(SplitterObject* self, PyObject *unused) { + PyObject* result; + + result = next_split_part(self); + + if (result == Py_False) { + /* The sentinel. */ + Py_DECREF(Py_False); + Py_INCREF(Py_None); + return Py_None; + } + + return result; +} + +/* SplitterObject's 'next' method. */ +static PyObject* splitter_next(PyObject* self) { + PyObject* result; + + result = next_split_part((SplitterObject*)self); + + if (result == Py_False) { + /* No match. */ + Py_DECREF(Py_False); + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + return result; +} + +/* Returns an iterator for a SplitterObject. + * + * The iterator is actually the SplitterObject itself. + */ +static PyObject* splitter_iter(PyObject* self) { + Py_INCREF(self); + return self; +} + +/* Gets the next result from a splitter iterator. */ +static PyObject* splitter_iternext(PyObject* self) { + PyObject* result; + + result = next_split_part((SplitterObject*)self); + + if (result == Py_False) { + /* No match. */ + Py_DECREF(result); + return NULL; + } + + return result; +} + +/* Makes a copy of a SplitterObject. + * + * It actually doesn't make a copy, just returns the original object. + */ +Py_LOCAL_INLINE(PyObject*) make_splitter_copy(SplitterObject* self) { + Py_INCREF(self); + return (PyObject*)self; +} + +/* SplitterObject's '__copy__' method. */ +static PyObject* splitter_copy(SplitterObject* self, PyObject *unused) { + return make_splitter_copy(self); +} + +/* SplitterObject's '__deepcopy__' method. */ +static PyObject* splitter_deepcopy(SplitterObject* self, PyObject* memo) { + return make_splitter_copy(self); +} + +/* The documentation of a SplitterObject. */ +PyDoc_STRVAR(splitter_split_doc, + "split() --> string or None.\n\ + Return the next part of the split string."); + +/* SplitterObject's methods. */ +static PyMethodDef splitter_methods[] = { + {"next", (PyCFunction)splitter_next, METH_NOARGS}, + {"split", (PyCFunction)splitter_split, METH_NOARGS, splitter_split_doc}, + {"__copy__", (PyCFunction)splitter_copy, METH_NOARGS}, + {"__deepcopy__", (PyCFunction)splitter_deepcopy, METH_O}, + {NULL, NULL} +}; + +PyDoc_STRVAR(splitter_doc, "Splitter object"); + +/* Deallocates a SplitterObject. */ +static void splitter_dealloc(PyObject* self_) { + SplitterObject* self; + + self = (SplitterObject*)self_; + + state_fini(&self->state); + Py_DECREF(self->pattern); + PyObject_DEL(self); +} + +static PyMemberDef splitter_members[] = { + {"pattern", T_OBJECT, offsetof(SplitterObject, pattern), READONLY, + "The regex object that produced this splitter object."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject Splitter_Type = { + PyObject_HEAD_INIT(NULL) + 0, + "_" RE_MODULE "." "Splitter", + sizeof(SplitterObject) +}; + +/* Creates a new SplitterObject. */ +static PyObject* pattern_splitter(PatternObject* pattern, PyObject* args, + PyObject* kwargs) { + /* Create split state object. */ + int conc; + SplitterObject* self; + RE_State* state; + + PyObject* string; + Py_ssize_t maxsplit = 0; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:splitter", kwlist, + &string, &maxsplit, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + /* Create a splitter object. */ + self = PyObject_NEW(SplitterObject, &Splitter_Type); + if (!self) + return NULL; + + self->pattern = pattern; + Py_INCREF(self->pattern); + + if (maxsplit == 0) + maxsplit = PY_SSIZE_T_MAX; + + state = &self->state; + + /* The MatchObject, and therefore repeated captures, will not be visible. + */ + if (!state_init(state, pattern, string, 0, PY_SSIZE_T_MAX, FALSE, conc, + TRUE, FALSE, FALSE)) { + PyObject_DEL(self); + return NULL; + } + + self->maxsplit = maxsplit; + self->last_pos = state->reverse ? state->text_length : 0; + self->split_count = 0; + self->index = 0; + self->status = 1; + + return (PyObject*) self; +} + +/* Implements the functionality of PatternObject's search and match methods. */ +static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, + PyObject* kwargs, char* args_desc, BOOL search, BOOL match_all) { + Py_ssize_t start; + Py_ssize_t end; + int conc; + RE_State state; + RE_SafeState safe_state; + int status; + PyObject* match; + + PyObject* string; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "string", "pos", "endpos", "concurrent", NULL }; + /* When working with a short string, such as a line from a file, the + * relative cost of PyArg_ParseTupleAndKeywords can be significant, and + * it's worth not using it when there are only positional arguments. + */ + Py_ssize_t arg_count; + if (args && !kwargs && PyTuple_CheckExact(args)) + arg_count = PyTuple_GET_SIZE(args); + else + arg_count = -1; + + if (1 <= arg_count && arg_count <= 4) { + string = PyTuple_GET_ITEM(args, 0); + if (arg_count >= 2) + pos = PyTuple_GET_ITEM(args, 1); + if (arg_count >= 3) + endpos = PyTuple_GET_ITEM(args, 2); + if (arg_count >= 4) + concurrent = PyTuple_GET_ITEM(args, 3); + } else if (!PyArg_ParseTupleAndKeywords(args, kwargs, args_desc, kwlist, + &string, &pos, &endpos, &concurrent)) + return NULL; + + start = as_string_index(pos, 0); + if (start == -1 && PyErr_Occurred()) + return NULL; + + end = as_string_index(endpos, PY_SSIZE_T_MAX); + if (end == -1 && PyErr_Occurred()) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + /* The MatchObject, and therefore repeated captures, will be visible. */ + if (!state_init(&state, self, string, start, end, FALSE, conc, FALSE, TRUE, + match_all)) + return NULL; + + /* Initialise the "safe state" structure. */ + safe_state.re_state = &state; + safe_state.thread_state = NULL; + + status = do_match(&safe_state, search); + if (status < 0) { + state_fini(&state); + return NULL; + } + + /* Create the match object. */ + match = pattern_new_match(self, &state, status); + + state_fini(&state); + + return match; +} + +/* PatternObject's 'match' method. */ +static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject* + kwargs) { + return pattern_search_or_match(self, args, kwargs, "O|OOO:match", FALSE, + FALSE); +} + +/* PatternObject's 'fullmatch' method. */ +static PyObject* pattern_fullmatch(PatternObject* self, PyObject* args, + PyObject* kwargs) { + return pattern_search_or_match(self, args, kwargs, "O|OOO:fullmatch", + FALSE, TRUE); +} + +/* PatternObject's 'search' method. */ +static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject* + kwargs) { + return pattern_search_or_match(self, args, kwargs, "O|OOO:search", TRUE, + FALSE); +} + +/* Gets the limits of the matching. */ +Py_LOCAL_INLINE(BOOL) get_limits(PyObject* pos, PyObject* endpos, Py_ssize_t + length, Py_ssize_t* start, Py_ssize_t* end) { + Py_ssize_t s; + Py_ssize_t e; + + s = as_string_index(pos, 0); + if (s == -1 && PyErr_Occurred()) + return FALSE; + + e = as_string_index(endpos, PY_SSIZE_T_MAX); + if (e == -1 && PyErr_Occurred()) + return FALSE; + + /* Adjust boundaries. */ + if (s < 0) + s += length; + if (s < 0) + s = 0; + else if (s > length) + s = length; + + if (e < 0) + e += length; + if (e < 0) + e = 0; + else if (e > length) + e = length; + + *start = s; + *end = e; + + return TRUE; +} + +/* Gets a replacement item from the replacement list. + * + * The replacement item could be a string literal or a group. + * + * It can return None to represent an empty string. + */ +Py_LOCAL_INLINE(PyObject*) get_sub_replacement(PyObject* item, PyObject* + string, RE_State* state, Py_ssize_t group_count) { + Py_ssize_t index; + + if (PyUnicode_CheckExact(item) || PyString_CheckExact(item)) { + /* It's a literal, which can be added directly to the list. */ + Py_INCREF(item); + return item; + } + + /* Is it a group reference? */ + index = as_group_index(item); + if (index == -1 && PyErr_Occurred()) { + /* Not a group either! */ + set_error(RE_ERROR_REPLACEMENT, NULL); + return NULL; + } + + if (index == 0) { + /* The entire matched portion of the string. */ + if (state->match_pos == state->text_pos) { + /* Return None for "". */ + Py_INCREF(Py_None); + return Py_None; + } + + if (state->reverse) + return get_slice(string, state->text_pos, state->match_pos); + else + return get_slice(string, state->match_pos, state->text_pos); + } else if (1 <= index && index <= group_count) { + /* A group. */ + RE_GroupData* group; + + group = &state->groups[index - 1]; + + if (group->capture_count == 0 && group->span.start != group->span.end) + { + /* The group didn't match or is "", so return None for "". */ + Py_INCREF(Py_None); + return Py_None; + } + + return get_slice(string, group->span.start, group->span.end); + } else { + /* No such group. */ + set_error(RE_ERROR_INVALID_GROUP_REF, NULL); + return NULL; + } +} + +/* PatternObject's 'subx' method. */ +Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* + str_template, PyObject* string, Py_ssize_t maxsub, int sub_type, PyObject* + pos, PyObject* endpos, int concurrent) { + RE_StringInfo str_info; + Py_ssize_t start; + Py_ssize_t end; + BOOL is_callable = FALSE; + BOOL is_literal = FALSE; + BOOL is_template = FALSE; + PyObject* replacement = NULL; +#if PY_VERSION_HEX >= 0x02060000 + BOOL is_format = FALSE; +#endif + RE_State state; + RE_SafeState safe_state; + JoinInfo join_info; + Py_ssize_t sub_count; + Py_ssize_t last_pos; + PyObject* item; + Py_ssize_t end_pos; + + /* Get the string. */ + if (!get_string(string, &str_info)) + return NULL; + + /* Get the limits of the search. */ + if (!get_limits(pos, endpos, str_info.length, &start, &end)) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* If the pattern is too long for the string, then take a shortcut, unless + * it's a fuzzy pattern. + */ + if (!self->is_fuzzy && (Py_ssize_t)self->min_width > end - start) { + PyObject* result; + + Py_INCREF(string); + + if (sub_type & RE_SUBN) + result = Py_BuildValue("Nn", string, 0); + else + result = string; + +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return result; + } + + if (maxsub == 0) + maxsub = PY_SSIZE_T_MAX; + + /* sub/subn takes either a function or a string template. */ + if (PyCallable_Check(str_template)) { + /* It's callable. */ + is_callable = TRUE; + + replacement = str_template; + Py_INCREF(replacement); +#if PY_VERSION_HEX >= 0x02060000 + } else if (sub_type & RE_SUBF) { + /* Is it a literal format? + * + * To keep it simple we'll say that a literal is a string which can be + * used as-is, so no placeholders. + */ + Py_ssize_t literal_length; + + literal_length = check_replacement_string(str_template, '{'); + if (literal_length > 0) { + /* It's a literal. */ + is_literal = TRUE; + + replacement = str_template; + Py_INCREF(replacement); + } else if (literal_length < 0) { + /* It isn't a literal, so get the 'format' method. */ + is_format = TRUE; + + replacement = PyObject_GetAttrString(str_template, "format"); + if (!replacement) { + release_buffer(&str_info); + return NULL; + } + } +#endif + } else { + /* Is it a literal template? + * + * To keep it simple we'll say that a literal is a string which can be + * used as-is, so no backslashes. + */ + Py_ssize_t literal_length; + + literal_length = check_replacement_string(str_template, '\\'); + if (literal_length > 0) { + /* It's a literal. */ + is_literal = TRUE; + + replacement = str_template; + Py_INCREF(replacement); + } else if (literal_length < 0 ) { + /* It isn't a literal, so hand it over to the template compiler. */ + is_template = TRUE; + + replacement = call(RE_MODULE, "_compile_replacement_helper", + PyTuple_Pack(2, self, str_template)); + if (!replacement) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + } + } + + /* The MatchObject, and therefore repeated captures, will be visible only + * if the replacement is callable. + */ + if (!state_init_2(&state, self, string, &str_info, start, end, FALSE, + concurrent, FALSE, is_callable, FALSE)) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + Py_XDECREF(replacement); + return NULL; + } + + /* Initialise the "safe state" structure. */ + safe_state.re_state = &state; + safe_state.thread_state = NULL; + + join_info.item = NULL; + join_info.list = NULL; + join_info.reversed = state.reverse; + join_info.is_unicode = PyUnicode_Check(string); + + sub_count = 0; + last_pos = state.reverse ? state.text_length : 0; + while (sub_count < maxsub) { + int status; + + status = do_match(&safe_state, TRUE); + if (status < 0) + goto error; + + if (status == 0) + break; + + /* Append the segment before this match. */ + if (state.match_pos != last_pos) { + if (state.reverse) + item = get_slice(string, state.match_pos, last_pos); + else + item = get_slice(string, last_pos, state.match_pos); + if (!item) + goto error; + + /* Add to the list. */ + status = add_item(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + /* Add this match. */ + if (is_literal) { + /* The replacement is a literal string. */ + status = add_item(&join_info, replacement); + if (status < 0) + goto error; +#if PY_VERSION_HEX >= 0x02060000 + } else if (is_format) { + /* The replacement is a format string. */ + MatchObject* match; + PyObject* args; + size_t g; + PyObject* kwargs; + + /* We need to create the arguments for the 'format' method. We'll + * start by creating a MatchObject. + */ + match = (MatchObject*)pattern_new_match(self, &state, 1); + if (!match) + goto error; + + /* The args are a tuple of the capture group matches. */ + args = PyTuple_New(state.pattern->public_group_count + 1); + if (!args) { + Py_DECREF(match); + goto error; + } + + for (g = 0; g < state.pattern->public_group_count + 1; g++) + PyTuple_SetItem(args, g, match_get_group_by_index(match, g, + Py_None)); + + /* The kwargs are a dict of the named capture group matches. */ + kwargs = match_get_group_dict(match); + if (!kwargs) { + Py_DECREF(args); + Py_DECREF(match); + goto error; + } + + /* Call the 'format' method. */ + item = PyObject_Call(replacement, args, kwargs); + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(match); + if (!item) + goto error; + + /* Add the result to the list. */ + status = add_item(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; +#endif + } else if (is_template) { + /* The replacement is a list template. */ + Py_ssize_t size; + Py_ssize_t i; + + /* Add each part of the template to the list. */ + size = PyList_GET_SIZE(replacement); + for (i = 0; i < size; i++) { + PyObject* item; + PyObject* str_item; + + item = PyList_GET_ITEM(replacement, i); + str_item = get_sub_replacement(item, string, &state, + self->public_group_count); + if (!str_item) + goto error; + + /* Add the result to the list. */ + if (str_item == Py_None) + /* None for "". */ + Py_DECREF(str_item); + else { + status = add_item(&join_info, str_item); + Py_DECREF(str_item); + if (status < 0) + goto error; + } + } + } else if (is_callable) { + /* Pass a MatchObject to the replacement function. */ + PyObject* match; + PyObject* args; + + /* We need to create a MatchObject to pass to the replacement + * function. + */ + match = pattern_new_match(self, &state, 1); + if (!match) + goto error; + + /* The args for the replacement function. */ + args = PyTuple_Pack(1, match); + if (!args) { + Py_DECREF(match); + goto error; + } + + /* Call the replacement function. */ + item = PyObject_CallObject(replacement, args); + Py_DECREF(args); + Py_DECREF(match); + if (!item) + goto error; + + /* Add the result to the list. */ + status = add_item(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + ++sub_count; + + /* Continue from where we left off, but don't allow 2 contiguous + * zero-width matches. + */ + state.must_advance = state.match_pos == state.text_pos; + last_pos = state.text_pos; + } + + /* Get the segment following the last match. We use 'length' instead of + * 'text_length' because the latter is truncated to 'slice_end', a + * documented idiosyncracy of the 're' module. + */ + end_pos = state.reverse ? 0 : str_info.length; + if (last_pos != end_pos) { + int status; + + /* The segment is part of the original string. */ + if (state.reverse) + item = get_slice(string, 0, last_pos); + else + item = get_slice(string, last_pos, str_info.length); + if (!item) + goto error; + status = add_item(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + Py_XDECREF(replacement); + + /* Convert the list to a single string (also cleans up join_info). */ + item = join_list_info(&join_info); + + state_fini(&state); + + if (!item) + return NULL; + + if (sub_type & RE_SUBN) + return Py_BuildValue("Nn", item, sub_count); + + return item; + +error: + Py_XDECREF(join_info.list); + Py_XDECREF(join_info.item); + state_fini(&state); + Py_XDECREF(replacement); + return NULL; +} + +/* PatternObject's 'sub' method. */ +static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* replacement; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, + &replacement, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, replacement, string, count, RE_SUB, pos, endpos, + conc); +} + +#if PY_VERSION_HEX >= 0x02060000 +/* PatternObject's 'subf' method. */ +static PyObject* pattern_subf(PatternObject* self, PyObject* args, PyObject* + kwargs) + { + int conc; + + PyObject* format; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "format", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, + &format, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, format, string, count, RE_SUBF, pos, endpos, + conc); +} + +#endif +/* PatternObject's 'subn' method. */ +static PyObject* pattern_subn(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* replacement; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, + &replacement, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, replacement, string, count, RE_SUBN, pos, endpos, + conc); +} + +#if PY_VERSION_HEX >= 0x02060000 +/* PatternObject's 'subfn' method. */ +static PyObject* pattern_subfn(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* format; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "format", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, + &format, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, format, string, count, RE_SUBF | RE_SUBN, pos, + endpos, conc); +} + +#endif +/* PatternObject's 'split' method. */ +static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + RE_State state; + RE_SafeState safe_state; + PyObject* list; + PyObject* item; + int status; + Py_ssize_t split_count; + size_t g; + Py_ssize_t start_pos; + Py_ssize_t end_pos; + Py_ssize_t step; + Py_ssize_t last_pos; + + PyObject* string; + Py_ssize_t maxsplit = 0; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:split", kwlist, + &string, &maxsplit, &concurrent)) + return NULL; + + if (maxsplit == 0) + maxsplit = PY_SSIZE_T_MAX; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + /* The MatchObject, and therefore repeated captures, will not be visible. + */ + if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX, FALSE, conc, + FALSE, FALSE, FALSE)) + return NULL; + + /* Initialise the "safe state" structure. */ + safe_state.re_state = &state; + safe_state.thread_state = NULL; + + list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } + + split_count = 0; + if (state.reverse) { + start_pos = state.text_length; + end_pos = 0; + step = -1; + } else { + start_pos = 0; + end_pos = state.text_length; + step = 1; + } + + last_pos = start_pos; + while (split_count < maxsplit) { + status = do_match(&safe_state, TRUE); + if (status < 0) + goto error; + + if (status == 0) + /* No more matches. */ + break; + + if (!state.zero_width) { + /* The current behaviour is to advance one character if the split + * was zero-width. Unfortunately, this can give an incorrect + * result. GvR wants this behaviour to be retained so as not to + * break any existing software which might rely on it. The correct + * behaviour is enabled by setting the 'new' flag. + */ + if (state.text_pos == state.match_pos) { + if (last_pos == end_pos) + break; + + /* Advance one character. */ + state.text_pos += step; + state.must_advance = FALSE; + continue; + } + } + + /* Get segment before this match. */ + if (state.reverse) + item = get_slice(string, state.match_pos, last_pos); + else + item = get_slice(string, last_pos, state.match_pos); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + /* Add groups (if any). */ + for (g = 1; g <= self->public_group_count; g++) { + item = state_get_group(&state, g, string, FALSE); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + ++split_count; + last_pos = state.text_pos; + + /* The correct behaviour is to reject a zero-width match just after a + * split point. The current behaviour is to advance one character if + * the match was zero-width. Unfortunately, this can give an incorrect + * result. GvR wants this behaviour to be retained so as not to break + * any existing software which might rely on it. The correct behaviour + * is enabled by setting the 'new' flag. + */ + if (state.zero_width) + /* Continue from where we left off, but don't allow a contiguous + * zero-width match. + */ + state.must_advance = TRUE; + else { + if (state.text_pos == state.match_pos) + /* Advance one character. */ + state.text_pos += step; + + state.must_advance = FALSE; + } + } + + /* Get segment following last match (even if empty). */ + if (state.reverse) + item = get_slice(string, 0, last_pos); + else + item = get_slice(string, last_pos, state.text_length); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + state_fini(&state); + + return list; + +error: + Py_DECREF(list); + state_fini(&state); + return NULL; +} + +/* PatternObject's 'splititer' method. */ +static PyObject* pattern_splititer(PatternObject* pattern, PyObject* args, + PyObject* kwargs) { + return pattern_splitter(pattern, args, kwargs); +} + +/* PatternObject's 'findall' method. */ +static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* + kwargs) { + Py_ssize_t start; + Py_ssize_t end; + RE_State state; + int conc; + RE_SafeState safe_state; + PyObject* list; + Py_ssize_t step; + int status; + size_t g; + Py_ssize_t b; + Py_ssize_t e; + + PyObject* string; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + Py_ssize_t overlapped = FALSE; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "string", "pos", "endpos", "overlapped", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnO:findall", kwlist, + &string, &pos, &endpos, &overlapped, &concurrent)) + return NULL; + + start = as_string_index(pos, 0); + if (start == -1 && PyErr_Occurred()) + return NULL; + + end = as_string_index(endpos, PY_SSIZE_T_MAX); + if (end == -1 && PyErr_Occurred()) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + /* The MatchObject, and therefore repeated captures, will not be visible. + */ + if (!state_init(&state, self, string, start, end, overlapped != 0, conc, + FALSE, FALSE, FALSE)) + return NULL; + + /* Initialise the "safe state" structure. */ + safe_state.re_state = &state; + safe_state.thread_state = NULL; + + list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } + + step = state.reverse ? -1 : 1; + while (state.slice_start <= state.text_pos && state.text_pos <= + state.slice_end) { + PyObject* item; + + status = do_match(&safe_state, TRUE); + if (status < 0) + goto error; + + if (status == 0) + break; + + /* Don't bother to build a MatchObject. */ + switch (self->public_group_count) { + case 0: + if (state.reverse) { + b = state.text_pos; + e = state.match_pos; + } else { + b = state.match_pos; + e = state.text_pos; + } + item = get_slice(string, b, e); + if (!item) + goto error; + break; + case 1: + item = state_get_group(&state, 1, string, TRUE); + if (!item) + goto error; + break; + default: + item = PyTuple_New(self->public_group_count); + if (!item) + goto error; + for (g = 0; g < self->public_group_count; g++) { + PyObject* o = state_get_group(&state, g + 1, string, TRUE); + if (!o) { + Py_DECREF(item); + goto error; + } + PyTuple_SET_ITEM(item, g, o); + } + break; + } + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + if (state.overlapped) { + /* Advance one character. */ + state.text_pos = state.match_pos + step; + state.must_advance = FALSE; + } else + /* Continue from where we left off, but don't allow 2 contiguous + * zero-width matches. + */ + state.must_advance = state.text_pos == state.match_pos; + } + + state_fini(&state); + + return list; + +error: + Py_DECREF(list); + state_fini(&state); + return NULL; +} + +/* PatternObject's 'finditer' method. */ +static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args, + PyObject* kwargs) { + return pattern_scanner(pattern, args, kwargs); +} + +/* Makes a copy of a PatternObject. */ +Py_LOCAL_INLINE(PyObject*) make_pattern_copy(PatternObject* self) { + Py_INCREF(self); + return (PyObject*)self; +} + +/* PatternObject's '__copy__' method. */ +static PyObject* pattern_copy(PatternObject* self, PyObject *unused) { + return make_pattern_copy(self); +} + +/* PatternObject's '__deepcopy__' method. */ +static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) { + return make_pattern_copy(self); +} + +/* The documentation of a PatternObject. */ +PyDoc_STRVAR(pattern_match_doc, + "match(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ + Match zero or more characters at the beginning of the string."); + +PyDoc_STRVAR(pattern_fullmatch_doc, + "fullmatch(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ + Match zero or more characters against all of the string."); + +PyDoc_STRVAR(pattern_search_doc, + "search(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ + Search through string looking for a match, and return a corresponding\n\ + match object instance. Return None if no match is found."); + +PyDoc_STRVAR(pattern_sub_doc, + "sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ + Return the string obtained by replacing the leftmost (or rightmost with a\n\ + reverse pattern) non-overlapping occurrences of pattern in string by the\n\ + replacement repl."); + +#if PY_VERSION_HEX >= 0x02060000 +PyDoc_STRVAR(pattern_subf_doc, + "subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ + Return the string obtained by replacing the leftmost (or rightmost with a\n\ + reverse pattern) non-overlapping occurrences of pattern in string by the\n\ + replacement format."); + +#endif +PyDoc_STRVAR(pattern_subn_doc, + "subn(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ + Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ + leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ + of pattern with the replacement repl."); + +#if PY_VERSION_HEX >= 0x02060000 +PyDoc_STRVAR(pattern_subfn_doc, + "subfn(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ + Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ + leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ + of pattern with the replacement format."); + +#endif +PyDoc_STRVAR(pattern_split_doc, + "split(string, string, maxsplit=0, concurrent=None) --> list.\n\ + Split string by the occurrences of pattern."); + +PyDoc_STRVAR(pattern_splititer_doc, + "splititer(string, maxsplit=0, concurrent=None) --> iterator.\n\ + Return an iterator yielding the parts of a split string."); + +PyDoc_STRVAR(pattern_findall_doc, + "findall(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> list.\n\ + Return a list of all matches of pattern in string. The matches may be\n\ + overlapped if overlapped is True."); + +PyDoc_STRVAR(pattern_finditer_doc, + "finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> iterator.\n\ + Return an iterator over all matches for the RE pattern in string. The\n\ + matches may be overlapped if overlapped is True. For each match, the\n\ + iterator returns a MatchObject."); + +PyDoc_STRVAR(pattern_scanner_doc, + "scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> scanner.\n\ + Return an scanner for the RE pattern in string. The matches may be overlapped\n\ + if overlapped is True."); + +/* The methods of a PatternObject. */ +static PyMethodDef pattern_methods[] = { + {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, + pattern_match_doc}, + {"fullmatch", (PyCFunction)pattern_fullmatch, METH_VARARGS|METH_KEYWORDS, + pattern_fullmatch_doc}, + {"search", (PyCFunction)pattern_search, METH_VARARGS|METH_KEYWORDS, + pattern_search_doc}, + {"sub", (PyCFunction)pattern_sub, METH_VARARGS|METH_KEYWORDS, + pattern_sub_doc}, +#if PY_VERSION_HEX >= 0x02060000 + {"subf", (PyCFunction)pattern_subf, METH_VARARGS|METH_KEYWORDS, + pattern_subf_doc}, +#endif + {"subn", (PyCFunction)pattern_subn, METH_VARARGS|METH_KEYWORDS, + pattern_subn_doc}, +#if PY_VERSION_HEX >= 0x02060000 + {"subfn", (PyCFunction)pattern_subfn, METH_VARARGS|METH_KEYWORDS, + pattern_subfn_doc}, +#endif + {"split", (PyCFunction)pattern_split, METH_VARARGS|METH_KEYWORDS, + pattern_split_doc}, + {"splititer", (PyCFunction)pattern_splititer, METH_VARARGS|METH_KEYWORDS, + pattern_splititer_doc}, + {"findall", (PyCFunction)pattern_findall, METH_VARARGS|METH_KEYWORDS, + pattern_findall_doc}, + {"finditer", (PyCFunction)pattern_finditer, METH_VARARGS|METH_KEYWORDS, + pattern_finditer_doc}, + {"scanner", (PyCFunction)pattern_scanner, METH_VARARGS|METH_KEYWORDS, + pattern_scanner_doc}, + {"__copy__", (PyCFunction)pattern_copy, METH_NOARGS}, + {"__deepcopy__", (PyCFunction)pattern_deepcopy, METH_O}, + {NULL, NULL} +}; + +PyDoc_STRVAR(pattern_doc, "Compiled regex object"); + +/* Deallocates a PatternObject. */ +static void pattern_dealloc(PyObject* self_) { + PatternObject* self; + size_t i; + + self = (PatternObject*)self_; + + /* Discard the nodes. */ + for (i = 0; i < self->node_count; i++) { + RE_Node* node; + + node = self->node_list[i]; + re_dealloc(node->values); + if (node->status & RE_STATUS_STRING) { + re_dealloc(node->string.bad_character_offset); + re_dealloc(node->string.good_suffix_offset); + } + re_dealloc(node); + } + re_dealloc(self->node_list); + + /* Discard the group info. */ + re_dealloc(self->group_info); + + /* Discard the call_ref info. */ + re_dealloc(self->call_ref_info); + + /* Discard the repeat info. */ + re_dealloc(self->repeat_info); + + dealloc_groups(self->groups_storage, self->true_group_count); + + dealloc_repeats(self->repeats_storage, self->repeat_count); + + if (self->weakreflist) + PyObject_ClearWeakRefs((PyObject*)self); + Py_XDECREF(self->pattern); + Py_XDECREF(self->groupindex); + Py_XDECREF(self->indexgroup); + Py_DECREF(self->named_lists); + Py_DECREF(self->named_list_indexes); + PyObject_DEL(self); +} + +typedef struct RE_FlagName { + char* name; + int value; +} RE_FlagName; + +/* We won't bother about the A flag in Python 2. */ +static RE_FlagName flag_names[] = { + {"B", RE_FLAG_BESTMATCH}, + {"D", RE_FLAG_DEBUG}, + {"S", RE_FLAG_DOTALL}, + {"F", RE_FLAG_FULLCASE}, + {"I", RE_FLAG_IGNORECASE}, + {"L", RE_FLAG_LOCALE}, + {"M", RE_FLAG_MULTILINE}, + {"R", RE_FLAG_REVERSE}, + {"T", RE_FLAG_TEMPLATE}, + {"U", RE_FLAG_UNICODE}, + {"X", RE_FLAG_VERBOSE}, + {"V0", RE_FLAG_VERSION0}, + {"V1", RE_FLAG_VERSION1}, + {"W", RE_FLAG_WORD}, +}; + +/* Appends a string to a list. */ +Py_LOCAL_INLINE(BOOL) append_string(PyObject* list, char* string) { + PyObject* item; + int status; + + item = Py_BuildValue("s", string); + if (!item) + return FALSE; + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + return FALSE; + + return TRUE; +} + +/* PatternObject's '__repr__' method. */ +static PyObject* pattern_repr(PyObject* self_) { + PatternObject* self; + PyObject* list; + PyObject* item; + int status; + int flag_count; + Py_ssize_t i; + PyObject *key; + PyObject *value; + PyObject* separator; + PyObject* result; + + self = (PatternObject*)self_; + + list = PyList_New(0); + if (!list) + return NULL; + + if (!append_string(list, "regex.Regex(")) + goto error; + + item = PyObject_Repr(self->pattern); + if (!item) + goto error; + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + flag_count = 0; + for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++) { + if (self->flags & flag_names[i].value) { + if (flag_count == 0) { + if (!append_string(list, ", flags=")) + goto error; + } else { + if (!append_string(list, " | ")) + goto error; + } + + if (!append_string(list, "regex.")) + goto error; + + if (!append_string(list, flag_names[i].name)) + goto error; + + ++flag_count; + } + } + + i = 0; + while (PyDict_Next(self->named_lists, &i, &key, &value)) { + if (!append_string(list, ", ")) + goto error; + + status = PyList_Append(list, key); + if (status < 0) + goto error; + + item = PyObject_Repr(value); + if (!item) + goto error; + + if (!append_string(list, "=")) + goto error; + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + if (!append_string(list, ")")) + goto error; + + separator = Py_BuildValue("s", ""); + if (!separator) + goto error; + + result = PyUnicode_Join(separator, list); + Py_DECREF(separator); + + Py_DECREF(list); + + return result; + +error: + Py_DECREF(list); + return NULL; +} + +/* PatternObject's 'groupindex' method. */ +static PyObject* pattern_groupindex(PyObject* self_) { + PatternObject* self; + + self = (PatternObject*)self_; + + return PyDict_Copy(self->groupindex); +} + +static PyGetSetDef pattern_getset[] = { + {"groupindex", (getter)pattern_groupindex, (setter)NULL, + "A dictionary mapping group names to group numbers."}, + {NULL} /* Sentinel */ +}; + +static PyMemberDef pattern_members[] = { + {"pattern", T_OBJECT, offsetof(PatternObject, pattern), READONLY, + "The pattern string from which the regex object was compiled."}, + {"flags", T_PYSSIZET, offsetof(PatternObject, flags), READONLY, + "The regex matching flags."}, + {"groups", T_PYSSIZET, offsetof(PatternObject, public_group_count), + READONLY, "The number of capturing groups in the pattern."}, + {"named_lists", T_OBJECT, offsetof(PatternObject, named_lists), READONLY, + "The named lists used by the regex."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject Pattern_Type = { + PyObject_HEAD_INIT(NULL) + 0, + "_" RE_MODULE "." "Pattern", + sizeof(PatternObject) +}; + +/* Building the nodes is made simpler by allowing branches to have a single + * exit. These need to be removed. + */ +Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) { + BOOL modified; + + /* If a node refers to a 1-way branch then make the former refer to the + * latter's destination. Repeat until they're all done. + */ + do { + size_t i; + + modified = FALSE; + + for (i = 0; i < pattern->node_count; i++) { + RE_Node* node; + RE_Node* next; + + node = pattern->node_list[i]; + + /* Check the first destination. */ + next = node->next_1.node; + if (next && next->op == RE_OP_BRANCH && + !next->nonstring.next_2.node) { + node->next_1.node = next->next_1.node; + modified = TRUE; + } + + /* Check the second destination. */ + next = node->nonstring.next_2.node; + if (next && next->op == RE_OP_BRANCH && + !next->nonstring.next_2.node) { + node->nonstring.next_2.node = next->next_1.node; + modified = TRUE; + } + } + } while (modified); + + /* The start node might be a 1-way branch. Skip over it because it'll be + * removed. It might even be the first in a chain. + */ + while (pattern->start_node->op == RE_OP_BRANCH && + !pattern->start_node->nonstring.next_2.node) + pattern->start_node = pattern->start_node->next_1.node; +} + +/* Adds guards to repeats which are followed by a reference to a group. + * + * Returns whether a guard was added for a node at or after the given node. + */ +Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* + node) { + RE_STATUS_T result; + + result = RE_STATUS_NEITHER; + + for (;;) { + if (node->status & RE_STATUS_VISITED_AG) + return node->status & (RE_STATUS_REPEAT | RE_STATUS_REF); + + switch (node->op) { + case RE_OP_ATOMIC: + case RE_OP_LOOKAROUND: + { + RE_STATUS_T body_result; + RE_STATUS_T tail_result; + RE_STATUS_T status; + + body_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + tail_result = add_repeat_guards(pattern, node->next_1.node); + status = (RE_STATUS_T)RE_MAX(RE_MAX(result, body_result), + tail_result); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_BRANCH: + { + RE_STATUS_T branch_1_result; + RE_STATUS_T branch_2_result; + RE_STATUS_T status; + + branch_1_result = add_repeat_guards(pattern, node->next_1.node); + branch_2_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + status = (RE_STATUS_T)RE_MAX(RE_MAX(result, branch_1_result), + branch_2_result); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_END_GREEDY_REPEAT: + case RE_OP_END_LAZY_REPEAT: + node->status |= RE_STATUS_VISITED_AG; + return result; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + { + BOOL limited; + RE_STATUS_T body_result; + RE_STATUS_T tail_result; + RE_RepeatInfo* repeat_info; + RE_STATUS_T status; + + limited = node->values[2] != RE_UNLIMITED; + if (limited) + body_result = RE_STATUS_LIMITED; + else + body_result = add_repeat_guards(pattern, node->next_1.node); + tail_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + + repeat_info = &pattern->repeat_info[node->values[0]]; + if (body_result != RE_STATUS_REF) + repeat_info->status |= RE_STATUS_BODY; + if (tail_result != RE_STATUS_REF) + repeat_info->status |= RE_STATUS_TAIL; + if (limited) + result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_LIMITED); + else + result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_REPEAT); + status = (RE_STATUS_T)RE_MAX(RE_MAX(result, body_result), + tail_result); + node->status |= RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + { + BOOL limited; + RE_STATUS_T tail_result; + RE_RepeatInfo* repeat_info; + RE_STATUS_T status; + + limited = node->values[2] != RE_UNLIMITED; + tail_result = add_repeat_guards(pattern, node->next_1.node); + + repeat_info = &pattern->repeat_info[node->values[0]]; + repeat_info->status |= RE_STATUS_BODY; + if (tail_result != RE_STATUS_REF) + repeat_info->status |= RE_STATUS_TAIL; + if (limited) + result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_LIMITED); + else + result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_REPEAT); + status = (RE_STATUS_T)RE_MAX(RE_MAX(result, RE_STATUS_REPEAT), + tail_result); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_GROUP_EXISTS: + { + RE_STATUS_T branch_1_result; + RE_STATUS_T branch_2_result; + RE_STATUS_T status; + + branch_1_result = add_repeat_guards(pattern, node->next_1.node); + branch_2_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + status = (RE_STATUS_T)RE_MAX(RE_MAX(RE_MAX(result, + branch_1_result), branch_2_result), RE_STATUS_REF); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_GROUP_CALL: + case RE_OP_REF_GROUP: + case RE_OP_REF_GROUP_FLD: + case RE_OP_REF_GROUP_FLD_REV: + case RE_OP_REF_GROUP_IGN: + case RE_OP_REF_GROUP_IGN_REV: + case RE_OP_REF_GROUP_REV: + result = RE_STATUS_REF; + node = node->next_1.node; + break; + case RE_OP_SUCCESS: + node->status = RE_STATUS_VISITED_AG | result; + return result; + default: + node = node->next_1.node; + break; + } + } +} + +/* Adds an index to a node's values unless it's already present. + * + * 'offset' is the offset of the index count within the values. + */ +Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, RE_CODE index) { + size_t index_count; + size_t first_index; + size_t i; + RE_CODE* new_values; + + if (!node) + return TRUE; + + index_count = node->values[offset]; + first_index = offset + 1; + + /* Is the index already present? */ + for (i = 0; i < index_count; i++) { + if (node->values[first_index + i] == index) + return TRUE; + } + + /* Allocate more space for the new index. */ + new_values = re_realloc(node->values, (node->value_count + 1) * + sizeof(RE_CODE)); + if (!new_values) + return FALSE; + + ++node->value_count; + node->values = new_values; + + node->values[first_index + node->values[offset]++] = index; + + return TRUE; +} + +/* Records the index of every repeat and fuzzy section within atomic + * subpatterns and lookarounds. + */ +Py_LOCAL_INLINE(BOOL) record_subpattern_repeats_and_fuzzy_sections(RE_Node* + parent_node, size_t offset, size_t repeat_count, RE_Node* node) { + while (node) { + if (node->status & RE_STATUS_VISITED_REP) + return TRUE; + + node->status |= RE_STATUS_VISITED_REP; + + switch (node->op) { + case RE_OP_ATOMIC: + if (!record_subpattern_repeats_and_fuzzy_sections(node, 0, + repeat_count, node->nonstring.next_2.node)) + return FALSE; + node = node->next_1.node; + break; + case RE_OP_BRANCH: + if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, + offset, repeat_count, node->next_1.node)) + return FALSE; + node = node->nonstring.next_2.node; + break; + case RE_OP_END_FUZZY: + node = node->next_1.node; + break; + case RE_OP_END_GREEDY_REPEAT: + case RE_OP_END_LAZY_REPEAT: + return TRUE; + case RE_OP_FUZZY: + /* Record the fuzzy index. */ + if (!add_index(parent_node, offset, (RE_CODE)repeat_count + + node->values[0])) + return FALSE; + node = node->next_1.node; + break; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + /* Record the repeat index. */ + if (!add_index(parent_node, offset, node->values[0])) + return FALSE; + if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, + offset, repeat_count, node->next_1.node)) + return FALSE; + node = node->nonstring.next_2.node; + break; + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + /* Record the repeat index. */ + if (!add_index(parent_node, offset, node->values[0])) + return FALSE; + node = node->next_1.node; + break; + case RE_OP_GROUP_EXISTS: + if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, + offset, repeat_count, node->next_1.node)) + return FALSE; + node = node->nonstring.next_2.node; + break; + case RE_OP_LOOKAROUND: + if (!record_subpattern_repeats_and_fuzzy_sections(node, 1, + repeat_count, node->nonstring.next_2.node)) + return FALSE; + node = node->next_1.node; + break; + default: + node = node->next_1.node; + break; + } + } + + return TRUE; +} + +/* Marks nodes which are being used as used. */ +Py_LOCAL_INLINE(void) use_nodes(RE_Node* node) { + while (node && !(node->status & RE_STATUS_USED)) { + node->status |= RE_STATUS_USED; + if (!(node->status & RE_STATUS_STRING)) { + if (node->nonstring.next_2.node) + use_nodes(node->nonstring.next_2.node); + } + node = node->next_1.node; + } +} + +/* Discards any unused nodes. + * + * Optimising the nodes might result in some nodes no longer being used. + */ +Py_LOCAL_INLINE(void) discard_unused_nodes(PatternObject* pattern) { + size_t new_count; + size_t i; + + /* Mark the nodes which are being used. */ + use_nodes(pattern->start_node); + + for (i = 0; i < pattern->call_ref_info_capacity; i++) + use_nodes(pattern->call_ref_info[i].node); + + new_count = 0; + for (i = 0; i < pattern->node_count; i++) { + RE_Node* node; + + node = pattern->node_list[i]; + if (node->status & RE_STATUS_USED) + pattern->node_list[new_count++] = node; + else { + re_dealloc(node->values); + if (node->status & RE_STATUS_STRING) { + re_dealloc(node->string.bad_character_offset); + re_dealloc(node->string.good_suffix_offset); + } + re_dealloc(node); + } + } + + pattern->node_count = new_count; +} + +/* Marks all the group which are named. */ +Py_LOCAL_INLINE(BOOL) mark_named_groups(PatternObject* pattern) { + size_t i; + + for (i = 0; i < pattern->public_group_count; i++) { + RE_GroupInfo* group_info; + PyObject* index; + + group_info = &pattern->group_info[i]; + index = Py_BuildValue("n", i + 1); + if (!index) + return FALSE; + group_info->has_name = (BOOL)PyDict_Contains(pattern->indexgroup, + index); + Py_DECREF(index); + } + + return TRUE; +} + +/* Gets the test node. + * + * The test node lets the matcher look ahead in the pattern, allowing it to + * avoid the cost of housekeeping, only to find that what follows doesn't match + * anyway. + */ +Py_LOCAL_INLINE(void) set_test_node(RE_NextNode* next) { + RE_Node* node = next->node; + RE_Node* test; + + next->test = node; + next->match_next = node; + next->match_step = 0; + + if (!node) + return; + + test = node; + while (test->op == RE_OP_END_GROUP || test->op == RE_OP_START_GROUP) + test = test->next_1.node; + + next->test = test; + + if (test != node) + return; + + switch (test->op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + case RE_OP_BOUNDARY: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_DEFAULT_BOUNDARY: + case RE_OP_DEFAULT_END_OF_WORD: + case RE_OP_DEFAULT_START_OF_WORD: + case RE_OP_END_OF_LINE: + case RE_OP_END_OF_LINE_U: + case RE_OP_END_OF_STRING: + case RE_OP_END_OF_STRING_LINE: + case RE_OP_END_OF_STRING_LINE_U: + case RE_OP_END_OF_WORD: + case RE_OP_GRAPHEME_BOUNDARY: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SEARCH_ANCHOR: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + case RE_OP_START_OF_LINE: + case RE_OP_START_OF_LINE_U: + case RE_OP_START_OF_STRING: + case RE_OP_START_OF_WORD: + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + next->match_next = test->next_1.node; + next->match_step = test->step; + break; + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + if (test->values[1] > 0) + next->test = test; + break; + } +} + +/* Sets the test nodes. */ +Py_LOCAL_INLINE(void) set_test_nodes(PatternObject* pattern) { + RE_Node** node_list; + size_t i; + + node_list = pattern->node_list; + for (i = 0; i < pattern->node_count; i++) { + RE_Node* node; + + node = node_list[i]; + set_test_node(&node->next_1); + if (!(node->status & RE_STATUS_STRING)) + set_test_node(&node->nonstring.next_2); + } +} + +/* Optimises the pattern. */ +Py_LOCAL_INLINE(BOOL) optimise_pattern(PatternObject* pattern) { + size_t i; + + /* Building the nodes is made simpler by allowing branches to have a single + * exit. These need to be removed. + */ + skip_one_way_branches(pattern); + + /* Add position guards for repeat bodies containing a reference to a group + * or repeat tails followed at some point by a reference to a group. + */ + add_repeat_guards(pattern, pattern->start_node); + + /* Record the index of repeats and fuzzy sections within the body of atomic + * and lookaround nodes. + */ + if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, + pattern->repeat_count, pattern->start_node)) + return FALSE; + + for (i = 0; i < pattern->call_ref_info_count; i++) { + RE_Node* node; + + node = pattern->call_ref_info[i].node; + if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, + pattern->repeat_count, node)) + return FALSE; + } + + /* Discard any unused nodes. */ + discard_unused_nodes(pattern); + + /* Set the test nodes. */ + set_test_nodes(pattern); + + /* Mark all the group that are named. */ + if (!mark_named_groups(pattern)) + return FALSE; + + return TRUE; +} + +/* Creates a new pattern node. */ +Py_LOCAL_INLINE(RE_Node*) create_node(PatternObject* pattern, RE_UINT8 op, + RE_CODE flags, Py_ssize_t step, size_t value_count) { + RE_Node* node; + + node = (RE_Node*)re_alloc(sizeof(*node)); + if (!node) + return NULL; + memset(node, 0, sizeof(RE_Node)); + + node->value_count = value_count; + if (node->value_count > 0) { + node->values = (RE_CODE*)re_alloc(node->value_count * sizeof(RE_CODE)); + if (!node->values) + goto error; + } else + node->values = NULL; + + node->op = op; + node->match = (flags & RE_POSITIVE_OP) != 0; + node->status = (RE_STATUS_T)(flags << RE_STATUS_SHIFT); + node->step = step; + + /* Ensure that there's enough storage to record the new node. */ + if (pattern->node_count >= pattern->node_capacity) { + RE_Node** new_node_list; + + pattern->node_capacity *= 2; + if (pattern->node_capacity == 0) + pattern->node_capacity = RE_INIT_NODE_LIST_SIZE; + new_node_list = (RE_Node**)re_realloc(pattern->node_list, + pattern->node_capacity * sizeof(RE_Node*)); + if (!new_node_list) + goto error; + pattern->node_list = new_node_list; + } + + /* Record the new node. */ + pattern->node_list[pattern->node_count++] = node; + + return node; + +error: + re_dealloc(node->values); + re_dealloc(node); + return NULL; +} + +/* Adds a node as a next node for another node. */ +Py_LOCAL_INLINE(void) add_node(RE_Node* node_1, RE_Node* node_2) { + if (!node_1->next_1.node) + node_1->next_1.node = node_2; + else + node_1->nonstring.next_2.node = node_2; +} + +/* Ensures that the entry for a group's details actually exists. */ +Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, RE_CODE group) { + size_t old_capacity; + size_t new_capacity; + RE_GroupInfo* new_group_info; + + if (group <= pattern->true_group_count) + /* We already have an entry for the group. */ + return TRUE; + + /* Increase the storage capacity to include the new entry if it's + * insufficient. + */ + old_capacity = pattern->group_info_capacity; + new_capacity = pattern->group_info_capacity; + while (group > new_capacity) + new_capacity += RE_LIST_SIZE_INC; + + if (new_capacity > old_capacity) { + new_group_info = (RE_GroupInfo*)re_realloc(pattern->group_info, + new_capacity * sizeof(RE_GroupInfo)); + if (!new_group_info) + return FALSE; + memset(new_group_info + old_capacity, 0, (new_capacity - old_capacity) + * sizeof(RE_GroupInfo)); + + pattern->group_info = new_group_info; + pattern->group_info_capacity = new_capacity; + } + + pattern->true_group_count = group; + + return TRUE; +} + +/* Records that there's a reference to a group. */ +Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, RE_CODE group) { + if (!ensure_group(pattern, group)) + return FALSE; + + pattern->group_info[group - 1].referenced = TRUE; + + return TRUE; +} + +/* Records that there's a new group. */ +Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, RE_CODE group, + RE_Node* node) { + if (!ensure_group(pattern, group)) + return FALSE; + + if (group >= 1) { + RE_GroupInfo* info; + + info = &pattern->group_info[group - 1]; + info->end_index = pattern->true_group_count; + info->node = node; + } + + return TRUE; +} + +/* Records that a group has closed. */ +Py_LOCAL_INLINE(void) record_group_end(PatternObject* pattern, RE_CODE group) { + if (group >= 1) + pattern->group_info[group - 1].end_index = ++pattern->group_end_index; +} + +/* Ensures that the entry for a call_ref's details actually exists. */ +Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, RE_CODE call_ref) + { + size_t old_capacity; + size_t new_capacity; + RE_CallRefInfo* new_call_ref_info; + + if (call_ref < pattern->call_ref_info_count) + /* We already have an entry for the call_ref. */ + return TRUE; + + /* Increase the storage capacity to include the new entry if it's + * insufficient. + */ + old_capacity = pattern->call_ref_info_capacity; + new_capacity = pattern->call_ref_info_capacity; + while (call_ref >= new_capacity) + new_capacity += RE_LIST_SIZE_INC; + + if (new_capacity > old_capacity) { + new_call_ref_info = (RE_CallRefInfo*)re_realloc(pattern->call_ref_info, + new_capacity * sizeof(RE_CallRefInfo)); + if (!new_call_ref_info) + return FALSE; + memset(new_call_ref_info + old_capacity, 0, (new_capacity - + old_capacity) * sizeof(RE_CallRefInfo)); + + pattern->call_ref_info = new_call_ref_info; + pattern->call_ref_info_capacity = new_capacity; + } + + pattern->call_ref_info_count = 1 + call_ref; + + return TRUE; +} + +/* Records that a call_ref is defined. */ +Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, RE_CODE + call_ref, RE_Node* node) { + if (!ensure_call_ref(pattern, call_ref)) + return FALSE; + + pattern->call_ref_info[call_ref].defined = TRUE; + pattern->call_ref_info[call_ref].node = node; + + return TRUE; +} + +/* Records that a call_ref is used. */ +Py_LOCAL_INLINE(BOOL) record_call_ref_used(PatternObject* pattern, RE_CODE + call_ref) { + if (!ensure_call_ref(pattern, call_ref)) + return FALSE; + + pattern->call_ref_info[call_ref].used = TRUE; + + return TRUE; +} + +/* Checks whether a node matches one and only one character. */ +Py_LOCAL_INLINE(BOOL) sequence_matches_one(RE_Node* node) { + while (node->op == RE_OP_BRANCH && !node->nonstring.next_2.node) + node = node->next_1.node; + + if (node->next_1.node || (node->status & RE_STATUS_FUZZY)) + return FALSE; + + switch (node->op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_REV: + return TRUE; + default: + return FALSE; + } +} + +/* Records a repeat. */ +Py_LOCAL_INLINE(BOOL) record_repeat(PatternObject* pattern, size_t index, + size_t repeat_depth) { + size_t old_capacity; + size_t new_capacity; + + /* Increase the storage capacity to include the new entry if it's + * insufficient. + */ + old_capacity = pattern->repeat_info_capacity; + new_capacity = pattern->repeat_info_capacity; + while (index >= new_capacity) + new_capacity += RE_LIST_SIZE_INC; + + if (new_capacity > old_capacity) { + RE_RepeatInfo* new_repeat_info; + + new_repeat_info = (RE_RepeatInfo*)re_realloc(pattern->repeat_info, + new_capacity * sizeof(RE_RepeatInfo)); + if (!new_repeat_info) + return FALSE; + memset(new_repeat_info + old_capacity, 0, (new_capacity - old_capacity) + * sizeof(RE_RepeatInfo)); + + pattern->repeat_info = new_repeat_info; + pattern->repeat_info_capacity = new_capacity; + } + + if (index >= pattern->repeat_count) + pattern->repeat_count = index + 1; + + if (repeat_depth > 0) + pattern->repeat_info[index].status |= RE_STATUS_INNER; + + return TRUE; +} + +Py_LOCAL_INLINE(Py_ssize_t) get_step(RE_CODE op) { + switch (op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_U: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_IGN: + return 1; + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U_REV: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + return -1; + } + + return 0; +} + +Py_LOCAL_INLINE(int) build_sequence(RE_CompileArgs* args); + +/* Builds an ANY node. */ +Py_LOCAL_INLINE(int) build_ANY(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + + /* codes: opcode, flags. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step, 0); + if (!node) + return RE_ERROR_MEMORY; + + args->code += 2; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a FUZZY node. */ +Py_LOCAL_INLINE(int) build_FUZZY(RE_CompileArgs* args) { + RE_CODE flags; + RE_Node* start_node; + RE_Node* end_node; + RE_CODE index; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, flags, constraints, sequence, end. */ + if (args->code + 13 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + + /* Create nodes for the start and end of the fuzzy sequence. */ + start_node = create_node(args->pattern, RE_OP_FUZZY, flags, 0, 9); + end_node = create_node(args->pattern, RE_OP_END_FUZZY, flags, 0, 5); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + index = (RE_CODE)args->pattern->fuzzy_count++; + start_node->values[0] = index; + end_node->values[0] = index; + + /* The constraints consist of 4 pairs of limits and the cost equation. */ + end_node->values[RE_FUZZY_VAL_MIN_DEL] = args->code[2]; /* Deletion minimum. */ + end_node->values[RE_FUZZY_VAL_MIN_INS] = args->code[4]; /* Insertion minimum. */ + end_node->values[RE_FUZZY_VAL_MIN_SUB] = args->code[6]; /* Substitution minimum. */ + end_node->values[RE_FUZZY_VAL_MIN_ERR] = args->code[8]; /* Error minimum. */ + + start_node->values[RE_FUZZY_VAL_MAX_DEL] = args->code[3]; /* Deletion maximum. */ + start_node->values[RE_FUZZY_VAL_MAX_INS] = args->code[5]; /* Insertion maximum. */ + start_node->values[RE_FUZZY_VAL_MAX_SUB] = args->code[7]; /* Substitution maximum. */ + start_node->values[RE_FUZZY_VAL_MAX_ERR] = args->code[9]; /* Error maximum. */ + + start_node->values[RE_FUZZY_VAL_DEL_COST] = args->code[10]; /* Deletion cost. */ + start_node->values[RE_FUZZY_VAL_INS_COST] = args->code[11]; /* Insertion cost. */ + start_node->values[RE_FUZZY_VAL_SUB_COST] = args->code[12]; /* Substitution cost. */ + start_node->values[RE_FUZZY_VAL_MAX_COST] = args->code[13]; /* Total cost. */ + + args->code += 14; + + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = TRUE; + subargs.within_fuzzy = TRUE; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. + */ + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + + ++args->code; + + /* Append the fuzzy sequence. */ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + args->end = end_node; + + args->is_fuzzy = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Builds an ATOMIC node. */ +Py_LOCAL_INLINE(int) build_ATOMIC(RE_CompileArgs* args) { + RE_Node* atomic_node; + RE_CompileArgs subargs; + RE_Node* success_node; + int status; + + /* codes: opcode, sequence, end. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + atomic_node = create_node(args->pattern, RE_OP_ATOMIC, 0, 0, 1); + if (!atomic_node) + return RE_ERROR_MEMORY; + + /* The number of repeat indexes. */ + atomic_node->values[0] = 0; + + ++args->code; + + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. + */ + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + /* Create the success node to terminate the subpattern. */ + success_node = create_node(subargs.pattern, RE_OP_SUCCESS, 0, 0, 0); + if (!success_node) + return RE_ERROR_MEMORY; + + /* Append the SUCCESS node. */ + add_node(subargs.end, success_node); + + /* Insert the subpattern. */ + atomic_node->nonstring.next_2.node = subargs.start; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Append the node. */ + add_node(args->end, atomic_node); + args->end = atomic_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a BOUNDARY node. */ +Py_LOCAL_INLINE(int) build_BOUNDARY(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + RE_Node* node; + + /* codes: opcode, flags. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + args->code += 2; + + /* Create the node. */ + node = create_node(args->pattern, op, flags, 0, 0); + if (!node) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a BRANCH node. */ +Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { + RE_Node* branch_node; + RE_Node* join_node; + size_t smallest_min_width; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, branch, next, branch, end. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + /* Create nodes for the start and end of the branch sequence. */ + branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!branch_node || !join_node) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, branch_node); + args->end = join_node; + + smallest_min_width = ~(size_t)0; + + subargs = *args; + + /* A branch in the regular expression is compiled into a series of 2-way + * branches. + */ + do { + RE_Node* next_branch_node; + + /* Skip over the 'BRANCH' or 'NEXT' opcode. */ + ++subargs.code; + + /* Compile the sequence until the next 'BRANCH' or 'NEXT' opcode. */ + subargs.min_width = 0; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + smallest_min_width = RE_MIN(smallest_min_width, subargs.min_width); + + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + /* Append the sequence. */ + add_node(branch_node, subargs.start); + add_node(subargs.end, join_node); + + /* Create a start node for the next sequence and append it. */ + next_branch_node = create_node(subargs.pattern, RE_OP_BRANCH, 0, 0, 0); + if (!next_branch_node) + return RE_ERROR_MEMORY; + + add_node(branch_node, next_branch_node); + branch_node = next_branch_node; + } while (subargs.code < subargs.end_code && subargs.code[0] == RE_OP_NEXT); + + /* We should have reached the end of the branch. */ + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + + ++args->code; + args->min_width += smallest_min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a CALL_REF node. */ +Py_LOCAL_INLINE(int) build_CALL_REF(RE_CompileArgs* args) { + RE_CODE call_ref; + RE_Node* start_node; + RE_Node* end_node; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, call_ref. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + call_ref = args->code[1]; + + args->code += 2; + + /* Create nodes for the start and end of the subpattern. */ + start_node = create_node(args->pattern, RE_OP_CALL_REF, 0, 0, 1); + end_node = create_node(args->pattern, RE_OP_GROUP_RETURN, 0, 0, 0); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + start_node->values[0] = call_ref; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. + */ + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Record that we defined a call_ref. */ + if (!record_call_ref_defined(args->pattern, call_ref, start_node)) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + args->end = end_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a CHARACTER or PROPERTY node. */ +Py_LOCAL_INLINE(int) build_CHARACTER_or_PROPERTY(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + + /* codes: opcode, flags, value. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + if (flags & RE_ZEROWIDTH_OP) + step = 0; + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step, 1); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = args->code[2]; + + args->code += 3; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + if (step != 0) + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a GROUP node. */ +Py_LOCAL_INLINE(int) build_GROUP(RE_CompileArgs* args) { + RE_CODE private_group; + RE_CODE public_group; + RE_Node* start_node; + RE_Node* end_node; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, private_group, public_group. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + private_group = args->code[1]; + public_group = args->code[2]; + + args->code += 3; + + /* Create nodes for the start and end of the capture group. */ + start_node = create_node(args->pattern, args->forward ? RE_OP_START_GROUP : + RE_OP_END_GROUP, 0, 0, 3); + end_node = create_node(args->pattern, args->forward ? RE_OP_END_GROUP : + RE_OP_START_GROUP, 0, 0, 3); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + start_node->values[0] = private_group; + end_node->values[0] = private_group; + start_node->values[1] = public_group; + end_node->values[1] = public_group; + + /* Signal that the capture should be saved when it's complete. */ + start_node->values[2] = 0; + end_node->values[2] = 1; + + /* Record that we have a new capture group. */ + if (!record_group(args->pattern, private_group, start_node)) + return RE_ERROR_MEMORY; + + /* Compile the sequence and check that we've reached the end of the capture + * group. + */ + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures || subargs.visible_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Record that the capture group has closed. */ + record_group_end(args->pattern, private_group); + + /* Append the capture group. */ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + args->end = end_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a GROUP_CALL node. */ +Py_LOCAL_INLINE(int) build_GROUP_CALL(RE_CompileArgs* args) { + RE_CODE call_ref; + RE_Node* node; + + /* codes: opcode, call_ref. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + call_ref = args->code[1]; + + /* Create the node. */ + node = create_node(args->pattern, RE_OP_GROUP_CALL, 0, 0, 1); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = call_ref; + + args->code += 2; + + /* Record that we used a call_ref. */ + if (!record_call_ref_used(args->pattern, call_ref)) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a GROUP_EXISTS node. */ +Py_LOCAL_INLINE(int) build_GROUP_EXISTS(RE_CompileArgs* args) { + RE_CODE group; + RE_Node* start_node; + RE_Node* end_node; + RE_CompileArgs subargs; + size_t min_width; + int status; + + /* codes: opcode, sequence, next, sequence, end. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + group = args->code[1]; + + args->code += 2; + + /* Create nodes for the start and end of the structure. */ + start_node = create_node(args->pattern, RE_OP_GROUP_EXISTS, 0, 0, 1); + end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + start_node->values[0] = group; + + subargs = *args; + subargs.min_width = 0; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + min_width = subargs.min_width; + + /* Append the start node. */ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + + if (args->code[0] == RE_OP_NEXT) { + ++args->code; + + subargs.code = args->code; + subargs.min_width = 0; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + min_width = RE_MIN(min_width, subargs.min_width); + + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + } else { + add_node(start_node, end_node); + + min_width = 0; + } + + args->min_width += min_width; + + if (args->code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + ++args->code; + + args->end = end_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a LOOKAROUND node. */ +Py_LOCAL_INLINE(int) build_LOOKAROUND(RE_CompileArgs* args) { + RE_CODE flags; + BOOL forward; + RE_Node* lookaround_node; + RE_Node* success_node; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, flags, forward, sequence, end. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + forward = (BOOL)args->code[2]; + + /* Create a node for the lookaround. */ + lookaround_node = create_node(args->pattern, RE_OP_LOOKAROUND, flags, 0, + 2); + if (!lookaround_node) + return RE_ERROR_MEMORY; + + /* The number of repeat indexes. */ + lookaround_node->values[1] = 0; + + args->code += 3; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. + */ + subargs = *args; + subargs.forward = forward; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + lookaround_node->values[0] = subargs.has_captures; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + ++args->code; + + /* Create the 'SUCCESS' node and append it to the subpattern. */ + success_node = create_node(args->pattern, RE_OP_SUCCESS, 0, 0, 0); + if (!success_node) + return RE_ERROR_MEMORY; + + /* Append the SUCCESS node. */ + add_node(subargs.end, success_node); + + /* Insert the subpattern into the node. */ + lookaround_node->nonstring.next_2.node = subargs.start; + + /* Append the lookaround. */ + add_node(args->end, lookaround_node); + args->end = lookaround_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a RANGE node. */ +Py_LOCAL_INLINE(int) build_RANGE(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + + /* codes: opcode, flags, lower, upper. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + if (flags & RE_ZEROWIDTH_OP) + step = 0; + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step, 2); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = args->code[2]; + node->values[1] = args->code[3]; + + args->code += 4; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + if (step != 0) + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a REF_GROUP node. */ +Py_LOCAL_INLINE(int) build_REF_GROUP(RE_CompileArgs* args) { + RE_CODE flags; + RE_CODE group; + RE_Node* node; + + /* codes: opcode, flags, group. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + group = args->code[2]; + node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 1); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = group; + + args->code += 3; + + /* Record that we have a reference to a group. */ + if (!record_ref_group(args->pattern, group)) + return RE_ERROR_MEMORY; + + /* Append the reference. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a REPEAT node. */ +Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { + BOOL greedy; + RE_CODE min_count; + RE_CODE max_count; + int status; + + /* codes: opcode, min_count, max_count, sequence, end. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + /* This includes special cases such as optional items, which we'll check + * for and treat specially. They don't need repeat counts, which helps us + * avoid unnecessary work when matching. + */ + greedy = args->code[0] == RE_OP_GREEDY_REPEAT; + min_count = args->code[1]; + max_count = args->code[2]; + if (min_count > max_count) + return RE_ERROR_ILLEGAL; + + args->code += 3; + + if (min_count == 0 && max_count == 1) { + /* Optional sequence. */ + RE_Node* branch_node; + RE_Node* join_node; + RE_CompileArgs subargs; + + /* Create the start and end nodes. */ + branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!branch_node || !join_node) + return RE_ERROR_MEMORY; + + /* Compile the sequence and check that we've reached the end of it. */ + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + if (greedy) { + /* It's a greedy option. */ + add_node(branch_node, subargs.start); + add_node(branch_node, join_node); + } else { + /* It's a lazy option. */ + add_node(branch_node, join_node); + add_node(branch_node, subargs.start); + } + add_node(subargs.end, join_node); + + /* Append the optional sequence. */ + add_node(args->end, branch_node); + args->end = join_node; + } else if (min_count == 1 && max_count == 1) { + /* Singly-repeated sequence. */ + RE_CompileArgs subargs; + + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Append the sequence. */ + add_node(args->end, subargs.start); + args->end = subargs.end; + } else { + RE_CODE index; + RE_Node* repeat_node; + RE_CompileArgs subargs; + + index = (RE_CODE)args->pattern->repeat_count; + + /* Create the nodes for the repeat. */ + repeat_node = create_node(args->pattern, greedy ? RE_OP_GREEDY_REPEAT : + RE_OP_LAZY_REPEAT, 0, args->forward ? 1 : -1, 4); + if (!repeat_node || !record_repeat(args->pattern, index, + args->repeat_depth)) + return RE_ERROR_MEMORY; + + repeat_node->values[0] = index; + repeat_node->values[1] = min_count; + repeat_node->values[2] = max_count; + repeat_node->values[3] = args->forward; + + if (args->within_fuzzy) + args->pattern->repeat_info[index].status |= RE_STATUS_BODY; + + /* Compile the 'body' and check that we've reached the end of it. */ + subargs = *args; + subargs.min_width = 0; + subargs.visible_captures = TRUE; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + ++subargs.repeat_depth; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width += min_count * subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Is it a repeat of something which will match a single character? + * + * If it's in a fuzzy section then it won't be optimised as a + * single-character repeat. + */ + if (sequence_matches_one(subargs.start)) { + repeat_node->op = greedy ? RE_OP_GREEDY_REPEAT_ONE : + RE_OP_LAZY_REPEAT_ONE; + + /* Append the new sequence. */ + add_node(args->end, repeat_node); + repeat_node->nonstring.next_2.node = subargs.start; + args->end = repeat_node; + } else { + RE_Node* end_repeat_node; + RE_Node* end_node; + + end_repeat_node = create_node(args->pattern, greedy ? + RE_OP_END_GREEDY_REPEAT : RE_OP_END_LAZY_REPEAT, 0, args->forward + ? 1 : -1, 4); + if (!end_repeat_node) + return RE_ERROR_MEMORY; + + end_repeat_node->values[0] = repeat_node->values[0]; + end_repeat_node->values[1] = repeat_node->values[1]; + end_repeat_node->values[2] = repeat_node->values[2]; + end_repeat_node->values[3] = args->forward; + + end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!end_node) + return RE_ERROR_MEMORY; + + /* Append the new sequence. */ + add_node(args->end, repeat_node); + add_node(repeat_node, subargs.start); + add_node(repeat_node, end_node); + add_node(subargs.end, end_repeat_node); + add_node(end_repeat_node, subargs.start); + add_node(end_repeat_node, end_node); + args->end = end_node; + } + } + + return RE_ERROR_SUCCESS; +} + +/* Builds a STRING node. */ +Py_LOCAL_INLINE(int) build_STRING(RE_CompileArgs* args, BOOL is_charset) { + RE_CODE flags; + RE_CODE length; + RE_UINT8 op; + Py_ssize_t step; + RE_Node* node; + RE_CODE i; + + /* codes: opcode, flags, length, characters. */ + flags = args->code[1]; + length = args->code[2]; + if (args->code + 3 + length > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + + step = get_step(op); + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step * (Py_ssize_t)length, + length); + if (!node) + return RE_ERROR_MEMORY; + if (!is_charset) + node->status |= RE_STATUS_STRING; + + for (i = 0; i < length; i++) + node->values[i] = args->code[3 + i]; + + args->code += 3 + length; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + /* Because of full case-folding, one character in the text could match + * multiple characters in the pattern. + */ + if (op == RE_OP_STRING_FLD || op == RE_OP_STRING_FLD_REV) + args->min_width += possible_unfolded_length(length); + else + args->min_width += length; + + return RE_ERROR_SUCCESS; +} + +/* Builds a SET node. */ +Py_LOCAL_INLINE(int) build_SET(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + size_t saved_min_width; + int status; + + /* codes: opcode, flags, members. */ + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + if (flags & RE_ZEROWIDTH_OP) + step = 0; + + node = create_node(args->pattern, op, flags, step, 0); + if (!node) + return RE_ERROR_MEMORY; + + args->code += 2; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + saved_min_width = args->min_width; + + /* Compile the character set. */ + do { + switch (args->code[0]) { + case RE_OP_CHARACTER: + case RE_OP_PROPERTY: + status = build_CHARACTER_or_PROPERTY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_RANGE: + status = build_RANGE(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + status = build_SET(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_STRING: + /* A set of characters. */ + if (!build_STRING(args, TRUE)) + return FALSE; + break; + default: + /* Illegal opcode for a character set. */ + return RE_ERROR_ILLEGAL; + } + } while (args->code < args->end_code && args->code[0] != RE_OP_END); + + /* Check that we've reached the end correctly. (The last opcode should be + * 'END'.) + */ + if (args->code >= args->end_code || args->code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + ++args->code; + + /* At this point the set's members are in the main sequence. They need to + * be moved out-of-line. + */ + node->nonstring.next_2.node = node->next_1.node; + node->next_1.node = NULL; + args->end = node; + + args->min_width = saved_min_width; + + if (step != 0) + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a STRING_SET node. */ +Py_LOCAL_INLINE(int) build_STRING_SET(RE_CompileArgs* args) { + RE_CODE index; + RE_CODE min_len; + RE_CODE max_len; + RE_Node* node; + + /* codes: opcode, index, min_len, max_len. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + index = args->code[1]; + min_len = args->code[2]; + max_len = args->code[3]; + node = create_node(args->pattern, (RE_UINT8)args->code[0], 0, 0, 3); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = index; + node->values[1] = min_len; + node->values[2] = max_len; + + args->code += 4; + + /* Append the reference. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a SUCCESS node . */ +Py_LOCAL_INLINE(int) build_SUCCESS(RE_CompileArgs* args) { + RE_Node* node; + /* code: opcode. */ + + /* Create the node. */ + node = create_node(args->pattern, RE_OP_SUCCESS, 0, 0, 0); + if (!node) + return RE_ERROR_MEMORY; + + ++args->code; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a zero-width node. */ +Py_LOCAL_INLINE(int) build_zerowidth(RE_CompileArgs* args) { + RE_CODE flags; + RE_Node* node; + + /* codes: opcode, flags. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + + /* Create the node. */ + node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 0); + if (!node) + return RE_ERROR_MEMORY; + + args->code += 2; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a sequence of nodes from regular expression code. */ +Py_LOCAL_INLINE(int) build_sequence(RE_CompileArgs* args) { + int status; + + /* Guarantee that there's something to attach to. */ + args->start = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + args->end = args->start; + + /* The sequence should end with an opcode we don't understand. If it + * doesn't then the code is illegal. + */ + while (args->code < args->end_code) { + /* The following code groups opcodes by format, not function. */ + switch (args->code[0]) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + /* A simple opcode with no trailing codewords and width of 1. */ + status = build_ANY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_ATOMIC: + /* An atomic sequence. */ + status = build_ATOMIC(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_BOUNDARY: + case RE_OP_DEFAULT_BOUNDARY: + case RE_OP_DEFAULT_END_OF_WORD: + case RE_OP_DEFAULT_START_OF_WORD: + case RE_OP_END_OF_WORD: + case RE_OP_GRAPHEME_BOUNDARY: + case RE_OP_START_OF_WORD: + /* A word or grapheme boundary. */ + status = build_BOUNDARY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_BRANCH: + /* A 2-way branch. */ + status = build_BRANCH(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_CALL_REF: + /* A group call ref. */ + status = build_CALL_REF(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + /* A character literal or a property. */ + status = build_CHARACTER_or_PROPERTY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_END_OF_LINE: + case RE_OP_END_OF_LINE_U: + case RE_OP_END_OF_STRING: + case RE_OP_END_OF_STRING_LINE: + case RE_OP_END_OF_STRING_LINE_U: + case RE_OP_SEARCH_ANCHOR: + case RE_OP_START_OF_LINE: + case RE_OP_START_OF_LINE_U: + case RE_OP_START_OF_STRING: + /* A simple opcode with no trailing codewords and width of 0. */ + status = build_zerowidth(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_FUZZY: + /* A fuzzy sequence. */ + status = build_FUZZY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + /* A repeated sequence. */ + status = build_REPEAT(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GROUP: + /* A capture group. */ + status = build_GROUP(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GROUP_CALL: + /* A group call. */ + status = build_GROUP_CALL(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GROUP_EXISTS: + /* A conditional sequence. */ + status = build_GROUP_EXISTS(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_LOOKAROUND: + /* A lookaround. */ + status = build_LOOKAROUND(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + /* A range. */ + status = build_RANGE(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_REF_GROUP: + case RE_OP_REF_GROUP_FLD: + case RE_OP_REF_GROUP_FLD_REV: + case RE_OP_REF_GROUP_IGN: + case RE_OP_REF_GROUP_IGN_REV: + case RE_OP_REF_GROUP_REV: + /* A reference to a group. */ + status = build_REF_GROUP(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + /* A set. */ + status = build_SET(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + /* A string literal. */ + if (!build_STRING(args, FALSE)) + return FALSE; + break; + case RE_OP_STRING_SET: + case RE_OP_STRING_SET_FLD: + case RE_OP_STRING_SET_FLD_REV: + case RE_OP_STRING_SET_IGN: + case RE_OP_STRING_SET_IGN_REV: + case RE_OP_STRING_SET_REV: + /* A reference to a list. */ + status = build_STRING_SET(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_SUCCESS: + /* Success. */ + status = build_SUCCESS(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + default: + /* We've found an opcode which we don't recognise. We'll leave it + * for the caller. + */ + return RE_ERROR_SUCCESS; + } + } + + /* If we're here then we should be at the end of the code, otherwise we + * have an error. + */ + return args->code == args->end_code; +} + +/* Compiles the regular expression code to 'nodes'. + * + * Various details about the regular expression are discovered during + * compilation and stored in the PatternObject. + */ +Py_LOCAL_INLINE(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, + PatternObject* pattern) { + RE_CompileArgs args; + int status; + + /* Compile a regex sequence and then check that we've reached the end + * correctly. (The last opcode should be 'SUCCESS'.) + * + * If successful, 'start' and 'end' will point to the start and end nodes + * of the compiled sequence. + */ + args.code = code; + args.end_code = end_code; + args.pattern = pattern; + args.forward = (pattern->flags & RE_FLAG_REVERSE) == 0; + args.min_width = 0; + args.visible_captures = FALSE; + args.has_captures = FALSE; + args.repeat_depth = 0; + args.is_fuzzy = FALSE; + args.within_fuzzy = FALSE; + status = build_sequence(&args); + if (status == RE_ERROR_ILLEGAL) + set_error(RE_ERROR_ILLEGAL, NULL); + + if (status != RE_ERROR_SUCCESS) + return FALSE; + + pattern->min_width = args.min_width; + pattern->is_fuzzy = args.is_fuzzy; + pattern->do_search_start = TRUE; + pattern->start_node = args.start; + + /* Optimise the pattern. */ + if (!optimise_pattern(pattern)) + return FALSE; + + pattern->start_test = locate_test_start(pattern->start_node); + + /* Get the call_ref for the entire pattern, if any. */ + if (pattern->start_node->op == RE_OP_CALL_REF) + pattern->pattern_call_ref = pattern->start_node->values[0]; + else + pattern->pattern_call_ref = -1; + + return TRUE; +} + +/* Gets the required characters for a regex. + * + * In the event of an error, it just pretends that there are no required + * characters. + */ +Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** + req_chars, Py_ssize_t* req_length) { + Py_ssize_t len; + RE_CODE* chars; + Py_ssize_t i; + + *req_chars = NULL; + *req_length = 0; + + len = PyTuple_GET_SIZE(required_chars); + if (len < 1 || PyErr_Occurred()) { + PyErr_Clear(); + return; + } + + chars = (RE_CODE*)re_alloc(len * sizeof(RE_CODE)); + if (!chars) + goto error; + + for (i = 0; i < len; i++) { + PyObject* o = PyTuple_GET_ITEM(required_chars, i); + size_t value; + + value = PyLong_AsUnsignedLong(o); + if (value == -1 && PyErr_Occurred()) + goto error; + + chars[i] = (RE_CODE)value; + if (chars[i] != value) + goto error; + } + + *req_chars = chars; + *req_length = len; + + return; + +error: + PyErr_Clear(); + re_dealloc(chars); +} + +/* Makes a STRING node. */ +Py_LOCAL_INLINE(RE_Node*) make_STRING_node(PatternObject* pattern, RE_UINT8 op, + Py_ssize_t length, RE_CODE* chars) { + Py_ssize_t step; + RE_Node* node; + Py_ssize_t i; + + step = get_step(op); + + /* Create the node. */ + node = create_node(pattern, op, 0, step * length, length); + if (!node) + return NULL; + + node->status |= RE_STATUS_STRING; + + for (i = 0; i < length; i++) + node->values[i] = chars[i]; + + return node; +} + +/* Compiles regular expression code to a PatternObject. + * + * The regular expression code is provided as a list and is then compiled to + * 'nodes'. Various details about the regular expression are discovered during + * compilation and stored in the PatternObject. + */ +static PyObject* re_compile(PyObject* self_, PyObject* args) { + PyObject* pattern; + Py_ssize_t flags = 0; + PyObject* code_list; + PyObject* groupindex; + PyObject* indexgroup; + PyObject* named_lists; + PyObject* named_list_indexes; + Py_ssize_t req_offset; + PyObject* required_chars; + Py_ssize_t req_length; + RE_CODE* req_chars; + Py_ssize_t req_flags; + Py_ssize_t public_group_count; + Py_ssize_t code_len; + RE_CODE* code; + Py_ssize_t i; + PatternObject* self; + BOOL ascii; + BOOL locale; + BOOL unicode; + BOOL ok; + + if (!PyArg_ParseTuple(args, "OnOOOOOnOnn:re_compile", &pattern, &flags, + &code_list, &groupindex, &indexgroup, &named_lists, &named_list_indexes, + &req_offset, &required_chars, &req_flags, &public_group_count)) + return NULL; + + /* Read the regex code. */ + code_len = PyList_GET_SIZE(code_list); + code = (RE_CODE*)re_alloc(code_len * sizeof(RE_CODE)); + if (!code) + return NULL; + + for (i = 0; i < code_len; i++) { + PyObject* o = PyList_GET_ITEM(code_list, i); + size_t value; + + value = PyLong_AsUnsignedLong(o); + if (value == -1 && PyErr_Occurred()) + goto error; + + code[i] = (RE_CODE)value; + if (code[i] != value) + goto error; + } + + /* Get the required characters. */ + get_required_chars(required_chars, &req_chars, &req_length); + + /* Create the PatternObject. */ + self = PyObject_NEW(PatternObject, &Pattern_Type); + if (!self) { + set_error(RE_ERROR_MEMORY, NULL); + re_dealloc(req_chars); + re_dealloc(code); + return NULL; + } + + /* Initialise the PatternObject. */ + self->pattern = pattern; + self->flags = (RE_CODE)flags; + self->weakreflist = NULL; + self->start_node = NULL; + self->repeat_count = 0; + self->true_group_count = 0; + self->public_group_count = public_group_count; + self->group_end_index = 0; + self->groupindex = groupindex; + self->indexgroup = indexgroup; + self->named_lists = named_lists; + self->named_list_indexes = named_list_indexes; + self->node_capacity = 0; + self->node_count = 0; + self->node_list = NULL; + self->group_info_capacity = 0; + self->group_info = NULL; + self->call_ref_info_capacity = 0; + self->call_ref_info_count = 0; + self->call_ref_info = NULL; + self->repeat_info_capacity = 0; + self->repeat_info = NULL; + self->groups_storage = NULL; + self->repeats_storage = NULL; + self->fuzzy_count = 0; + self->recursive = FALSE; + self->req_offset = req_offset; + self->req_string = NULL; + Py_INCREF(self->pattern); + Py_INCREF(self->groupindex); + Py_INCREF(self->indexgroup); + Py_INCREF(self->named_lists); + Py_INCREF(self->named_list_indexes); + + /* Initialise the character encoding. */ + unicode = (flags & RE_FLAG_UNICODE) != 0; + locale = (flags & RE_FLAG_LOCALE) != 0; + ascii = (flags & RE_FLAG_ASCII) != 0; + if (!unicode && !locale && !ascii) { + if (PyString_Check(self->pattern)) + ascii = RE_FLAG_ASCII; + else + unicode = RE_FLAG_UNICODE; + } + if (unicode) + self->encoding = &unicode_encoding; + else if (locale) + self->encoding = &locale_encoding; + else if (ascii) + self->encoding = &ascii_encoding; + + /* Compile the regular expression code to nodes. */ + ok = compile_to_nodes(code, code + code_len, self); + + /* We no longer need the regular expression code. */ + re_dealloc(code); + + if (!ok) { + Py_DECREF(self); + re_dealloc(req_chars); + return NULL; + } + + /* Make a node for the required string, if there's one. */ + if (req_chars) { + /* Remove the FULLCASE flag if it's not a Unicode pattern. */ + if (!(self->flags & RE_FLAG_UNICODE)) + req_flags &= ~RE_FLAG_FULLCASE; + + if (self->flags & RE_FLAG_REVERSE) { + switch (req_flags) { + case 0: + self->req_string = make_STRING_node(self, RE_OP_STRING_REV, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_FLD_REV, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_IGN_REV, + req_length, req_chars); + break; + } + } else { + switch (req_flags) { + case 0: + self->req_string = make_STRING_node(self, RE_OP_STRING, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_FLD, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_IGN, + req_length, req_chars); + break; + } + } + + re_dealloc(req_chars); + } + + return (PyObject*)self; + +error: + re_dealloc(code); + set_error(RE_ERROR_ILLEGAL, NULL); + return NULL; +} + +/* Gets the size of the codewords. */ +static PyObject* get_code_size(PyObject* self, PyObject* unused) { + return Py_BuildValue("n", sizeof(RE_CODE)); +} + +/* Gets the property dict. */ +static PyObject* get_properties(PyObject* self_, PyObject* args) { + Py_INCREF(property_dict); + + return property_dict; +} + +/* Folds the case of a string. */ +static PyObject* fold_case(PyObject* self_, PyObject* args) { + RE_StringInfo str_info; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + RE_EncodingTable* encoding; + Py_ssize_t buf_size; + void* folded; + Py_ssize_t folded_len; + PyObject* result; + + Py_ssize_t flags; + PyObject* string; + if (!PyArg_ParseTuple(args, "nO:fold_case", &flags, &string)) + return NULL; + + if (!(flags & RE_FLAG_IGNORECASE)) { + Py_INCREF(string); + return string; + } + + /* Get the string. */ + if (!get_string(string, &str_info)) + return NULL; + + /* Get the function for reading from the original string. */ + switch (str_info.charsize) { + case 1: + char_at = bytes1_char_at; + break; + case 2: + char_at = bytes2_char_at; + break; + case 4: + char_at = bytes4_char_at; + break; + default: +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* What's the encoding? */ + if (flags & RE_FLAG_UNICODE) + encoding = &unicode_encoding; + else if (flags & RE_FLAG_LOCALE) + encoding = &locale_encoding; + else if (flags & RE_FLAG_ASCII) + encoding = &ascii_encoding; + else + encoding = &unicode_encoding; + + /* The folded string will have the same width as the original string. */ + folded_charsize = str_info.charsize; + + /* Get the function for writing to the folded string. */ + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + break; + case 2: + set_char_at = bytes2_set_char_at; + break; + case 4: + set_char_at = bytes4_set_char_at; + break; + default: +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* Allocate a buffer for the folded string. */ + if (flags & RE_FLAG_FULLCASE) + /* When using full case-folding with Unicode, some single codepoints + * are transformed to sequences of codepoints. + */ + buf_size = str_info.length * RE_MAX_FOLDED; + else + buf_size = str_info.length; + + folded = re_alloc(buf_size * folded_charsize); + if (!folded) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* Fold the case of the string. */ + folded_len = 0; + + if (flags & RE_FLAG_FULLCASE) { + /* Full case-folding. */ + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t i; + Py_UCS4 codepoints[RE_MAX_FOLDED]; + + full_case_fold = encoding->full_case_fold; + + for (i = 0; i < str_info.length; i++) { + int count; + int j; + + count = full_case_fold(char_at(str_info.characters, i), + codepoints); + for (j = 0; j < count; j++) + set_char_at(folded, folded_len + j, codepoints[j]); + + folded_len += count; + } + } else { + /* Simple case-folding. */ + Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + Py_ssize_t i; + + simple_case_fold = encoding->simple_case_fold; + + for (i = 0; i < str_info.length; i++) { + Py_UCS4 ch; + + ch = simple_case_fold(char_at(str_info.characters, i)); + set_char_at(folded, i, ch); + } + + folded_len = str_info.length; + } + + /* Build the result string. */ + if (str_info.is_unicode) + result = build_unicode_value(folded, folded_len, folded_charsize); + else + result = build_bytes_value(folded, folded_len, folded_charsize); + + re_dealloc(folded); + +#if PY_VERSION_HEX >= 0x02060000 + /* Release the original string's buffer. */ + release_buffer(&str_info); + +#endif + return result; +} + +/* Returns a tuple of the Unicode characters which expand on full case-folding. + */ +static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { + int count; + int i; + PyObject* result; + + /* How many characters are there? */ + count = sizeof(re_expand_on_folding) / sizeof(re_expand_on_folding[0]); + + /* Put all the characters in a tuple. */ + result = PyTuple_New(count); + if (!result) + goto error; + + for (i = 0; i < count; i++) { + Py_UNICODE codepoint; + PyObject* item; + + codepoint = re_expand_on_folding[i]; + + item = build_unicode_value(&codepoint, 1, sizeof(codepoint)); + if (!item) + goto error; + + PyTuple_SetItem(result, i, item); + } + + return result; + +error: + Py_XDECREF(result); + + return NULL; +} + +/* Returns whether a character has a given value for a Unicode property. */ +static PyObject* has_property_value(PyObject* self_, PyObject* args) { + BOOL v; + + Py_ssize_t property_value; + Py_ssize_t character; + if (!PyArg_ParseTuple(args, "nn:has_property_value", &property_value, + &character)) + return NULL; + + v = unicode_has_property((RE_CODE)property_value, (Py_UCS4)character) ? 1 : + 0; + + return Py_BuildValue("n", v); +} + +/* Returns a list all the simple cases of a character. + * + * If full case-folding is turned on and the character also expands on full + * case-folding, a None is appended to the list. + */ +static PyObject* get_all_cases(PyObject* self_, PyObject* args) { + RE_EncodingTable* encoding; + int count; + Py_UCS4 cases[RE_MAX_CASES]; + Py_UCS4 folded[RE_MAX_FOLDED]; + PyObject* result; + int i; + + Py_ssize_t flags; + Py_ssize_t character; + if (!PyArg_ParseTuple(args, "nn:get_all_cases", &flags, &character)) + return NULL; + + /* What's the encoding? */ + if (flags & RE_FLAG_UNICODE) + encoding = &unicode_encoding; + else if (flags & RE_FLAG_LOCALE) + encoding = &locale_encoding; + else if (flags & RE_FLAG_ASCII) + encoding = &ascii_encoding; + else + encoding = &ascii_encoding; + + /* Get all the simple cases. */ + count = encoding->all_cases((Py_UCS4)character, cases); + + result = PyList_New(count); + if (!result) + goto error; + + for (i = 0; i < count; i++) { + PyObject* item; + + item = Py_BuildValue("n", cases[i]); + if (!item) + goto error; + + PyList_SetItem(result, i, item); + } + + /* If the character also expands on full case-folding, append a None. */ + if ((flags & RE_FULL_CASE_FOLDING) == RE_FULL_CASE_FOLDING) { + count = encoding->full_case_fold((Py_UCS4)character, folded); + if (count > 1) + PyList_Append(result, Py_None); + } + + return result; + +error: + Py_XDECREF(result); + + return NULL; +} + +/* The table of the module's functions. */ +static PyMethodDef _functions[] = { + {"compile", (PyCFunction)re_compile, METH_VARARGS}, + {"get_code_size", (PyCFunction)get_code_size, METH_NOARGS}, + {"get_properties", (PyCFunction)get_properties, METH_VARARGS}, + {"fold_case", (PyCFunction)fold_case, METH_VARARGS}, + {"get_expand_on_folding", (PyCFunction)get_expand_on_folding, METH_NOARGS}, + {"has_property_value", (PyCFunction)has_property_value, METH_VARARGS}, + {"get_all_cases", (PyCFunction)get_all_cases, METH_VARARGS}, + {NULL, NULL} +}; + +/* Initialises the property dictionary. */ +static BOOL init_property_dict(void) { + int value_set_count; + int i; + PyObject** value_dicts; + + property_dict = NULL; + + /* How many value sets are there? */ + value_set_count = 0; + + for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); + i++) { + RE_PropertyValue* value; + + value = &re_property_values[i]; + if (value->value_set >= value_set_count) + value_set_count = value->value_set + 1; + } + + /* Quick references for the value sets. */ + value_dicts = (PyObject**)re_alloc(value_set_count * + sizeof(value_dicts[0])); + if (!value_dicts) + return FALSE; + + memset(value_dicts, 0, value_set_count * sizeof(value_dicts[0])); + + /* Build the property values dictionaries. */ + for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); + i++) { + RE_PropertyValue* value; + PyObject* v; + + value = &re_property_values[i]; + if (!value_dicts[value->value_set]) { + value_dicts[value->value_set] = PyDict_New(); + if (!value_dicts[value->value_set]) + goto error; + } + + v = Py_BuildValue("i", value->id); + if (!v) + goto error; + + PyDict_SetItemString(value_dicts[value->value_set], + re_strings[value->name], v); + } + + /* Build the property dictionary. */ + property_dict = PyDict_New(); + if (!property_dict) + goto error; + + for (i = 0; i < sizeof(re_properties) / sizeof(re_properties[0]); i++) { + RE_Property* property; + PyObject* v; + + property = &re_properties[i]; + v = Py_BuildValue("iO", property->id, + value_dicts[property->value_set]); + if (!v) + goto error; + + PyDict_SetItemString(property_dict, re_strings[property->name], v); + } + + /* DECREF the value sets. Any unused ones will be deallocated. */ + for (i = 0; i < value_set_count; i++) + Py_XDECREF(value_dicts[i]); + + re_dealloc(value_dicts); + + return TRUE; + +error: + Py_XDECREF(property_dict); + + /* DECREF the value sets. */ + for (i = 0; i < value_set_count; i++) + Py_XDECREF(value_dicts[i]); + + re_dealloc(value_dicts); + + return FALSE; +} + +/* Initialises the module. */ +PyMODINIT_FUNC init_regex(void) { + PyObject* m; + PyObject* d; + PyObject* x; +#if defined(VERBOSE) + /* Unbuffered in case it crashes! */ + setvbuf(stdout, NULL, _IONBF, 0); +#endif + + /* Initialise Pattern_Type. */ + Pattern_Type.tp_dealloc = pattern_dealloc; + Pattern_Type.tp_repr = pattern_repr; + Pattern_Type.tp_flags = Py_TPFLAGS_HAVE_WEAKREFS; + Pattern_Type.tp_doc = pattern_doc; + Pattern_Type.tp_weaklistoffset = offsetof(PatternObject, weakreflist); + Pattern_Type.tp_methods = pattern_methods; + Pattern_Type.tp_members = pattern_members; + Pattern_Type.tp_getset = pattern_getset; + + /* Initialise Match_Type. */ + Match_Type.tp_dealloc = match_dealloc; + Match_Type.tp_as_mapping = &match_as_mapping; + Match_Type.tp_flags = Py_TPFLAGS_DEFAULT; + Match_Type.tp_doc = match_doc; + Match_Type.tp_methods = match_methods; + Match_Type.tp_members = match_members; + Match_Type.tp_getset = match_getset; + + /* Initialise Scanner_Type. */ + Scanner_Type.tp_dealloc = scanner_dealloc; + Scanner_Type.tp_flags = Py_TPFLAGS_DEFAULT; + Scanner_Type.tp_doc = scanner_doc; + Scanner_Type.tp_iter = scanner_iter; + Scanner_Type.tp_iternext = scanner_iternext; + Scanner_Type.tp_methods = scanner_methods; + Scanner_Type.tp_members = scanner_members; + + /* Initialise Splitter_Type. */ + Splitter_Type.tp_dealloc = splitter_dealloc; + Splitter_Type.tp_flags = Py_TPFLAGS_DEFAULT; + Splitter_Type.tp_doc = splitter_doc; + Splitter_Type.tp_iter = splitter_iter; + Splitter_Type.tp_iternext = splitter_iternext; + Splitter_Type.tp_methods = splitter_methods; + Splitter_Type.tp_members = splitter_members; + + /* Initialize object types */ + if (PyType_Ready(&Pattern_Type) < 0) + return; + if (PyType_Ready(&Match_Type) < 0) + return; + if (PyType_Ready(&Scanner_Type) < 0) + return; + if (PyType_Ready(&Splitter_Type) < 0) + return; + + error_exception = NULL; + + m = Py_InitModule("_" RE_MODULE, _functions); + if (!m) + return; + + d = PyModule_GetDict(m); + + x = PyInt_FromLong(RE_MAGIC); + if (x) { + PyDict_SetItemString(d, "MAGIC", x); + Py_DECREF(x); + } + + x = PyInt_FromLong(sizeof(RE_CODE)); + if (x) { + PyDict_SetItemString(d, "CODE_SIZE", x); + Py_DECREF(x); + } + + x = PyString_FromString(copyright); + if (x) { + PyDict_SetItemString(d, "copyright", x); + Py_DECREF(x); + } + + /* Initialise the property dictionary. */ + if (!init_property_dict()) + return; +} + +/* vim:ts=4:sw=4:et */ diff --git a/src/regex/_regex.h b/src/regex/_regex.h new file mode 100644 index 000000000000..33dc1540b1fa --- /dev/null +++ b/src/regex/_regex.h @@ -0,0 +1,228 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * NOTE: This file is generated by regex.py. If you need + * to change anything in here, edit regex.py and run it. + * + * 2010-01-16 mrab Re-written + */ + +/* Supports Unicode version 6.3.0. */ + +#define RE_MAGIC 20100116 + +#include "_regex_unicode.h" + +/* Operators. */ +#define RE_OP_FAILURE 0 +#define RE_OP_SUCCESS 1 +#define RE_OP_ANY 2 +#define RE_OP_ANY_ALL 3 +#define RE_OP_ANY_ALL_REV 4 +#define RE_OP_ANY_REV 5 +#define RE_OP_ANY_U 6 +#define RE_OP_ANY_U_REV 7 +#define RE_OP_ATOMIC 8 +#define RE_OP_BOUNDARY 9 +#define RE_OP_BRANCH 10 +#define RE_OP_CALL_REF 11 +#define RE_OP_CHARACTER 12 +#define RE_OP_CHARACTER_IGN 13 +#define RE_OP_CHARACTER_IGN_REV 14 +#define RE_OP_CHARACTER_REV 15 +#define RE_OP_DEFAULT_BOUNDARY 16 +#define RE_OP_DEFAULT_END_OF_WORD 17 +#define RE_OP_DEFAULT_START_OF_WORD 18 +#define RE_OP_END 19 +#define RE_OP_END_OF_LINE 20 +#define RE_OP_END_OF_LINE_U 21 +#define RE_OP_END_OF_STRING 22 +#define RE_OP_END_OF_STRING_LINE 23 +#define RE_OP_END_OF_STRING_LINE_U 24 +#define RE_OP_END_OF_WORD 25 +#define RE_OP_FUZZY 26 +#define RE_OP_GRAPHEME_BOUNDARY 27 +#define RE_OP_GREEDY_REPEAT 28 +#define RE_OP_GROUP 29 +#define RE_OP_GROUP_CALL 30 +#define RE_OP_GROUP_EXISTS 31 +#define RE_OP_LAZY_REPEAT 32 +#define RE_OP_LOOKAROUND 33 +#define RE_OP_NEXT 34 +#define RE_OP_PROPERTY 35 +#define RE_OP_PROPERTY_IGN 36 +#define RE_OP_PROPERTY_IGN_REV 37 +#define RE_OP_PROPERTY_REV 38 +#define RE_OP_RANGE 39 +#define RE_OP_RANGE_IGN 40 +#define RE_OP_RANGE_IGN_REV 41 +#define RE_OP_RANGE_REV 42 +#define RE_OP_REF_GROUP 43 +#define RE_OP_REF_GROUP_FLD 44 +#define RE_OP_REF_GROUP_FLD_REV 45 +#define RE_OP_REF_GROUP_IGN 46 +#define RE_OP_REF_GROUP_IGN_REV 47 +#define RE_OP_REF_GROUP_REV 48 +#define RE_OP_SEARCH_ANCHOR 49 +#define RE_OP_SET_DIFF 50 +#define RE_OP_SET_DIFF_IGN 51 +#define RE_OP_SET_DIFF_IGN_REV 52 +#define RE_OP_SET_DIFF_REV 53 +#define RE_OP_SET_INTER 54 +#define RE_OP_SET_INTER_IGN 55 +#define RE_OP_SET_INTER_IGN_REV 56 +#define RE_OP_SET_INTER_REV 57 +#define RE_OP_SET_SYM_DIFF 58 +#define RE_OP_SET_SYM_DIFF_IGN 59 +#define RE_OP_SET_SYM_DIFF_IGN_REV 60 +#define RE_OP_SET_SYM_DIFF_REV 61 +#define RE_OP_SET_UNION 62 +#define RE_OP_SET_UNION_IGN 63 +#define RE_OP_SET_UNION_IGN_REV 64 +#define RE_OP_SET_UNION_REV 65 +#define RE_OP_START_OF_LINE 66 +#define RE_OP_START_OF_LINE_U 67 +#define RE_OP_START_OF_STRING 68 +#define RE_OP_START_OF_WORD 69 +#define RE_OP_STRING 70 +#define RE_OP_STRING_FLD 71 +#define RE_OP_STRING_FLD_REV 72 +#define RE_OP_STRING_IGN 73 +#define RE_OP_STRING_IGN_REV 74 +#define RE_OP_STRING_REV 75 +#define RE_OP_STRING_SET 76 +#define RE_OP_STRING_SET_FLD 77 +#define RE_OP_STRING_SET_FLD_REV 78 +#define RE_OP_STRING_SET_IGN 79 +#define RE_OP_STRING_SET_IGN_REV 80 +#define RE_OP_STRING_SET_REV 81 +#define RE_OP_BODY_END 82 +#define RE_OP_BODY_START 83 +#define RE_OP_END_FUZZY 84 +#define RE_OP_END_GREEDY_REPEAT 85 +#define RE_OP_END_GROUP 86 +#define RE_OP_END_LAZY_REPEAT 87 +#define RE_OP_GREEDY_REPEAT_ONE 88 +#define RE_OP_GROUP_RETURN 89 +#define RE_OP_LAZY_REPEAT_ONE 90 +#define RE_OP_MATCH_BODY 91 +#define RE_OP_MATCH_TAIL 92 +#define RE_OP_START_GROUP 93 + +char* re_op_text[] = { + "RE_OP_FAILURE", + "RE_OP_SUCCESS", + "RE_OP_ANY", + "RE_OP_ANY_ALL", + "RE_OP_ANY_ALL_REV", + "RE_OP_ANY_REV", + "RE_OP_ANY_U", + "RE_OP_ANY_U_REV", + "RE_OP_ATOMIC", + "RE_OP_BOUNDARY", + "RE_OP_BRANCH", + "RE_OP_CALL_REF", + "RE_OP_CHARACTER", + "RE_OP_CHARACTER_IGN", + "RE_OP_CHARACTER_IGN_REV", + "RE_OP_CHARACTER_REV", + "RE_OP_DEFAULT_BOUNDARY", + "RE_OP_DEFAULT_END_OF_WORD", + "RE_OP_DEFAULT_START_OF_WORD", + "RE_OP_END", + "RE_OP_END_OF_LINE", + "RE_OP_END_OF_LINE_U", + "RE_OP_END_OF_STRING", + "RE_OP_END_OF_STRING_LINE", + "RE_OP_END_OF_STRING_LINE_U", + "RE_OP_END_OF_WORD", + "RE_OP_FUZZY", + "RE_OP_GRAPHEME_BOUNDARY", + "RE_OP_GREEDY_REPEAT", + "RE_OP_GROUP", + "RE_OP_GROUP_CALL", + "RE_OP_GROUP_EXISTS", + "RE_OP_LAZY_REPEAT", + "RE_OP_LOOKAROUND", + "RE_OP_NEXT", + "RE_OP_PROPERTY", + "RE_OP_PROPERTY_IGN", + "RE_OP_PROPERTY_IGN_REV", + "RE_OP_PROPERTY_REV", + "RE_OP_RANGE", + "RE_OP_RANGE_IGN", + "RE_OP_RANGE_IGN_REV", + "RE_OP_RANGE_REV", + "RE_OP_REF_GROUP", + "RE_OP_REF_GROUP_FLD", + "RE_OP_REF_GROUP_FLD_REV", + "RE_OP_REF_GROUP_IGN", + "RE_OP_REF_GROUP_IGN_REV", + "RE_OP_REF_GROUP_REV", + "RE_OP_SEARCH_ANCHOR", + "RE_OP_SET_DIFF", + "RE_OP_SET_DIFF_IGN", + "RE_OP_SET_DIFF_IGN_REV", + "RE_OP_SET_DIFF_REV", + "RE_OP_SET_INTER", + "RE_OP_SET_INTER_IGN", + "RE_OP_SET_INTER_IGN_REV", + "RE_OP_SET_INTER_REV", + "RE_OP_SET_SYM_DIFF", + "RE_OP_SET_SYM_DIFF_IGN", + "RE_OP_SET_SYM_DIFF_IGN_REV", + "RE_OP_SET_SYM_DIFF_REV", + "RE_OP_SET_UNION", + "RE_OP_SET_UNION_IGN", + "RE_OP_SET_UNION_IGN_REV", + "RE_OP_SET_UNION_REV", + "RE_OP_START_OF_LINE", + "RE_OP_START_OF_LINE_U", + "RE_OP_START_OF_STRING", + "RE_OP_START_OF_WORD", + "RE_OP_STRING", + "RE_OP_STRING_FLD", + "RE_OP_STRING_FLD_REV", + "RE_OP_STRING_IGN", + "RE_OP_STRING_IGN_REV", + "RE_OP_STRING_REV", + "RE_OP_STRING_SET", + "RE_OP_STRING_SET_FLD", + "RE_OP_STRING_SET_FLD_REV", + "RE_OP_STRING_SET_IGN", + "RE_OP_STRING_SET_IGN_REV", + "RE_OP_STRING_SET_REV", + "RE_OP_BODY_END", + "RE_OP_BODY_START", + "RE_OP_END_FUZZY", + "RE_OP_END_GREEDY_REPEAT", + "RE_OP_END_GROUP", + "RE_OP_END_LAZY_REPEAT", + "RE_OP_GREEDY_REPEAT_ONE", + "RE_OP_GROUP_RETURN", + "RE_OP_LAZY_REPEAT_ONE", + "RE_OP_MATCH_BODY", + "RE_OP_MATCH_TAIL", + "RE_OP_START_GROUP", +}; + +#define RE_FLAG_ASCII 0x80 +#define RE_FLAG_BESTMATCH 0x1000 +#define RE_FLAG_DEBUG 0x200 +#define RE_FLAG_DOTALL 0x10 +#define RE_FLAG_ENHANCEMATCH 0x8000 +#define RE_FLAG_FULLCASE 0x4000 +#define RE_FLAG_IGNORECASE 0x2 +#define RE_FLAG_LOCALE 0x4 +#define RE_FLAG_MULTILINE 0x8 +#define RE_FLAG_REVERSE 0x400 +#define RE_FLAG_TEMPLATE 0x1 +#define RE_FLAG_UNICODE 0x20 +#define RE_FLAG_VERBOSE 0x40 +#define RE_FLAG_VERSION0 0x2000 +#define RE_FLAG_VERSION1 0x100 +#define RE_FLAG_WORD 0x800 diff --git a/src/regex/_regex_core.py b/src/regex/_regex_core.py new file mode 100644 index 000000000000..5366d70a7632 --- /dev/null +++ b/src/regex/_regex_core.py @@ -0,0 +1,4004 @@ +# +# Secret Labs' Regular Expression Engine core module +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# +# 2010-01-16 mrab Python front-end re-written and extended + +import string +import sys +import unicodedata +from collections import defaultdict + +from calibre.constants import plugins +_regex = plugins['_regex'][0] +if _regex is None: + raise RuntimeError('Failed to load regex module with error: ' + plugins['_regex'][1]) + +__all__ = ["A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", + "F", "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "R", + "REVERSE", "S", "DOTALL", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0", + "V1", "VERSION1", "W", "WORD", "X", "VERBOSE", "error", + "Scanner"] + +# The regex exception. +class error(Exception): + def __init__(self, message, set_error=False): + Exception.__init__(self, message) + self.set_error = set_error + +# The exception for when a positional flag has been turned on in the old +# behaviour. +class _UnscopedFlagSet(Exception): + pass + +# The exception for when parsing fails and we want to try something else. +class ParseError(Exception): + pass + +# The exception for when there isn't a valid first set. +class _FirstSetError(Exception): + pass + +# Flags. +A = ASCII = 0x80 # Assume ASCII locale. +B = BESTMATCH = 0x1000 # Best fuzzy match. +D = DEBUG = 0x200 # Print parsed pattern. +E = ENHANCEMATCH = 0x8000 # Attempt to improve the fit after finding the first + # fuzzy match. +F = FULLCASE = 0x4000 # Unicode full case-folding. +I = IGNORECASE = 0x2 # Ignore case. +L = LOCALE = 0x4 # Assume current 8-bit locale. +M = MULTILINE = 0x8 # Make anchors look for newline. +R = REVERSE = 0x400 # Search backwards. +S = DOTALL = 0x10 # Make dot match newline. +U = UNICODE = 0x20 # Assume Unicode locale. +V0 = VERSION0 = 0x2000 # Old legacy behaviour. +V1 = VERSION1 = 0x100 # New enhanced behaviour. +W = WORD = 0x800 # Default Unicode word breaks. +X = VERBOSE = 0x40 # Ignore whitespace and comments. +T = TEMPLATE = 0x1 # Template (present because re module has it). + +DEFAULT_VERSION = VERSION1 + +_ALL_VERSIONS = VERSION0 | VERSION1 +_ALL_ENCODINGS = ASCII | LOCALE | UNICODE + +# The default flags for the various versions. +DEFAULT_FLAGS = {VERSION0: 0, VERSION1: FULLCASE} + +# The mask for the flags. +GLOBAL_FLAGS = (_ALL_ENCODINGS | _ALL_VERSIONS | BESTMATCH | DEBUG | + ENHANCEMATCH | REVERSE) +SCOPED_FLAGS = FULLCASE | IGNORECASE | MULTILINE | DOTALL | WORD | VERBOSE + +ALPHA = frozenset(string.ascii_letters) +DIGITS = frozenset(string.digits) +ALNUM = ALPHA | DIGITS +OCT_DIGITS = frozenset(string.octdigits) +HEX_DIGITS = frozenset(string.hexdigits) +SPECIAL_CHARS = frozenset("()|?*+{^$.[\\#") | frozenset([""]) +NAMED_CHAR_PART = ALNUM | frozenset(" -") +PROPERTY_NAME_PART = ALNUM | frozenset(" &_-.") +SET_OPS = ("||", "~~", "&&", "--") + +# The width of the code words inside the regex engine. +BYTES_PER_CODE = _regex.get_code_size() +BITS_PER_CODE = BYTES_PER_CODE * 8 + +# The repeat count which represents infinity. +UNLIMITED = (1 << BITS_PER_CODE) - 1 + +# The regular expression flags. +REGEX_FLAGS = {"a": ASCII, "b": BESTMATCH, "e": ENHANCEMATCH, "f": FULLCASE, + "i": IGNORECASE, "L": LOCALE, "m": MULTILINE, "r": REVERSE, "s": DOTALL, "u": + UNICODE, "V0": VERSION0, "V1": VERSION1, "w": WORD, "x": VERBOSE} + +# The case flags. +CASE_FLAGS = FULLCASE | IGNORECASE +NOCASE = 0 +FULLIGNORECASE = FULLCASE | IGNORECASE + +FULL_CASE_FOLDING = UNICODE | FULLIGNORECASE + +# The number of digits in hexadecimal escapes. +HEX_ESCAPES = {"x": 2, "u": 4, "U": 8} + +# A singleton which indicates a comment within a pattern. +COMMENT = object() + +# The names of the opcodes. +OPCODES = """ +FAILURE +SUCCESS +ANY +ANY_ALL +ANY_ALL_REV +ANY_REV +ANY_U +ANY_U_REV +ATOMIC +BOUNDARY +BRANCH +CALL_REF +CHARACTER +CHARACTER_IGN +CHARACTER_IGN_REV +CHARACTER_REV +DEFAULT_BOUNDARY +DEFAULT_END_OF_WORD +DEFAULT_START_OF_WORD +END +END_OF_LINE +END_OF_LINE_U +END_OF_STRING +END_OF_STRING_LINE +END_OF_STRING_LINE_U +END_OF_WORD +FUZZY +GRAPHEME_BOUNDARY +GREEDY_REPEAT +GROUP +GROUP_CALL +GROUP_EXISTS +LAZY_REPEAT +LOOKAROUND +NEXT +PROPERTY +PROPERTY_IGN +PROPERTY_IGN_REV +PROPERTY_REV +RANGE +RANGE_IGN +RANGE_IGN_REV +RANGE_REV +REF_GROUP +REF_GROUP_FLD +REF_GROUP_FLD_REV +REF_GROUP_IGN +REF_GROUP_IGN_REV +REF_GROUP_REV +SEARCH_ANCHOR +SET_DIFF +SET_DIFF_IGN +SET_DIFF_IGN_REV +SET_DIFF_REV +SET_INTER +SET_INTER_IGN +SET_INTER_IGN_REV +SET_INTER_REV +SET_SYM_DIFF +SET_SYM_DIFF_IGN +SET_SYM_DIFF_IGN_REV +SET_SYM_DIFF_REV +SET_UNION +SET_UNION_IGN +SET_UNION_IGN_REV +SET_UNION_REV +START_OF_LINE +START_OF_LINE_U +START_OF_STRING +START_OF_WORD +STRING +STRING_FLD +STRING_FLD_REV +STRING_IGN +STRING_IGN_REV +STRING_REV +STRING_SET +STRING_SET_FLD +STRING_SET_FLD_REV +STRING_SET_IGN +STRING_SET_IGN_REV +STRING_SET_REV +""" + +# Define the opcodes in a namespace. +class Namespace(object): + pass + +OP = Namespace() +for i, op in enumerate(OPCODES.split()): + setattr(OP, op, i) + +def _shrink_cache(cache_dict, args_dict, max_length, divisor=5): + """Make room in the given cache. + + Args: + cache_dict: The cache dictionary to modify. + args_dict: The dictionary of named list args used by patterns. + max_length: Maximum # of entries in cache_dict before it is shrunk. + divisor: Cache will shrink to max_length - 1/divisor*max_length items. + """ + # Toss out a fraction of the entries at random to make room for new ones. + # A random algorithm was chosen as opposed to simply cache_dict.popitem() + # as popitem could penalize the same regular expression repeatedly based + # on its internal hash value. Being random should spread the cache miss + # love around. + cache_keys = tuple(cache_dict.keys()) + overage = len(cache_keys) - max_length + if overage < 0: + # Cache is already within limits. Normally this should not happen + # but it could due to multithreading. + return + + number_to_toss = max_length // divisor + overage + + # The import is done here to avoid a circular dependency. + import random + if not hasattr(random, 'sample'): + # Do nothing while resolving the circular dependency: + # re->random->warnings->tokenize->string->re + return + + for doomed_key in random.sample(cache_keys, number_to_toss): + try: + del cache_dict[doomed_key] + except KeyError: + # Ignore problems if the cache changed from another thread. + pass + + # Rebuild the arguments dictionary. + args_dict.clear() + for pattern, pattern_type, flags, args, default_version in cache_dict: + args_dict[pattern, pattern_type, flags, default_version] = args + +def _fold_case(info, string): + "Folds the case of a string." + flags = info.flags + if (flags & _ALL_ENCODINGS) == 0: + flags |= info.guess_encoding + + return _regex.fold_case(flags, string) + +def is_cased(info, char): + "Checks whether a character is cased." + return len(_regex.get_all_cases(info.flags, char)) > 1 + +def _compile_firstset(info, fs): + "Compiles the firstset for the pattern." + if not fs or None in fs: + return [] + + # If we ignore the case, for simplicity we won't build a firstset. + members = set() + for i in fs: + if i.case_flags: + if isinstance(i, Character): + if is_cased(info, i.value): + return [] + elif isinstance(i, SetBase): + return [] + + members.add(i.with_flags(case_flags=NOCASE)) + + # Build the firstset. + fs = SetUnion(info, list(members), zerowidth=True) + fs = fs.optimise(info, in_set=True) + + # Compile the firstset. + return fs.compile(bool(info.flags & REVERSE)) + +def _flatten_code(code): + "Flattens the code from a list of tuples." + flat_code = [] + for c in code: + flat_code.extend(c) + + return flat_code + +def make_character(info, value, in_set=False): + "Makes a character literal." + if in_set: + # A character set is built case-sensitively. + return Character(value) + + return Character(value, case_flags=info.flags & CASE_FLAGS) + +def make_ref_group(info, name, position): + "Makes a group reference." + return RefGroup(info, name, position, case_flags=info.flags & CASE_FLAGS) + +def make_string_set(info, name): + "Makes a string set." + return StringSet(info, name, case_flags=info.flags & CASE_FLAGS) + +def make_property(info, prop, in_set): + "Makes a property." + if in_set: + return prop + + return prop.with_flags(case_flags=info.flags & CASE_FLAGS) + +def _parse_pattern(source, info): + "Parses a pattern, eg. 'a|b|c'." + branches = [parse_sequence(source, info)] + while source.match("|"): + branches.append(parse_sequence(source, info)) + + if len(branches) == 1: + return branches[0] + return Branch(branches) + +def parse_sequence(source, info): + "Parses a sequence, eg. 'abc'." + sequence = [] + item = parse_item(source, info) + while item: + sequence.append(item) + item = parse_item(source, info) + + return make_sequence(sequence) + +def PossessiveRepeat(element, min_count, max_count): + "Builds a possessive repeat." + return Atomic(GreedyRepeat(element, min_count, max_count)) + +def parse_item(source, info): + "Parses an item, which might be repeated. Returns None if there's no item." + element = parse_element(source, info) + counts = parse_quantifier(source, info) + if counts: + min_count, max_count = counts + saved_pos = source.pos + ch = source.get() + if ch == "?": + # The "?" suffix that means it's a lazy repeat. + repeated = LazyRepeat + elif ch == "+": + # The "+" suffix that means it's a possessive repeat. + repeated = PossessiveRepeat + else: + # No suffix means that it's a greedy repeat. + source.pos = saved_pos + repeated = GreedyRepeat + + if element.is_empty() or min_count == max_count == 1: + return element + + return repeated(element, min_count, max_count) + + # No quantifier, but maybe there's a fuzzy constraint. + constraints = parse_fuzzy(source) + if not constraints: + # No fuzzy constraint. + return element + + # If a group is marked as fuzzy then put all of the fuzzy part in the + # group. + if isinstance(element, Group): + element.subpattern = Fuzzy(element.subpattern, constraints) + return element + + return Fuzzy(element, constraints) + +_QUANTIFIERS = {"?": (0, 1), "*": (0, None), "+": (1, None)} + +def parse_quantifier(source, info): + "Parses a quantifier." + while True: + saved_pos = source.pos + ch = source.get() + q = _QUANTIFIERS.get(ch) + if q: + # It's a quantifier. + return q + if ch == "{": + # Looks like a limited repeated element, eg. 'a{2,3}'. + counts = parse_limited_quantifier(source) + if counts: + return counts + elif ch == "(" and source.match("?#"): + # A comment. + parse_comment(source) + continue + + # Neither a quantifier nor a comment. + break + + # Parse it later, perhaps as a literal. + source.pos = saved_pos + return None + +def is_above_limit(count): + "Checks whether a count is above the maximum." + return count is not None and count >= UNLIMITED + +def parse_limited_quantifier(source): + "Parses a limited quantifier." + saved_pos = source.pos + min_count = parse_count(source) + if source.match(","): + max_count = parse_count(source) + + # No minimum means 0 and no maximum means unlimited. + min_count = int(min_count or 0) + max_count = int(max_count) if max_count else None + + if max_count is not None and min_count > max_count: + raise error("min repeat greater than max repeat at position %d" % saved_pos) + else: + if not min_count: + source.pos = saved_pos + return None + + min_count = max_count = int(min_count) + + if is_above_limit(min_count) or is_above_limit(max_count): + raise error("repeat count too big at position %d" % saved_pos) + + if not source.match ("}"): + source.pos = saved_pos + return None + + return min_count, max_count + +def parse_fuzzy(source): + "Parses a fuzzy setting, if present." + saved_pos = source.pos + if not source.match("{"): + source.pos = saved_pos + return None + + constraints = {} + try: + parse_fuzzy_item(source, constraints) + while source.match(","): + parse_fuzzy_item(source, constraints) + except ParseError: + source.pos = saved_pos + + return None + + if not source.match("}"): + raise error("expected } at position %d" % source.pos) + + return constraints + +def parse_fuzzy_item(source, constraints): + "Parses a fuzzy setting item." + saved_pos = source.pos + try: + parse_cost_constraint(source, constraints) + except ParseError: + source.pos = saved_pos + + parse_cost_equation(source, constraints) + +def parse_cost_constraint(source, constraints): + "Parses a cost constraint." + saved_pos = source.pos + ch = source.get() + if ch in ALPHA: + # Syntax: constraint [("<=" | "<") cost] + constraint = parse_constraint(source, constraints, ch) + + max_inc = parse_fuzzy_compare(source) + + if max_inc is None: + # No maximum cost. + constraints[constraint] = 0, None + else: + # There's a maximum cost. + cost_pos = source.pos + max_cost = int(parse_count(source)) + + # Inclusive or exclusive limit? + if not max_inc: + max_cost -= 1 + + if max_cost < 0: + raise error("bad fuzzy cost limit at position %d" % cost_pos) + + constraints[constraint] = 0, max_cost + elif ch in DIGITS: + # Syntax: cost ("<=" | "<") constraint ("<=" | "<") cost + source.pos = saved_pos + try: + # Minimum cost. + min_cost = int(parse_count(source)) + + min_inc = parse_fuzzy_compare(source) + if min_inc is None: + raise ParseError() + + constraint = parse_constraint(source, constraints, source.get()) + + max_inc = parse_fuzzy_compare(source) + if max_inc is None: + raise ParseError() + + # Maximum cost. + cost_pos = source.pos + max_cost = int(parse_count(source)) + + # Inclusive or exclusive limits? + if not min_inc: + min_cost += 1 + if not max_inc: + max_cost -= 1 + + if not 0 <= min_cost <= max_cost: + raise error("bad fuzzy cost limit at position %d" % cost_pos) + + constraints[constraint] = min_cost, max_cost + except ValueError: + raise ParseError() + else: + raise ParseError() + +def parse_constraint(source, constraints, ch): + "Parses a constraint." + if ch not in "deis": + raise error("bad fuzzy constraint at position %d" % source.pos) + + if ch in constraints: + raise error("repeated fuzzy constraint at position %d" % source.pos) + + return ch + +def parse_fuzzy_compare(source): + "Parses a cost comparator." + if source.match("<="): + return True + elif source.match("<"): + return False + else: + return None + +def parse_cost_equation(source, constraints): + "Parses a cost equation." + if "cost" in constraints: + raise error("more than one cost equation at position %d" % source.pos) + + cost = {} + + parse_cost_term(source, cost) + while source.match("+"): + parse_cost_term(source, cost) + + max_inc = parse_fuzzy_compare(source) + if max_inc is None: + raise error("missing fuzzy cost limit at position %d" % source.pos) + + max_cost = int(parse_count(source)) + + if not max_inc: + max_cost -= 1 + + if max_cost < 0: + raise error("bad fuzzy cost limit at position %d" % source.pos) + + cost["max"] = max_cost + + constraints["cost"] = cost + +def parse_cost_term(source, cost): + "Parses a cost equation term." + coeff = parse_count(source) + ch = source.get() + if ch not in "dis": + raise ParseError() + + if ch in cost: + raise error("repeated fuzzy cost at position %d" % source.pos) + + cost[ch] = int(coeff or 1) + +def parse_count(source): + "Parses a quantifier's count, which can be empty." + return source.get_while(DIGITS) + +def parse_element(source, info): + """Parses an element. An element might actually be a flag, eg. '(?i)', in + which case it returns None. + """ + while True: + saved_pos = source.pos + ch = source.get() + if ch in SPECIAL_CHARS: + if ch in ")|": + # The end of a sequence. At the end of the pattern ch is "". + source.pos = saved_pos + return None + elif ch == "\\": + # An escape sequence outside a set. + return parse_escape(source, info, False) + elif ch == "(": + # A parenthesised subpattern or a flag. + element = parse_paren(source, info) + if element and element is not COMMENT: + return element + elif ch == ".": + # Any character. + if info.flags & DOTALL: + return AnyAll() + elif info.flags & WORD: + return AnyU() + else: + return Any() + elif ch == "[": + # A character set. + return parse_set(source, info) + elif ch == "^": + # The start of a line or the string. + if info.flags & MULTILINE: + if info.flags & WORD: + return StartOfLineU() + else: + return StartOfLine() + else: + return StartOfString() + elif ch == "$": + # The end of a line or the string. + if info.flags & MULTILINE: + if info.flags & WORD: + return EndOfLineU() + else: + return EndOfLine() + else: + if info.flags & WORD: + return EndOfStringLineU() + else: + return EndOfStringLine() + elif ch == "{": + # Looks like a limited quantifier. + saved_pos_2 = source.pos + source.pos = saved_pos + counts = parse_quantifier(source, info) + if counts: + # A quantifier where we expected an element. + raise error("nothing to repeat at position %d" % saved_pos_2) + + # Not a quantifier, so it's a literal. + source.pos = saved_pos_2 + return make_character(info, ord(ch)) + elif ch in "?*+": + # A quantifier where we expected an element. + raise error("nothing to repeat at position %d" % saved_pos) + else: + # A literal. + return make_character(info, ord(ch)) + else: + # A literal. + return make_character(info, ord(ch)) + +def parse_paren(source, info): + "Parses a parenthesised subpattern or a flag." + saved_pos = source.pos + ch = source.get() + if ch == "?": + # (?... + saved_pos_2 = source.pos + ch = source.get() + if ch == "<": + # (?<... + saved_pos_3 = source.pos + ch = source.get() + if ch in ("=", "!"): + # (?<=... or (?") + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + return Group(info, group, subpattern) + if ch in ("=", "!"): + # (?=... or (?!...: lookahead. + return parse_lookaround(source, info, False, ch == "=") + if ch == "P": + # (?P...: a Python extension. + return parse_extension(source, info) + if ch == "#": + # (?#...: a comment. + return parse_comment(source) + if ch == "(": + # (?(...: a conditional subpattern. + return parse_conditional(source, info) + if ch == ">": + # (?>...: an atomic subpattern. + return parse_atomic(source, info) + if ch == "|": + # (?|...: a common/reset groups branch. + return parse_common(source, info) + if ch == "R" or "0" <= ch <= "9": + # (?R...: probably a call to a group. + return parse_call_group(source, info, ch, saved_pos_2) + if ch == "&": + # (?&...: a call to a named group. + return parse_call_named_group(source, info, saved_pos_2) + + # (?...: probably a flags subpattern. + source.pos = saved_pos_2 + return parse_flags_subpattern(source, info) + + # (...: an unnamed capture group. + source.pos = saved_pos + group = info.open_group() + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + + return Group(info, group, subpattern) + +def parse_extension(source, info): + "Parses a Python extension." + saved_pos = source.pos + ch = source.get() + if ch == "<": + # (?P<...: a named capture group. + name = parse_name(source) + group = info.open_group(name) + source.expect(">") + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + + return Group(info, group, subpattern) + if ch == "=": + # (?P=...: a named group reference. + name = parse_name(source) + source.expect(")") + if info.is_open_group(name): + raise error("can't refer to an open group at position %d" % saved_pos) + + return make_ref_group(info, name, saved_pos) + if ch == ">" or ch == "&": + # (?P>...: a call to a group. + return parse_call_named_group(source, info, saved_pos) + + source.pos = saved_pos + raise error("unknown extension at position %d" % saved_pos) + +def parse_comment(source): + "Parses a comment." + source.skip_while(set(")"), include=False) + source.expect(")") + + return COMMENT + +def parse_lookaround(source, info, behind, positive): + "Parses a lookaround." + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return LookAround(behind, positive, subpattern) + +def parse_conditional(source, info): + "Parses a conditional subpattern." + saved_flags = info.flags + saved_pos = source.pos + try: + group = parse_name(source, True) + source.expect(")") + yes_branch = parse_sequence(source, info) + if source.match("|"): + no_branch = parse_sequence(source, info) + else: + no_branch = Sequence() + + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + if yes_branch.is_empty() and no_branch.is_empty(): + return Sequence() + + return Conditional(info, group, yes_branch, no_branch, saved_pos) + +def parse_atomic(source, info): + "Parses an atomic subpattern." + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return Atomic(subpattern) + +def parse_common(source, info): + "Parses a common groups branch." + # Capture group numbers in different branches can reuse the group numbers. + initial_group_count = info.group_count + branches = [parse_sequence(source, info)] + final_group_count = info.group_count + while source.match("|"): + info.group_count = initial_group_count + branches.append(parse_sequence(source, info)) + final_group_count = max(final_group_count, info.group_count) + + info.group_count = final_group_count + source.expect(")") + + if len(branches) == 1: + return branches[0] + return Branch(branches) + +def parse_call_group(source, info, ch, pos): + "Parses a call to a group." + if ch == "R": + group = "0" + else: + group = ch + source.get_while(DIGITS) + + source.expect(")") + + return CallGroup(info, group, pos) + +def parse_call_named_group(source, info, pos): + "Parses a call to a named group." + group = parse_name(source) + source.expect(")") + + return CallGroup(info, group, pos) + +def parse_flag_set(source): + "Parses a set of inline flags." + flags = 0 + + try: + while True: + saved_pos = source.pos + ch = source.get() + if ch == "V": + ch += source.get() + flags |= REGEX_FLAGS[ch] + except KeyError: + source.pos = saved_pos + + return flags + +def parse_flags(source, info): + "Parses flags being turned on/off." + flags_on = parse_flag_set(source) + if source.match("-"): + flags_off = parse_flag_set(source) + if not flags_off: + raise error("bad inline flags: no flags after '-' at position %d" % source.pos) + else: + flags_off = 0 + + return flags_on, flags_off + +def parse_subpattern(source, info, flags_on, flags_off): + "Parses a subpattern with scoped flags." + saved_flags = info.flags + info.flags = (info.flags | flags_on) & ~flags_off + source.ignore_space = bool(info.flags & VERBOSE) + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return subpattern + +def parse_positional_flags(source, info, flags_on, flags_off): + "Parses positional flags." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version == VERSION0: + # Positional flags are global and can only be turned on. + if flags_off: + raise error("bad inline flags: can't turn flags off at position %d" % source.pos) + + new_global_flags = flags_on & ~info.global_flags + if new_global_flags: + info.global_flags |= new_global_flags + + # A global has been turned on, so reparse the pattern. + raise _UnscopedFlagSet(info.global_flags) + else: + info.flags = (info.flags | flags_on) & ~flags_off + + source.ignore_space = bool(info.flags & VERBOSE) + + return None + +def parse_flags_subpattern(source, info): + """Parses a flags subpattern. It could be inline flags or a subpattern + possibly with local flags. + """ + flags_on, flags_off = parse_flags(source, info) + + if flags_off & GLOBAL_FLAGS: + raise error("bad inline flags: can't turn off global flag at position %d" % source.pos) + + if flags_on & flags_off: + raise error("bad inline flags: flag turned on and off at position %d" % source.pos) + + # Handle flags which are global in all regex behaviours. + new_global_flags = (flags_on & ~info.global_flags) & GLOBAL_FLAGS + if new_global_flags: + info.global_flags |= new_global_flags + + # A global has been turned on, so reparse the pattern. + raise _UnscopedFlagSet(info.global_flags) + + # Ensure that from now on we have only scoped flags. + flags_on &= ~GLOBAL_FLAGS + + if source.match(":"): + return parse_subpattern(source, info, flags_on, flags_off) + + if source.match(")"): + return parse_positional_flags(source, info, flags_on, flags_off) + + raise error("unknown extension at position %d" % source.pos) + +def parse_name(source, allow_numeric=False): + "Parses a name." + name = source.get_while(set(")>"), include=False) + + if not name: + raise error("bad group name at position %d" % source.pos) + + if name.isdigit(): + if not allow_numeric: + raise error("bad group name at position %d" % source.pos) + else: + if not is_identifier(name): + raise error("bad group name at position %d" % source.pos) + + return name + +def is_identifier(name): + if not name: + return False + + if name[0] not in ALPHA and name[0] != "_": + return False + + name = name.replace("_", "") + + return not name or all(c in ALNUM for c in name) + +def is_octal(string): + "Checks whether a string is octal." + return all(ch in OCT_DIGITS for ch in string) + +def is_decimal(string): + "Checks whether a string is decimal." + return all(ch in DIGITS for ch in string) + +def is_hexadecimal(string): + "Checks whether a string is hexadecimal." + return all(ch in HEX_DIGITS for ch in string) + +def parse_escape(source, info, in_set): + "Parses an escape sequence." + saved_ignore = source.ignore_space + source.ignore_space = False + ch = source.get() + source.ignore_space = saved_ignore + if not ch: + # A backslash at the end of the pattern. + raise error("bad escape at position %d" % source.pos) + if ch in HEX_ESCAPES: + # A hexadecimal escape sequence. + return parse_hex_escape(source, info, HEX_ESCAPES[ch], in_set) + elif ch == "g" and not in_set: + # A group reference. + saved_pos = source.pos + try: + return parse_group_ref(source, info) + except error: + # Invalid as a group reference, so assume it's a literal. + source.pos = saved_pos + + return make_character(info, ord(ch), in_set) + elif ch == "G" and not in_set: + # A search anchor. + return SearchAnchor() + elif ch == "L" and not in_set: + # A string set. + return parse_string_set(source, info) + elif ch == "N": + # A named codepoint. + return parse_named_char(source, info, in_set) + elif ch in "pP": + # A Unicode property, positive or negative. + return parse_property(source, info, ch == "p", in_set) + elif ch == "X" and not in_set: + # A grapheme cluster. + return Grapheme() + elif ch in ALPHA: + # An alphabetic escape sequence. + # Positional escapes aren't allowed inside a character set. + if not in_set: + if info.flags & WORD: + value = WORD_POSITION_ESCAPES.get(ch) + else: + value = POSITION_ESCAPES.get(ch) + + if value: + return value + + value = CHARSET_ESCAPES.get(ch) + if value: + return value + + value = CHARACTER_ESCAPES.get(ch) + if value: + return Character(ord(value)) + + return make_character(info, ord(ch), in_set) + elif ch in DIGITS: + # A numeric escape sequence. + return parse_numeric_escape(source, info, ch, in_set) + else: + # A literal. + return make_character(info, ord(ch), in_set) + +def parse_numeric_escape(source, info, ch, in_set): + "Parses a numeric escape sequence." + if in_set or ch == "0": + # Octal escape sequence, max 3 digits. + return parse_octal_escape(source, info, [ch], in_set) + + # At least 1 digit, so either octal escape or group. + digits = ch + saved_pos = source.pos + ch = source.get() + if ch in DIGITS: + # At least 2 digits, so either octal escape or group. + digits += ch + saved_pos = source.pos + ch = source.get() + if is_octal(digits) and ch in OCT_DIGITS: + # 3 octal digits, so octal escape sequence. + encoding = info.flags & _ALL_ENCODINGS + if encoding == ASCII or encoding == LOCALE: + octal_mask = 0xFF + else: + octal_mask = 0x1FF + + value = int(digits + ch, 8) & octal_mask + return make_character(info, value) + + # Group reference. + source.pos = saved_pos + if info.is_open_group(digits): + raise error("can't refer to an open group at position %d" % source.pos) + + return make_ref_group(info, digits, source.pos) + +def parse_octal_escape(source, info, digits, in_set): + "Parses an octal escape sequence." + saved_pos = source.pos + ch = source.get() + while len(digits) < 3 and ch in OCT_DIGITS: + digits.append(ch) + saved_pos = source.pos + ch = source.get() + + source.pos = saved_pos + try: + value = int("".join(digits), 8) + return make_character(info, value, in_set) + except ValueError: + raise error("bad octal escape at position %d" % source.pos) + +def parse_hex_escape(source, info, expected_len, in_set): + "Parses a hex escape sequence." + digits = [] + for i in range(expected_len): + ch = source.get() + if ch not in HEX_DIGITS: + raise error("bad hex escape at position %d" % source.pos) + digits.append(ch) + + value = int("".join(digits), 16) + return make_character(info, value, in_set) + +def parse_group_ref(source, info): + "Parses a group reference." + source.expect("<") + saved_pos = source.pos + name = parse_name(source, True) + source.expect(">") + if info.is_open_group(name): + raise error("can't refer to an open group at position %d" % source.pos) + + return make_ref_group(info, name, saved_pos) + +def parse_string_set(source, info): + "Parses a string set reference." + source.expect("<") + name = parse_name(source, True) + source.expect(">") + if name is None or name not in info.kwargs: + raise error("undefined named list at position %d" % source.pos) + + return make_string_set(info, name) + +def parse_named_char(source, info, in_set): + "Parses a named character." + saved_pos = source.pos + if source.match("{"): + name = source.get_while(NAMED_CHAR_PART) + if source.match("}"): + try: + value = unicodedata.lookup(name) + return make_character(info, ord(value), in_set) + except KeyError: + raise error("undefined character name at position %d" % source.pos) + + source.pos = saved_pos + return make_character(info, ord("N"), in_set) + +def parse_property(source, info, positive, in_set): + "Parses a Unicode property." + saved_pos = source.pos + ch = source.get() + if ch == "{": + negate = source.match("^") + prop_name, name = parse_property_name(source) + if source.match("}"): + # It's correctly delimited. + prop = lookup_property(prop_name, name, positive != negate) + return make_property(info, prop, in_set) + elif ch and ch in "CLMNPSZ": + # An abbreviated property, eg \pL. + prop = lookup_property(None, ch, positive) + return make_property(info, prop, in_set) + + # Not a property, so treat as a literal "p" or "P". + source.pos = saved_pos + ch = "p" if positive else "P" + return make_character(info, ord(ch), in_set) + +def parse_property_name(source): + "Parses a property name, which may be qualified." + name = source.get_while(PROPERTY_NAME_PART) + saved_pos = source.pos + + ch = source.get() + if ch and ch in ":=": + prop_name = name + name = source.get_while(ALNUM | set(" &_-./")).strip() + + if name: + # Name after the ":" or "=", so it's a qualified name. + saved_pos = source.pos + else: + # No name after the ":" or "=", so assume it's an unqualified name. + prop_name, name = None, prop_name + else: + prop_name = None + + source.pos = saved_pos + return prop_name, name + +def parse_set(source, info): + "Parses a character set." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + saved_ignore = source.ignore_space + source.ignore_space = False + # Negative set? + negate = source.match("^") + try: + if version == VERSION0: + item = parse_set_imp_union(source, info) + else: + item = parse_set_union(source, info) + + if not source.match("]"): + raise error("missing ] at position %d" % source.pos) + finally: + source.ignore_space = saved_ignore + + if negate: + item = item.with_flags(positive=not item.positive) + + item = item.with_flags(case_flags=info.flags & CASE_FLAGS) + + return item + +def parse_set_union(source, info): + "Parses a set union ([x||y])." + items = [parse_set_symm_diff(source, info)] + while source.match("||"): + items.append(parse_set_symm_diff(source, info)) + + if len(items) == 1: + return items[0] + return SetUnion(info, items) + +def parse_set_symm_diff(source, info): + "Parses a set symmetric difference ([x~~y])." + items = [parse_set_inter(source, info)] + while source.match("~~"): + items.append(parse_set_inter(source, info)) + + if len(items) == 1: + return items[0] + return SetSymDiff(info, items) + +def parse_set_inter(source, info): + "Parses a set intersection ([x&&y])." + items = [parse_set_diff(source, info)] + while source.match("&&"): + items.append(parse_set_diff(source, info)) + + if len(items) == 1: + return items[0] + return SetInter(info, items) + +def parse_set_diff(source, info): + "Parses a set difference ([x--y])." + items = [parse_set_imp_union(source, info)] + while source.match("--"): + items.append(parse_set_imp_union(source, info)) + + if len(items) == 1: + return items[0] + return SetDiff(info, items) + +def parse_set_imp_union(source, info): + "Parses a set implicit union ([xy])." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + items = [parse_set_member(source, info)] + while True: + saved_pos = source.pos + if source.match("]"): + # End of the set. + source.pos = saved_pos + break + + if version == VERSION1 and any(source.match(op) for op in SET_OPS): + # The new behaviour has set operators. + source.pos = saved_pos + break + + items.append(parse_set_member(source, info)) + + if len(items) == 1: + return items[0] + return SetUnion(info, items) + +def parse_set_member(source, info): + "Parses a member in a character set." + # Parse a set item. + start = parse_set_item(source, info) + if (not isinstance(start, Character) or not start.positive or not + source.match("-")): + # It's not the start of a range. + return start + + # It looks like the start of a range of characters. + saved_pos = source.pos + if source.match("]"): + # We've reached the end of the set, so return both the character and + # hyphen. + source.pos = saved_pos + return SetUnion(info, [start, Character(ord("-"))]) + + # Parse a set item. + end = parse_set_item(source, info) + if not isinstance(end, Character) or not end.positive: + # It's not a range, so return the character, hyphen and property. + return SetUnion(info, [start, Character(ord("-")), end]) + + # It _is_ a range. + if start.value > end.value: + raise error("bad character range at position %d" % source.pos) + + if start.value == end.value: + return start + + return Range(start.value, end.value) + +def parse_set_item(source, info): + "Parses an item in a character set." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + if source.match("\\"): + # An escape sequence in a set. + return parse_escape(source, info, True) + + saved_pos = source.pos + if source.match("[:"): + # Looks like a POSIX character class. + try: + return parse_posix_class(source, info) + except ParseError: + # Not a POSIX character class. + source.pos = saved_pos + + if version == VERSION1 and source.match("["): + # It's the start of a nested set. + + # Negative set? + negate = source.match("^") + item = parse_set_union(source, info) + + if not source.match("]"): + raise error("missing ] at position %d" % source.pos) + + if negate: + item = item.with_flags(positive=not item.positive) + + return item + + ch = source.get() + if not ch: + raise error("bad set at position %d" % source.pos, True) + + return Character(ord(ch)) + +def parse_posix_class(source, info): + "Parses a POSIX character class." + negate = source.match("^") + prop_name, name = parse_property_name(source) + if not source.match(":]"): + raise ParseError() + + return lookup_property(prop_name, name, positive=not negate) + +def float_to_rational(flt): + "Converts a float to a rational pair." + int_part = int(flt) + error = flt - int_part + if abs(error) < 0.0001: + return int_part, 1 + + den, num = float_to_rational(1.0 / error) + + return int_part * den + num, den + +def numeric_to_rational(numeric): + "Converts a numeric string to a rational string, if possible." + if numeric[0] == "-": + sign, numeric = numeric[0], numeric[1 : ] + else: + sign = "" + + parts = numeric.split("/") + if len(parts) == 2: + num, den = float_to_rational(float(parts[0]) / float(parts[1])) + elif len(parts) == 1: + num, den = float_to_rational(float(parts[0])) + else: + raise ValueError() + + result = "%s%s/%s" % (sign, num, den) + if result.endswith("/1"): + return result[ : -2] + + return result + +def standardise_name(name): + "Standardises a property or value name." + try: + return numeric_to_rational("".join(name)) + except (ValueError, ZeroDivisionError): + return "".join(ch for ch in name if ch not in "_- ").upper() + +def lookup_property(property, value, positive): + "Looks up a property." + # Normalise the names (which may still be lists). + property = standardise_name(property) if property else None + value = standardise_name(value) + if property: + # Both the property and the value are provided. + prop = PROPERTIES.get(property) + if not prop: + raise error("unknown property at position %d" % source.pos) + + prop_id, value_dict = prop + val_id = value_dict.get(value) + if val_id is None: + raise error("unknown property value at position %d" % source.pos) + + if "YES" in value_dict and val_id == 0: + positive, val_id = not positive, 1 + + return Property((prop_id << 16) | val_id, positive) + + # Only the value is provided. + # It might be the name of a GC, script or block value. + for property in ("GC", "SCRIPT", "BLOCK"): + prop_id, value_dict = PROPERTIES.get(property) + val_id = value_dict.get(value) + if val_id is not None: + return Property((prop_id << 16) | val_id, positive) + + # It might be the name of a binary property. + prop = PROPERTIES.get(value) + if prop: + prop_id, value_dict = prop + + if "YES" in value_dict: + return Property((prop_id << 16) | 1, positive) + + # It might be the name of a binary property starting with a prefix. + if value.startswith("IS"): + prop = PROPERTIES.get(value[2 : ]) + if prop: + prop_id, value_dict = prop + if "YES" in value_dict: + return Property((prop_id << 16) | 1, positive) + + # It might be the name of a script or block starting with a prefix. + for prefix, property in (("IS", "SCRIPT"), ("IN", "BLOCK")): + if value.startswith(prefix): + prop_id, value_dict = PROPERTIES.get(property) + val_id = value_dict.get(value[2 : ]) + if val_id is not None: + return Property((prop_id << 16) | val_id, positive) + + # Unknown property. + raise error("unknown property at position %d" % source.pos) + +def _compile_replacement(source, pattern, is_unicode): + "Compiles a replacement template escape sequence." + ch = source.get() + if ch in ALPHA: + # An alphabetic escape sequence. + value = CHARACTER_ESCAPES.get(ch) + if value: + return False, [ord(value)] + + if ch in HEX_ESCAPES and (ch == "x" or is_unicode): + # A hexadecimal escape sequence. + return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch])] + + if ch == "g": + # A group preference. + return True, [compile_repl_group(source, pattern)] + + if ch == "N" and is_unicode: + # A named character. + value = parse_repl_named_char(source) + if value is not None: + return False, [value] + + return False, [ord("\\"), ord(ch)] + + if isinstance(source.sep, str): + octal_mask = 0xFF + else: + octal_mask = 0x1FF + + if ch == "0": + # An octal escape sequence. + digits = ch + while len(digits) < 3: + saved_pos = source.pos + ch = source.get() + if ch not in OCT_DIGITS: + source.pos = saved_pos + break + digits += ch + + return False, [int(digits, 8) & octal_mask] + + if ch in DIGITS: + # Either an octal escape sequence (3 digits) or a group reference (max + # 2 digits). + digits = ch + saved_pos = source.pos + ch = source.get() + if ch in DIGITS: + digits += ch + saved_pos = source.pos + ch = source.get() + if ch and is_octal(digits + ch): + # An octal escape sequence. + return False, [int(digits + ch, 8) & octal_mask] + + # A group reference. + source.pos = saved_pos + return True, [int(digits)] + + if ch == "\\": + # An escaped backslash is a backslash. + return False, [ord("\\")] + + if not ch: + # A trailing backslash. + raise error("bad escape at position %d" % source.pos) + + # An escaped non-backslash is a backslash followed by the literal. + return False, [ord("\\"), ord(ch)] + +def parse_repl_hex_escape(source, expected_len): + "Parses a hex escape sequence in a replacement string." + digits = [] + for i in range(expected_len): + ch = source.get() + if ch not in HEX_DIGITS: + raise error("bad hex escape at position %d" % source.pos) + digits.append(ch) + + return int("".join(digits), 16) + +def parse_repl_named_char(source): + "Parses a named character in a replacement string." + saved_pos = source.pos + if source.match("{"): + name = source.get_while(ALPHA | set(" ")) + + if source.match("}"): + try: + value = unicodedata.lookup(name) + return ord(value) + except KeyError: + raise error("undefined character name at position %d" % source.pos) + + source.pos = saved_pos + return None + +def compile_repl_group(source, pattern): + "Compiles a replacement template group reference." + source.expect("<") + name = parse_name(source, True) + + source.expect(">") + if name.isdigit(): + index = int(name) + if not 0 <= index <= pattern.groups: + raise error("invalid group at position %d" % source.pos) + + return index + + try: + return pattern.groupindex[name] + except KeyError: + raise IndexError("unknown group") + +# The regular expression is parsed into a syntax tree. The different types of +# node are defined below. + +INDENT = " " +POSITIVE_OP = 0x1 +ZEROWIDTH_OP = 0x2 +FUZZY_OP = 0x4 +REVERSE_OP = 0x8 +REQUIRED_OP = 0x10 + +POS_TEXT = {False: "NON-MATCH", True: "MATCH"} +CASE_TEXT = {NOCASE: "", IGNORECASE: " SIMPLE_IGNORE_CASE", FULLCASE: "", + FULLIGNORECASE: " FULL_IGNORE_CASE"} + +def make_sequence(items): + if len(items) == 1: + return items[0] + return Sequence(items) + +# Common base class for all nodes. +class RegexBase(object): + def __init__(self): + self._key = self.__class__ + + def with_flags(self, positive=None, case_flags=None, zerowidth=None): + if positive is None: + positive = self.positive + else: + positive = bool(positive) + if case_flags is None: + case_flags = self.case_flags + else: + case_flags = case_flags & CASE_FLAGS + if zerowidth is None: + zerowidth = self.zerowidth + else: + zerowidth = bool(zerowidth) + + if (positive == self.positive and case_flags == self.case_flags and + zerowidth == self.zerowidth): + return self + + return self.rebuild(positive, case_flags, zerowidth) + + def fix_groups(self, reverse, fuzzy): + pass + + def optimise(self, info): + return self + + def pack_characters(self, info): + return self + + def remove_captures(self): + return self + + def is_atomic(self): + return True + + def can_be_affix(self): + return True + + def contains_group(self): + return False + + def get_firstset(self, reverse): + raise _FirstSetError() + + def has_simple_start(self): + return False + + def is_empty(self): + return False + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + return type(self) is type(other) and self._key == other._key + + def __ne__(self, other): + return not self.__eq__(other) + + def get_required_string(self, reverse): + return self.max_width(), None + +# Base class for zero-width nodes. +class ZeroWidthBase(RegexBase): + def __init__(self, positive=True): + RegexBase.__init__(self) + self.positive = bool(positive) + + self._key = self.__class__, self.positive + + def get_firstset(self, reverse): + return set([None]) + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if fuzzy: + flags |= FUZZY_OP + if reverse: + flags |= REVERSE_OP + return [(self._opcode, flags)] + + def dump(self, indent=0, reverse=False): + print "%s%s %s" % (INDENT * indent, self._op_name, + POS_TEXT[self.positive]) + + def max_width(self): + return 0 + +class Any(RegexBase): + _opcode = {False: OP.ANY, True: OP.ANY_REV} + _op_name = "ANY" + + def has_simple_start(self): + return True + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[reverse], flags)] + + def dump(self, indent=0, reverse=False): + print "%s%s" % (INDENT * indent, self._op_name) + + def max_width(self): + return 1 + +class AnyAll(Any): + _opcode = {False: OP.ANY_ALL, True: OP.ANY_ALL_REV} + _op_name = "ANY_ALL" + +class AnyU(Any): + _opcode = {False: OP.ANY_U, True: OP.ANY_U_REV} + _op_name = "ANY_U" + +class Atomic(RegexBase): + def __init__(self, subpattern): + RegexBase.__init__(self) + self.subpattern = subpattern + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(reverse, fuzzy) + + def optimise(self, info): + self.subpattern = self.subpattern.optimise(info) + + if self.subpattern.is_empty(): + return self.subpattern + return self + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def can_be_affix(self): + return self.subpattern.can_be_affix() + + def contains_group(self): + return self.subpattern.contains_group() + + def get_firstset(self, reverse): + return self.subpattern.get_firstset(reverse) + + def has_simple_start(self): + return self.subpattern.has_simple_start() + + def compile(self, reverse=False, fuzzy=False): + return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) + + [(OP.END, )]) + + def dump(self, indent=0, reverse=False): + print "%sATOMIC" % (INDENT * indent) + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return (type(self) is type(other) and self.subpattern == + other.subpattern) + + def max_width(self): + return self.subpattern.max_width() + + def get_required_string(self, reverse): + return self.subpattern.get_required_string(reverse) + +class Boundary(ZeroWidthBase): + _opcode = OP.BOUNDARY + _op_name = "BOUNDARY" + +class Branch(RegexBase): + def __init__(self, branches): + RegexBase.__init__(self) + self.branches = branches + + def fix_groups(self, reverse, fuzzy): + for b in self.branches: + b.fix_groups(reverse, fuzzy) + + def optimise(self, info): + # Flatten branches within branches. + branches = Branch._flatten_branches(info, self.branches) + + # Move any common prefix or suffix out of the branches. + prefix, branches = Branch._split_common_prefix(info, branches) + suffix, branches = Branch._split_common_suffix(info, branches) + + # Merge branches starting with the same character. (If a character + # prefix doesn't match in one branch, it won't match in any of the + # others starting with that same character.) + branches = Branch._merge_common_prefixes(info, branches) + + # Try to reduce adjacent single-character branches to sets. + branches = Branch._reduce_to_set(info, branches) + + if len(branches) > 1: + sequence = prefix + [Branch(branches)] + suffix + else: + sequence = prefix + branches + suffix + + return make_sequence(sequence) + + def pack_characters(self, info): + self.branches = [b.pack_characters(info) for b in self.branches] + return self + + def remove_captures(self): + self.branches = [b.remove_captures() for b in self.branches] + return self + + def is_atomic(self): + return all(b.is_atomic() for b in self.branches) + + def can_be_affix(self): + return all(b.can_be_affix() for b in self.branches) + + def contains_group(self): + return any(b.contains_group() for b in self.branches) + + def get_firstset(self, reverse): + fs = set() + for b in self.branches: + fs |= b.get_firstset(reverse) + + return fs or set([None]) + + def compile(self, reverse=False, fuzzy=False): + code = [(OP.BRANCH, )] + for b in self.branches: + code.extend(b.compile(reverse, fuzzy)) + code.append((OP.NEXT, )) + + code[-1] = (OP.END, ) + + return code + + def dump(self, indent=0, reverse=False): + print "%sBRANCH" % (INDENT * indent) + self.branches[0].dump(indent + 1, reverse) + for b in self.branches[1 : ]: + print "%sOR" % (INDENT * indent) + b.dump(indent + 1, reverse) + + @staticmethod + def _flatten_branches(info, branches): + # Flatten the branches so that there aren't branches of branches. + new_branches = [] + for b in branches: + b = b.optimise(info) + if isinstance(b, Branch): + new_branches.extend(b.branches) + else: + new_branches.append(b) + + return new_branches + + @staticmethod + def _split_common_prefix(info, branches): + # Common leading items can be moved out of the branches. + # Get the items in the branches. + alternatives = [] + for b in branches: + if isinstance(b, Sequence): + alternatives.append(b.items) + else: + alternatives.append([b]) + + # What is the maximum possible length of the prefix? + max_count = min(len(a) for a in alternatives) + + # What is the longest common prefix? + prefix = alternatives[0] + pos = 0 + end_pos = max_count + while pos < end_pos and prefix[pos].can_be_affix() and all(a[pos] == + prefix[pos] for a in alternatives): + pos += 1 + count = pos + + if info.flags & UNICODE: + # We need to check that we're not splitting a sequence of + # characters which could form part of full case-folding. + count = pos + while count > 0 and not all(Branch._can_split(a, count) for a in + alternatives): + count -= 1 + + # No common prefix is possible. + if count == 0: + return [], branches + + # Rebuild the branches. + new_branches = [] + for a in alternatives: + new_branches.append(make_sequence(a[count : ])) + + return prefix[ : count], new_branches + + @staticmethod + def _split_common_suffix(info, branches): + # Common trailing items can be moved out of the branches. + # Get the items in the branches. + alternatives = [] + for b in branches: + if isinstance(b, Sequence): + alternatives.append(b.items) + else: + alternatives.append([b]) + + # What is the maximum possible length of the suffix? + max_count = min(len(a) for a in alternatives) + + # What is the longest common suffix? + suffix = alternatives[0] + pos = -1 + end_pos = -1 - max_count + while pos > end_pos and suffix[pos].can_be_affix() and all(a[pos] == + suffix[pos] for a in alternatives): + pos -= 1 + count = -1 - pos + + if info.flags & UNICODE: + # We need to check that we're not splitting a sequence of + # characters which could form part of full case-folding. + while count > 0 and not all(Branch._can_split_rev(a, count) for a + in alternatives): + count -= 1 + + # No common suffix is possible. + if count == 0: + return [], branches + + # Rebuild the branches. + new_branches = [] + for a in alternatives: + new_branches.append(make_sequence(a[ : -count])) + + return suffix[-count : ], new_branches + + @staticmethod + def _can_split(items, count): + # Check the characters either side of the proposed split. + if not Branch._is_full_case(items, count - 1): + return True + + if not Branch._is_full_case(items, count): + return True + + # Check whether a 1-1 split would be OK. + if Branch._is_folded(items[count - 1 : count + 1]): + return False + + # Check whether a 1-2 split would be OK. + if (Branch._is_full_case(items, count + 2) and + Branch._is_folded(items[count - 1 : count + 2])): + return False + + # Check whether a 2-1 split would be OK. + if (Branch._is_full_case(items, count - 2) and + Branch._is_folded(items[count - 2 : count + 1])): + return False + + return True + + @staticmethod + def _can_split_rev(items, count): + end = len(items) + + # Check the characters either side of the proposed split. + if not Branch._is_full_case(items, end - count): + return True + + if not Branch._is_full_case(items, end - count - 1): + return True + + # Check whether a 1-1 split would be OK. + if Branch._is_folded(items[end - count - 1 : end - count + 1]): + return False + + # Check whether a 1-2 split would be OK. + if (Branch._is_full_case(items, end - count + 2) and + Branch._is_folded(items[end - count - 1 : end - count + 2])): + return False + + # Check whether a 2-1 split would be OK. + if (Branch._is_full_case(items, end - count - 2) and + Branch._is_folded(items[end - count - 2 : end - count + 1])): + return False + + return True + + @staticmethod + def _merge_common_prefixes(info, branches): + # Branches with the same case-sensitive character prefix can be grouped + # together if they are separated only by other branches with a + # character prefix. + prefixed = defaultdict(list) + order = {} + new_branches = [] + for b in branches: + if Branch._is_simple_character(b): + # Branch starts with a simple character. + prefixed[b.value].append([b]) + order.setdefault(b.value, len(order)) + elif (isinstance(b, Sequence) and b.items and + Branch._is_simple_character(b.items[0])): + # Branch starts with a simple character. + prefixed[b.items[0].value].append(b.items) + order.setdefault(b.items[0].value, len(order)) + else: + Branch._flush_char_prefix(info, prefixed, order, new_branches) + + new_branches.append(b) + + Branch._flush_char_prefix(info, prefixed, order, new_branches) + + return new_branches + + @staticmethod + def _is_simple_character(c): + return isinstance(c, Character) and c.positive and not c.case_flags + + @staticmethod + def _reduce_to_set(info, branches): + # Can the branches be reduced to a set? + new_branches = [] + items = set() + case_flags = NOCASE + for b in branches: + if isinstance(b, (Character, Property, SetBase)): + # Branch starts with a single character. + if b.case_flags != case_flags: + # Different case sensitivity, so flush. + Branch._flush_set_members(info, items, case_flags, + new_branches) + + case_flags = b.case_flags + + items.add(b.with_flags(case_flags=NOCASE)) + else: + Branch._flush_set_members(info, items, case_flags, + new_branches) + + new_branches.append(b) + + Branch._flush_set_members(info, items, case_flags, new_branches) + + return new_branches + + @staticmethod + def _flush_char_prefix(info, prefixed, order, new_branches): + # Flush the prefixed branches. + if not prefixed: + return + + for value, branches in sorted(prefixed.items(), key=lambda pair: + order[pair[0]]): + if len(branches) == 1: + new_branches.append(make_sequence(branches[0])) + else: + subbranches = [] + optional = False + for b in branches: + if len(b) > 1: + subbranches.append(make_sequence(b[1 : ])) + elif not optional: + subbranches.append(Sequence()) + optional = True + + sequence = Sequence([Character(value), Branch(subbranches)]) + new_branches.append(sequence.optimise(info)) + + prefixed.clear() + order.clear() + + @staticmethod + def _flush_set_members(info, items, case_flags, new_branches): + # Flush the set members. + if not items: + return + + if len(items) == 1: + item = list(items)[0] + else: + item = SetUnion(info, list(items)).optimise(info) + + new_branches.append(item.with_flags(case_flags=case_flags)) + + items.clear() + + @staticmethod + def _is_full_case(items, i): + if not 0 <= i < len(items): + return False + + item = items[i] + return (isinstance(item, Character) and item.positive and + (item.case_flags & FULLIGNORECASE) == FULLIGNORECASE) + + @staticmethod + def _is_folded(items): + if len(items) < 2: + return False + + for i in items: + if (not isinstance(i, Character) or not i.positive or not + i.case_flags): + return False + + folded = u"".join(unichr(i.value) for i in items) + folded = _regex.fold_case(FULL_CASE_FOLDING, folded) + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + for c in expanding_chars: + if folded == _regex.fold_case(FULL_CASE_FOLDING, c): + return True + + return False + + def is_empty(self): + return all(b.is_empty() for b in self.branches) + + def __eq__(self, other): + return type(self) is type(other) and self.branches == other.branches + + def max_width(self): + return max(b.max_width() for b in self.branches) + +class CallGroup(RegexBase): + def __init__(self, info, group, position): + RegexBase.__init__(self) + self.info = info + self.group = group + self.position = position + + self._key = self.__class__, self.group + + def fix_groups(self, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("unknown group at position %d" % self.position) + + if not 0 <= self.group <= self.info.group_count: + raise error("unknown group at position %d" % self.position) + + if self.group > 0 and self.info.open_group_count[self.group] > 1: + raise error("ambiguous group reference at position %d" % self.position) + + self.info.group_calls.append((self, reverse, fuzzy)) + + self._key = self.__class__, self.group + + def remove_captures(self): + raise error("group reference not allowed at position %d" % self.position) + + def compile(self, reverse=False, fuzzy=False): + return [(OP.GROUP_CALL, self.call_ref)] + + def dump(self, indent=0, reverse=False): + print "%sGROUP_CALL %s" % (INDENT * indent, self.group) + + def __eq__(self, other): + return type(self) is type(other) and self.group == other.group + + def max_width(self): + return UNLIMITED + +class Character(RegexBase): + _opcode = {(NOCASE, False): OP.CHARACTER, (IGNORECASE, False): + OP.CHARACTER_IGN, (FULLCASE, False): OP.CHARACTER, (FULLIGNORECASE, + False): OP.CHARACTER_IGN, (NOCASE, True): OP.CHARACTER_REV, (IGNORECASE, + True): OP.CHARACTER_IGN_REV, (FULLCASE, True): OP.CHARACTER_REV, + (FULLIGNORECASE, True): OP.CHARACTER_IGN_REV} + + def __init__(self, value, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.value = value + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + if (self.positive and (self.case_flags & FULLIGNORECASE) == + FULLIGNORECASE): + self.folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(self.value)) + else: + self.folded = unichr(self.value) + + self._key = (self.__class__, self.value, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Character(self.value, positive, case_flags, zerowidth) + + def optimise(self, info, in_set=False): + return self + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + + code = PrecompiledCode([self._opcode[self.case_flags, reverse], flags, + self.value]) + + if len(self.folded) > 1: + # The character expands on full case-folding. + code = Branch([code, String([ord(c) for c in self.folded], + case_flags=self.case_flags)]) + + return code.compile(reverse, fuzzy) + + def dump(self, indent=0, reverse=False): + display = repr(unichr(self.value)).lstrip("bu") + print "%sCHARACTER %s %s%s" % (INDENT * indent, + POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags]) + + def matches(self, ch): + return (ch == self.value) == self.positive + + def max_width(self): + return len(self.folded) + + def get_required_string(self, reverse): + if not self.positive: + return 1, None + + self.folded_characters = tuple(ord(c) for c in self.folded) + + return 0, self + +class Conditional(RegexBase): + def __init__(self, info, group, yes_item, no_item, position): + RegexBase.__init__(self) + self.info = info + self.group = group + self.yes_item = yes_item + self.no_item = no_item + self.position = position + + def fix_groups(self, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("unknown group at position %d" % self.position) + + if not 1 <= self.group <= self.info.group_count: + raise error("unknown group at position %d" % self.position) + + self.yes_item.fix_groups(reverse, fuzzy) + self.no_item.fix_groups(reverse, fuzzy) + + def optimise(self, info): + yes_item = self.yes_item.optimise(info) + no_item = self.no_item.optimise(info) + + return Conditional(info, self.group, yes_item, no_item, self.position) + + def pack_characters(self, info): + self.yes_item = self.yes_item.pack_characters(info) + self.no_item = self.no_item.pack_characters(info) + return self + + def remove_captures(self): + self.yes_item = self.yes_item.remove_captures() + self.no_item = self.no_item.remove_captures() + + def is_atomic(self): + return self.yes_item.is_atomic() and self.no_item.is_atomic() + + def can_be_affix(self): + return self.yes_item.can_be_affix() and self.no_item.can_be_affix() + + def contains_group(self): + return self.yes_item.contains_group() or self.no_item.contains_group() + + def get_firstset(self, reverse): + return (self.yes_item.get_firstset(reverse) | + self.no_item.get_firstset(reverse)) + + def compile(self, reverse=False, fuzzy=False): + code = [(OP.GROUP_EXISTS, self.group)] + code.extend(self.yes_item.compile(reverse, fuzzy)) + add_code = self.no_item.compile(reverse, fuzzy) + if add_code: + code.append((OP.NEXT, )) + code.extend(add_code) + + code.append((OP.END, )) + + return code + + def dump(self, indent=0, reverse=False): + print "%sGROUP_EXISTS %s" % (INDENT * indent, self.group) + self.yes_item.dump(indent + 1, reverse) + if self.no_item: + print "%sOR" % (INDENT * indent) + self.no_item.dump(indent + 1, reverse) + + def is_empty(self): + return self.yes_item.is_empty() and self.no_item.is_empty() + + def __eq__(self, other): + return type(self) is type(other) and (self.group, self.yes_item, + self.no_item) == (other.group, other.yes_item, other.no_item) + + def max_width(self): + return max(self.yes_item.max_width(), self.no_item.max_width()) + +class DefaultBoundary(ZeroWidthBase): + _opcode = OP.DEFAULT_BOUNDARY + _op_name = "DEFAULT_BOUNDARY" + +class DefaultEndOfWord(ZeroWidthBase): + _opcode = OP.DEFAULT_END_OF_WORD + _op_name = "DEFAULT_END_OF_WORD" + +class DefaultStartOfWord(ZeroWidthBase): + _opcode = OP.DEFAULT_START_OF_WORD + _op_name = "DEFAULT_START_OF_WORD" + +class EndOfLine(ZeroWidthBase): + _opcode = OP.END_OF_LINE + _op_name = "END_OF_LINE" + +class EndOfLineU(EndOfLine): + _opcode = OP.END_OF_LINE_U + _op_name = "END_OF_LINE_U" + +class EndOfString(ZeroWidthBase): + _opcode = OP.END_OF_STRING + _op_name = "END_OF_STRING" + +class EndOfStringLine(ZeroWidthBase): + _opcode = OP.END_OF_STRING_LINE + _op_name = "END_OF_STRING_LINE" + +class EndOfStringLineU(EndOfStringLine): + _opcode = OP.END_OF_STRING_LINE_U + _op_name = "END_OF_STRING_LINE_U" + +class EndOfWord(ZeroWidthBase): + _opcode = OP.END_OF_WORD + _op_name = "END_OF_WORD" + +class Fuzzy(RegexBase): + def __init__(self, subpattern, constraints=None): + RegexBase.__init__(self) + if constraints is None: + constraints = {} + self.subpattern = subpattern + self.constraints = constraints + + # If an error type is mentioned in the cost equation, then its maximum + # defaults to unlimited. + if "cost" in constraints: + for e in "dis": + if e in constraints["cost"]: + constraints.setdefault(e, (0, None)) + + # If any error type is mentioned, then all the error maxima default to + # 0, otherwise they default to unlimited. + if set(constraints) & set("dis"): + for e in "dis": + constraints.setdefault(e, (0, 0)) + else: + for e in "dis": + constraints.setdefault(e, (0, None)) + + # The maximum of the generic error type defaults to unlimited. + constraints.setdefault("e", (0, None)) + + # The cost equation defaults to equal costs. Also, the cost of any + # error type not mentioned in the cost equation defaults to 0. + if "cost" in constraints: + for e in "dis": + constraints["cost"].setdefault(e, 0) + else: + constraints["cost"] = {"d": 1, "i": 1, "s": 1, "max": + constraints["e"][1]} + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(reverse, True) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def is_atomic(self): + return self.subpattern.is_atomic() + + def contains_group(self): + return self.subpattern.contains_group() + + def compile(self, reverse=False, fuzzy=False): + # The individual limits. + arguments = [] + for e in "dise": + v = self.constraints[e] + arguments.append(v[0]) + arguments.append(UNLIMITED if v[1] is None else v[1]) + + # The coeffs of the cost equation. + for e in "dis": + arguments.append(self.constraints["cost"][e]) + + # The maximum of the cost equation. + v = self.constraints["cost"]["max"] + arguments.append(UNLIMITED if v is None else v) + + flags = 0 + if reverse: + flags |= REVERSE_OP + + return ([(OP.FUZZY, flags) + tuple(arguments)] + + self.subpattern.compile(reverse, True) + [(OP.END,)]) + + def dump(self, indent=0, reverse=False): + constraints = self._constraints_to_string() + if constraints: + constraints = " " + constraints + print "%sFUZZY%s" % (INDENT * indent, constraints) + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return (type(self) is type(other) and self.subpattern == + other.subpattern) + + def max_width(self): + return UNLIMITED + + def _constraints_to_string(self): + constraints = [] + + for name in "ids": + min, max = self.constraints[name] + if max == 0: + continue + + con = "" + + if min > 0: + con = "%s<=" % min + + con += name + + if max is not None: + con += "<=%s" % max + + constraints.append(con) + + cost = [] + for name in "ids": + coeff = self.constraints["cost"][name] + if coeff > 0: + cost.append("%s%s" % (coeff, name)) + + limit = self.constraints["cost"]["max"] + if limit is not None and limit > 0: + cost = "%s<=%s" % ("+".join(cost), limit) + constraints.append(cost) + + return ",".join(constraints) + +class Grapheme(RegexBase): + def compile(self, reverse=False, fuzzy=False): + # Match at least 1 character until a grapheme boundary is reached. Note + # that this is the same whether matching forwards or backwards. + character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse, + fuzzy) + boundary_matcher = [(OP.GRAPHEME_BOUNDARY, 1)] + + return character_matcher + boundary_matcher + + def dump(self, indent=0, reverse=False): + print "%sGRAPHEME" % (INDENT * indent) + + def max_width(self): + return UNLIMITED + +class GreedyRepeat(RegexBase): + _opcode = OP.GREEDY_REPEAT + _op_name = "GREEDY_REPEAT" + + def __init__(self, subpattern, min_count, max_count): + RegexBase.__init__(self) + self.subpattern = subpattern + self.min_count = min_count + self.max_count = max_count + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(reverse, fuzzy) + + def optimise(self, info): + subpattern = self.subpattern.optimise(info) + + return type(self)(subpattern, self.min_count, self.max_count) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def is_atomic(self): + return self.min_count == self.max_count and self.subpattern.is_atomic() + + def contains_group(self): + return self.subpattern.contains_group() + + def get_firstset(self, reverse): + fs = self.subpattern.get_firstset(reverse) + if self.min_count == 0: + fs.add(None) + + return fs + + def compile(self, reverse=False, fuzzy=False): + repeat = [self._opcode, self.min_count] + if self.max_count is None: + repeat.append(UNLIMITED) + else: + repeat.append(self.max_count) + + subpattern = self.subpattern.compile(reverse, fuzzy) + if not subpattern: + return [] + + return ([tuple(repeat)] + subpattern + [(OP.END, )]) + + def dump(self, indent=0, reverse=False): + if self.max_count is None: + limit = "INF" + else: + limit = self.max_count + print "%s%s %s %s" % (INDENT * indent, self._op_name, self.min_count, + limit) + + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return type(self) is type(other) and (self.subpattern, self.min_count, + self.max_count) == (other.subpattern, other.min_count, + other.max_count) + + def max_width(self): + if self.max_count is None: + return UNLIMITED + + return self.subpattern.max_width() * self.max_count + + def get_required_string(self, reverse): + max_count = UNLIMITED if self.max_count is None else self.max_count + if self.min_count == 0: + w = self.subpattern.max_width() * max_count + return min(w, UNLIMITED), None + + ofs, req = self.subpattern.get_required_string(reverse) + if req: + return ofs, req + + w = self.subpattern.max_width() * max_count + return min(w, UNLIMITED), None + +class Group(RegexBase): + def __init__(self, info, group, subpattern): + RegexBase.__init__(self) + self.info = info + self.group = group + self.subpattern = subpattern + + self.call_ref = None + + def fix_groups(self, reverse, fuzzy): + self.info.defined_groups[self.group] = (self, reverse, fuzzy) + self.subpattern.fix_groups(reverse, fuzzy) + + def optimise(self, info): + subpattern = self.subpattern.optimise(info) + + return Group(self.info, self.group, subpattern) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + return self.subpattern.remove_captures() + + def is_atomic(self): + return self.subpattern.is_atomic() + + def can_be_affix(self): + return False + + def contains_group(self): + return True + + def get_firstset(self, reverse): + return self.subpattern.get_firstset(reverse) + + def has_simple_start(self): + return self.subpattern.has_simple_start() + + def compile(self, reverse=False, fuzzy=False): + code = [] + + key = self.group, reverse, fuzzy + ref = self.info.call_refs.get(key) + if ref is not None: + code += [(OP.CALL_REF, ref)] + + public_group = private_group = self.group + if private_group < 0: + public_group = self.info.private_groups[private_group] + private_group = self.info.group_count - private_group + + code += ([(OP.GROUP, private_group, public_group)] + + self.subpattern.compile(reverse, fuzzy) + [(OP.END, )]) + + if ref is not None: + code += [(OP.END, )] + + return code + + def dump(self, indent=0, reverse=False): + group = self.group + if group < 0: + group = private_groups[group] + print "%sGROUP %s" % (INDENT * indent, group) + self.subpattern.dump(indent + 1, reverse) + + def __eq__(self, other): + return (type(self) is type(other) and (self.group, self.subpattern) == + (other.group, other.subpattern)) + + def max_width(self): + return self.subpattern.max_width() + + def get_required_string(self, reverse): + return self.subpattern.get_required_string(reverse) + +class LazyRepeat(GreedyRepeat): + _opcode = OP.LAZY_REPEAT + _op_name = "LAZY_REPEAT" + +class LookAround(RegexBase): + _dir_text = {False: "AHEAD", True: "BEHIND"} + + def __new__(cls, behind, positive, subpattern): + if positive and subpattern.is_empty(): + return subpattern + + return RegexBase.__new__(cls) + + def __init__(self, behind, positive, subpattern): + RegexBase.__init__(self) + self.behind = bool(behind) + self.positive = bool(positive) + self.subpattern = subpattern + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(self.behind, fuzzy) + + def optimise(self, info): + subpattern = self.subpattern.optimise(info) + + return LookAround(self.behind, self.positive, subpattern) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + return self.subpattern.remove_captures() + + def is_atomic(self): + return self.subpattern.is_atomic() + + def can_be_affix(self): + return self.subpattern.can_be_affix() + + def contains_group(self): + return self.subpattern.contains_group() + + def compile(self, reverse=False, fuzzy=False): + return ([(OP.LOOKAROUND, int(self.positive), int(not self.behind))] + + self.subpattern.compile(self.behind) + [(OP.END, )]) + + def dump(self, indent=0, reverse=False): + print "%sLOOK%s %s" % (INDENT * indent, self._dir_text[self.behind], + POS_TEXT[self.positive]) + self.subpattern.dump(indent + 1, self.behind) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return type(self) is type(other) and (self.behind, self.positive, + self.subpattern) == (other.behind, other.positive, other.subpattern) + + def max_width(self): + return 0 + +class PrecompiledCode(RegexBase): + def __init__(self, code): + self.code = code + + def compile(self, reverse=False, fuzzy=False): + return [tuple(self.code)] + +class Property(RegexBase): + _opcode = {(NOCASE, False): OP.PROPERTY, (IGNORECASE, False): + OP.PROPERTY_IGN, (FULLCASE, False): OP.PROPERTY, (FULLIGNORECASE, False): + OP.PROPERTY_IGN, (NOCASE, True): OP.PROPERTY_REV, (IGNORECASE, True): + OP.PROPERTY_IGN_REV, (FULLCASE, True): OP.PROPERTY_REV, (FULLIGNORECASE, + True): OP.PROPERTY_IGN_REV} + + def __init__(self, value, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.value = value + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + self._key = (self.__class__, self.value, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Property(self.value, positive, case_flags, zerowidth) + + def optimise(self, info, in_set=False): + return self + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[self.case_flags, reverse], flags, self.value)] + + def dump(self, indent=0, reverse=False): + prop = PROPERTY_NAMES[self.value >> 16] + name, value = prop[0], prop[1][self.value & 0xFFFF] + print "%sPROPERTY %s %s:%s%s" % (INDENT * indent, + POS_TEXT[self.positive], name, value, CASE_TEXT[self.case_flags]) + + def matches(self, ch): + return _regex.has_property_value(self.value, ch) == self.positive + + def max_width(self): + return 1 + +class Range(RegexBase): + _opcode = {(NOCASE, False): OP.RANGE, (IGNORECASE, False): OP.RANGE_IGN, + (FULLCASE, False): OP.RANGE, (FULLIGNORECASE, False): OP.RANGE_IGN, + (NOCASE, True): OP.RANGE_REV, (IGNORECASE, True): OP.RANGE_IGN_REV, + (FULLCASE, True): OP.RANGE_REV, (FULLIGNORECASE, True): OP.RANGE_IGN_REV} + _op_name = "RANGE" + + def __init__(self, lower, upper, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.lower = lower + self.upper = upper + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + self._key = (self.__class__, self.lower, self.upper, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Range(self.lower, self.upper, positive, case_flags, zerowidth) + + def optimise(self, info, in_set=False): + # Is the range case-sensitive? + if not self.positive or not (self.case_flags & IGNORECASE) or in_set: + return self + + # Is full case-folding possible? + if (not (info.flags & UNICODE) or (self.case_flags & FULLIGNORECASE) != + FULLIGNORECASE): + return self + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + # Get the folded characters in the range. + items = [] + for ch in expanding_chars: + if self.lower <= ord(ch) <= self.upper: + folded = _regex.fold_case(FULL_CASE_FOLDING, ch) + items.append(String([ord(c) for c in folded], + case_flags=self.case_flags)) + + if not items: + # We can fall back to simple case-folding. + return self + + if len(items) < self.upper - self.lower + 1: + # Not all the characters are covered by the full case-folding. + items.insert(0, self) + + return Branch(items) + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[self.case_flags, reverse], flags, self.lower, + self.upper)] + + def dump(self, indent=0, reverse=False): + display_lower = repr(unichr(self.lower)).lstrip("bu") + display_upper = repr(unichr(self.upper)).lstrip("bu") + print "%sRANGE %s %s %s%s" % (INDENT * indent, POS_TEXT[self.positive], + display_lower, display_upper, CASE_TEXT[self.case_flags]) + + def matches(self, ch): + return (self.lower <= ch <= self.upper) == self.positive + + def max_width(self): + return 1 + +class RefGroup(RegexBase): + _opcode = {(NOCASE, False): OP.REF_GROUP, (IGNORECASE, False): + OP.REF_GROUP_IGN, (FULLCASE, False): OP.REF_GROUP, (FULLIGNORECASE, + False): OP.REF_GROUP_FLD, (NOCASE, True): OP.REF_GROUP_REV, (IGNORECASE, + True): OP.REF_GROUP_IGN_REV, (FULLCASE, True): OP.REF_GROUP_REV, + (FULLIGNORECASE, True): OP.REF_GROUP_FLD_REV} + + def __init__(self, info, group, position, case_flags=NOCASE): + RegexBase.__init__(self) + self.info = info + self.group = group + self.position = position + self.case_flags = case_flags + + self._key = self.__class__, self.group, self.case_flags + + def fix_groups(self, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("unknown group at position %d" % self.position) + + if not 1 <= self.group <= self.info.group_count: + raise error("unknown group at position %d" % self.position) + + self._key = self.__class__, self.group, self.case_flags + + def remove_captures(self): + raise error("group reference not allowed at position %d" % self.position) + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[self.case_flags, reverse], flags, self.group)] + + def dump(self, indent=0, reverse=False): + print "%sREF_GROUP %s%s" % (INDENT * indent, self.group, + CASE_TEXT[self.case_flags]) + + def max_width(self): + return UNLIMITED + +class SearchAnchor(ZeroWidthBase): + _opcode = OP.SEARCH_ANCHOR + _op_name = "SEARCH_ANCHOR" + +class Sequence(RegexBase): + def __init__(self, items=None): + RegexBase.__init__(self) + if items is None: + items = [] + + self.items = items + + def fix_groups(self, reverse, fuzzy): + for s in self.items: + s.fix_groups(reverse, fuzzy) + + def optimise(self, info): + # Flatten the sequences. + items = [] + for s in self.items: + s = s.optimise(info) + if isinstance(s, Sequence): + items.extend(s.items) + else: + items.append(s) + + return make_sequence(items) + + def pack_characters(self, info): + "Packs sequences of characters into strings." + items = [] + characters = [] + case_flags = NOCASE + for s in self.items: + if type(s) is Character and s.positive: + if s.case_flags != case_flags: + # Different case sensitivity, so flush, unless neither the + # previous nor the new character are cased. + if case_flags or is_cased(info, s.value): + Sequence._flush_characters(info, characters, + case_flags, items) + + case_flags = s.case_flags + + characters.append(s.value) + elif type(s) is String: + if s.case_flags != case_flags: + # Different case sensitivity, so flush, unless the neither + # the previous nor the new string are cased. + if not s.case_flags or any(is_cased(info, c) for c in + characters): + Sequence._flush_characters(info, characters, + case_flags, items) + + case_flags = s.case_flags + + characters.extend(s.characters) + else: + Sequence._flush_characters(info, characters, case_flags, items) + + items.append(s.pack_characters(info)) + + Sequence._flush_characters(info, characters, case_flags, items) + + return make_sequence(items) + + def remove_captures(self): + self.items = [s.remove_captures() for s in self.items] + return self + + def is_atomic(self): + return all(s.is_atomic() for s in self.items) + + def can_be_affix(self): + return False + + def contains_group(self): + return any(s.contains_group() for s in self.items) + + def get_firstset(self, reverse): + fs = set() + items = self.items + if reverse: + items.reverse() + for s in items: + fs |= s.get_firstset(reverse) + if None not in fs: + return fs + fs.discard(None) + + return fs | set([None]) + + def has_simple_start(self): + return self.items and self.items[0].has_simple_start() + + def compile(self, reverse=False, fuzzy=False): + seq = self.items + if reverse: + seq = seq[::-1] + + code = [] + for s in seq: + code.extend(s.compile(reverse, fuzzy)) + + return code + + def dump(self, indent=0, reverse=False): + for s in self.items: + s.dump(indent, reverse) + + @staticmethod + def _flush_characters(info, characters, case_flags, items): + if not characters: + return + + # Disregard case_flags if all of the characters are case-less. + if case_flags & IGNORECASE: + if not any(is_cased(info, c) for c in characters): + case_flags = NOCASE + + if len(characters) == 1: + items.append(Character(characters[0], case_flags=case_flags)) + else: + items.append(String(characters, case_flags=case_flags)) + + characters[:] = [] + + def is_empty(self): + return all(i.is_empty() for i in self.items) + + def __eq__(self, other): + return type(self) is type(other) and self.items == other.items + + def max_width(self): + return sum(s.max_width() for s in self.items) + + def get_required_string(self, reverse): + seq = self.items + if reverse: + seq = seq[::-1] + + offset = 0 + + for s in seq: + ofs, req = s.get_required_string(reverse) + offset += ofs + if req: + return offset, req + + return offset, None + +class SetBase(RegexBase): + def __init__(self, info, items, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.info = info + self.items = tuple(items) + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + self.char_width = 1 + + self._key = (self.__class__, self.items, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return type(self)(self.info, self.items, positive, case_flags, + zerowidth).optimise(self.info) + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + code = [(self._opcode[self.case_flags, reverse], flags)] + for m in self.items: + code.extend(m.compile()) + + code.append((OP.END, )) + + return code + + def dump(self, indent=0, reverse=False): + print "%s%s %s%s" % (INDENT * indent, self._op_name, + POS_TEXT[self.positive], CASE_TEXT[self.case_flags]) + for i in self.items: + i.dump(indent + 1) + + def _handle_case_folding(self, info, in_set): + # Is the set case-sensitive? + if not self.positive or not (self.case_flags & IGNORECASE) or in_set: + return self + + # Is full case-folding possible? + if (not (self.info.flags & UNICODE) or (self.case_flags & + FULLIGNORECASE) != + FULLIGNORECASE): + return self + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + # Get the folded characters in the set. + items = [] + seen = set() + for ch in expanding_chars: + if self.matches(ord(ch)): + folded = _regex.fold_case(FULL_CASE_FOLDING, ch) + if folded not in seen: + items.append(String([ord(c) for c in folded], + case_flags=self.case_flags)) + seen.add(folded) + + if not items: + # We can fall back to simple case-folding. + return self + + return Branch([self] + items) + + def max_width(self): + # Is the set case-sensitive? + if not self.positive or not (self.case_flags & IGNORECASE): + return 1 + + # Is full case-folding possible? + if (not (self.info.flags & UNICODE) or (self.case_flags & + FULLIGNORECASE) != FULLIGNORECASE): + return 1 + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + # Get the folded characters in the set. + seen = set() + for ch in expanding_chars: + if self.matches(ord(ch)): + folded = _regex.fold_case(FULL_CASE_FOLDING, ch) + seen.add(folded) + + if not seen: + return 1 + + return max(len(folded) for folded in seen) + +class SetDiff(SetBase): + _opcode = {(NOCASE, False): OP.SET_DIFF, (IGNORECASE, False): + OP.SET_DIFF_IGN, (FULLCASE, False): OP.SET_DIFF, (FULLIGNORECASE, False): + OP.SET_DIFF_IGN, (NOCASE, True): OP.SET_DIFF_REV, (IGNORECASE, True): + OP.SET_DIFF_IGN_REV, (FULLCASE, True): OP.SET_DIFF_REV, (FULLIGNORECASE, + True): OP.SET_DIFF_IGN_REV} + _op_name = "SET_DIFF" + + def optimise(self, info, in_set=False): + items = self.items + if len(items) > 2: + items = [items[0], SetUnion(info, items[1 : ])] + + if len(items) == 1: + return items[0].with_flags(case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(m.optimise(info, in_set=True) for m in items) + + return self._handle_case_folding(info, in_set) + + def matches(self, ch): + m = self.items[0].matches(ch) and not self.items[1].matches(ch) + return m == self.positive + +class SetInter(SetBase): + _opcode = {(NOCASE, False): OP.SET_INTER, (IGNORECASE, False): + OP.SET_INTER_IGN, (FULLCASE, False): OP.SET_INTER, (FULLIGNORECASE, + False): OP.SET_INTER_IGN, (NOCASE, True): OP.SET_INTER_REV, (IGNORECASE, + True): OP.SET_INTER_IGN_REV, (FULLCASE, True): OP.SET_INTER_REV, + (FULLIGNORECASE, True): OP.SET_INTER_IGN_REV} + _op_name = "SET_INTER" + + def optimise(self, info, in_set=False): + items = [] + for m in self.items: + m = m.optimise(info, in_set=True) + if isinstance(m, SetInter) and m.positive: + # Intersection in intersection. + items.extend(m.items) + else: + items.append(m) + + if len(items) == 1: + return items[0].with_flags(case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(items) + + return self._handle_case_folding(info, in_set) + + def matches(self, ch): + m = all(i.matches(ch) for i in self.items) + return m == self.positive + +class SetSymDiff(SetBase): + _opcode = {(NOCASE, False): OP.SET_SYM_DIFF, (IGNORECASE, False): + OP.SET_SYM_DIFF_IGN, (FULLCASE, False): OP.SET_SYM_DIFF, (FULLIGNORECASE, + False): OP.SET_SYM_DIFF_IGN, (NOCASE, True): OP.SET_SYM_DIFF_REV, + (IGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV, (FULLCASE, True): + OP.SET_SYM_DIFF_REV, (FULLIGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV} + _op_name = "SET_SYM_DIFF" + + def optimise(self, info, in_set=False): + items = [] + for m in self.items: + m = m.optimise(info, in_set=True) + if isinstance(m, SetSymDiff) and m.positive: + # Symmetric difference in symmetric difference. + items.extend(m.items) + else: + items.append(m) + + if len(items) == 1: + return items[0].with_flags(case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(items) + + return self._handle_case_folding(info, in_set) + + def matches(self, ch): + m = False + for i in self.items: + m = m != i.matches(ch) + + return m == self.positive + +class SetUnion(SetBase): + _opcode = {(NOCASE, False): OP.SET_UNION, (IGNORECASE, False): + OP.SET_UNION_IGN, (FULLCASE, False): OP.SET_UNION, (FULLIGNORECASE, + False): OP.SET_UNION_IGN, (NOCASE, True): OP.SET_UNION_REV, (IGNORECASE, + True): OP.SET_UNION_IGN_REV, (FULLCASE, True): OP.SET_UNION_REV, + (FULLIGNORECASE, True): OP.SET_UNION_IGN_REV} + _op_name = "SET_UNION" + + def optimise(self, info, in_set=False): + items = [] + for m in self.items: + m = m.optimise(info, in_set=True) + if isinstance(m, SetUnion) and m.positive: + # Union in union. + items.extend(m.items) + else: + items.append(m) + + if len(items) == 1: + i = items[0] + return i.with_flags(positive=i.positive == self.positive, + case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(items) + + return self._handle_case_folding(info, in_set) + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + + characters, others = defaultdict(list), [] + for m in self.items: + if isinstance(m, Character): + characters[m.positive].append(m.value) + else: + others.append(m) + + code = [(self._opcode[self.case_flags, reverse], flags)] + + for positive, values in characters.items(): + flags = 0 + if positive: + flags |= POSITIVE_OP + if len(values) == 1: + code.append((OP.CHARACTER, flags, values[0])) + else: + code.append((OP.STRING, flags, len(values)) + tuple(values)) + + for m in others: + code.extend(m.compile()) + + code.append((OP.END, )) + + return code + + def matches(self, ch): + m = any(i.matches(ch) for i in self.items) + return m == self.positive + +class StartOfLine(ZeroWidthBase): + _opcode = OP.START_OF_LINE + _op_name = "START_OF_LINE" + +class StartOfLineU(StartOfLine): + _opcode = OP.START_OF_LINE_U + _op_name = "START_OF_LINE_U" + +class StartOfString(ZeroWidthBase): + _opcode = OP.START_OF_STRING + _op_name = "START_OF_STRING" + +class StartOfWord(ZeroWidthBase): + _opcode = OP.START_OF_WORD + _op_name = "START_OF_WORD" + +class String(RegexBase): + _opcode = {(NOCASE, False): OP.STRING, (IGNORECASE, False): OP.STRING_IGN, + (FULLCASE, False): OP.STRING, (FULLIGNORECASE, False): OP.STRING_FLD, + (NOCASE, True): OP.STRING_REV, (IGNORECASE, True): OP.STRING_IGN_REV, + (FULLCASE, True): OP.STRING_REV, (FULLIGNORECASE, True): + OP.STRING_FLD_REV} + + def __init__(self, characters, case_flags=NOCASE): + self.characters = tuple(characters) + self.case_flags = case_flags + + if (self.case_flags & FULLIGNORECASE) == FULLIGNORECASE: + folded_characters = [] + for char in self.characters: + folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(char)) + folded_characters.extend(ord(c) for c in folded) + else: + folded_characters = self.characters + + self.folded_characters = tuple(folded_characters) + self.required = False + + self._key = self.__class__, self.characters, self.case_flags + + def get_firstset(self, reverse): + if reverse: + pos = -1 + else: + pos = 0 + return set([Character(self.characters[pos], + case_flags=self.case_flags)]) + + def has_simple_start(self): + return True + + def compile(self, reverse=False, fuzzy=False): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + if self.required: + flags |= REQUIRED_OP + return [(self._opcode[self.case_flags, reverse], flags, + len(self.folded_characters)) + self.folded_characters] + + def dump(self, indent=0, reverse=False): + display = repr("".join(unichr(c) for c in self.characters)).lstrip("bu") + print "%sSTRING %s%s" % (INDENT * indent, display, + CASE_TEXT[self.case_flags]) + + def max_width(self): + return len(self.folded_characters) + + def get_required_string(self, reverse): + return 0, self + +class StringSet(RegexBase): + _opcode = {(NOCASE, False): OP.STRING_SET, (IGNORECASE, False): + OP.STRING_SET_IGN, (FULLCASE, False): OP.STRING_SET, (FULLIGNORECASE, + False): OP.STRING_SET_FLD, (NOCASE, True): OP.STRING_SET_REV, + (IGNORECASE, True): OP.STRING_SET_IGN_REV, (FULLCASE, True): + OP.STRING_SET_REV, (FULLIGNORECASE, True): OP.STRING_SET_FLD_REV} + + def __init__(self, info, name, case_flags=NOCASE): + self.info = info + self.name = name + self.case_flags = case_flags + + self._key = self.__class__, self.name, self.case_flags + + self.set_key = (name, self.case_flags) + if self.set_key not in info.named_lists_used: + info.named_lists_used[self.set_key] = len(info.named_lists_used) + + def compile(self, reverse=False, fuzzy=False): + index = self.info.named_lists_used[self.set_key] + items = self.info.kwargs[self.name] + + case_flags = self.case_flags + + if not items: + return [] + + encoding = self.info.flags & _ALL_ENCODINGS + fold_flags = encoding | case_flags + + if fuzzy: + choices = [self._folded(fold_flags, i) for i in items] + + # Sort from longest to shortest. + choices.sort(key=lambda s: (-len(s), s)) + + branches = [] + for string in choices: + branches.append(Sequence([Character(c, case_flags=case_flags) + for c in string])) + + if len(branches) > 1: + branch = Branch(branches) + else: + branch = branches[0] + branch = branch.optimise(self.info).pack_characters(self.info) + + return branch.compile(reverse, fuzzy) + else: + min_len = min(len(i) for i in items) + max_len = max(len(self._folded(fold_flags, i)) for i in items) + return [(self._opcode[case_flags, reverse], index, min_len, + max_len)] + + def dump(self, indent=0, reverse=False): + print "%sSTRING_SET %s%s" % (INDENT * indent, self.name, + CASE_TEXT[self.case_flags]) + + def _folded(self, fold_flags, item): + if isinstance(item, unicode): + return [ord(c) for c in _regex.fold_case(fold_flags, item)] + else: + return [ord(c) for c in item] + + def _flatten(self, s): + # Flattens the branches. + if isinstance(s, Branch): + for b in s.branches: + self._flatten(b) + elif isinstance(s, Sequence) and s.items: + seq = s.items + + while isinstance(seq[-1], Sequence): + seq[-1 : ] = seq[-1].items + + n = 0 + while n < len(seq) and isinstance(seq[n], Character): + n += 1 + + if n > 1: + seq[ : n] = [String([c.value for c in seq[ : n]], + case_flags=self.case_flags)] + + self._flatten(seq[-1]) + + def max_width(self): + if not self.info.kwargs[self.name]: + return 0 + + if self.case_flags & IGNORECASE: + fold_flags = (self.info.flags & _ALL_ENCODINGS) | self.case_flags + return max(len(_regex.fold_case(fold_flags, i)) for i in + self.info.kwargs[self.name]) + else: + return max(len(i) for i in self.info.kwargs[self.name]) + +class Source(object): + "Scanner for the regular expression source string." + def __init__(self, string): + if isinstance(string, unicode): + self.string = string + self.char_type = unichr + else: + self.string = string + self.char_type = chr + + self.pos = 0 + self.ignore_space = False + self.sep = string[ : 0] + + def get(self): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + else: + break + + ch = string[pos] + self.pos = pos + 1 + return ch + except IndexError: + # We've reached the end of the string. + self.pos = pos + return string[ : 0] + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + return string[ : 0] + + def get_many(self, count=1): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + substring = [] + + while len(substring) < count: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + else: + break + + substring.append(string[pos]) + pos += 1 + + substring = "".join(substring) + else: + substring = string[pos : pos + count] + pos += len(substring) + + self.pos = pos + return substring + except IndexError: + # We've reached the end of the string. + self.pos = len(string) + return "".join(substring) + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + return "".join(substring) + + def get_while(self, test_set, include=True): + string = self.string + pos = self.pos + + if self.ignore_space: + try: + substring = [] + + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + elif (string[pos] in test_set) == include: + substring.append(string[pos]) + pos += 1 + else: + break + + self.pos = pos + except IndexError: + # We've reached the end of the string. + self.pos = len(string) + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + + return "".join(substring) + else: + try: + while (string[pos] in test_set) == include: + pos += 1 + + substring = string[self.pos : pos] + + self.pos = pos + + return substring + except IndexError: + # We've reached the end of the string. + substring = string[self.pos : pos] + + self.pos = pos + + return substring + + def skip_while(self, test_set, include=True): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + elif (string[pos] in test_set) == include: + pos += 1 + else: + break + else: + while (string[pos] in test_set) == include: + pos += 1 + + self.pos = pos + except IndexError: + # We've reached the end of the string. + self.pos = len(string) + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + + def match(self, substring): + string = self.string + pos = self.pos + + if self.ignore_space: + try: + for c in substring: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + else: + break + + if string[pos] != c: + return False + + pos += 1 + + self.pos = pos + + return True + except IndexError: + # We've reached the end of the string. + return False + except ValueError: + # The comment extended to the end of the string. + return False + else: + if not string.startswith(substring, pos): + return False + + self.pos = pos + len(substring) + + return True + + def expect(self, substring): + if not self.match(substring): + raise error("missing %s at position %d" % (substring, self.pos)) + + def at_end(self): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + while True: + if string[pos].isspace(): + pos += 1 + elif string[pos] == "#": + pos = string.index("\n", pos) + else: + break + + return pos >= len(string) + except IndexError: + # We've reached the end of the string. + return True + except ValueError: + # The comment extended to the end of the string. + return True + +class Info(object): + "Info about the regular expression." + + def __init__(self, flags=0, char_type=None, kwargs={}): + flags |= DEFAULT_FLAGS[(flags & _ALL_VERSIONS) or DEFAULT_VERSION] + self.flags = flags + self.global_flags = flags + + self.kwargs = kwargs + + self.group_count = 0 + self.group_index = {} + self.group_name = {} + self.char_type = char_type + self.named_lists_used = {} + self.open_groups = [] + self.open_group_count = {} + self.defined_groups = {} + self.group_calls = [] + self.private_groups = {} + + def open_group(self, name=None): + group = self.group_index.get(name) + if group is None: + while True: + self.group_count += 1 + if name is None or self.group_count not in self.group_name: + break + + group = self.group_count + if name: + self.group_index[name] = group + self.group_name[group] = name + + if group in self.open_groups: + # We have a nested named group. We'll assign it a private group + # number, initially negative until we can assign a proper + # (positive) number. + group_alias = -(len(self.private_groups) + 1) + self.private_groups[group_alias] = group + group = group_alias + + self.open_groups.append(group) + self.open_group_count[group] = self.open_group_count.get(group, 0) + 1 + + return group + + def close_group(self): + self.open_groups.pop() + + def is_open_group(self, name): + # In version 1, a group reference can refer to an open group. We'll + # just pretend the group isn't open. + version = (self.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version == VERSION1: + return False + + if name.isdigit(): + group = int(name) + else: + group = self.group_index.get(name) + + return group in self.open_groups + +def _check_group_features(info, parsed): + """Checks whether the reverse and fuzzy features of the group calls match + the groups which they call.""" + + call_refs = {} + additional_groups = [] + for call, reverse, fuzzy in info.group_calls: + # Look up the reference of this group call. + key = (call.group, reverse, fuzzy) + ref = call_refs.get(key) + if ref is None: + # This group doesn't have a reference yet, so look up its features. + if call.group == 0: + # Calling the pattern as a whole. + rev = bool(info.flags & REVERSE) + fuz = isinstance(parsed, Fuzzy) + if (rev, fuz) != (reverse, fuzzy): + # The pattern as a whole doesn't have the features we want, + # so we'll need to make a copy of it with the desired + # features. + additional_groups.append((parsed, reverse, fuzzy)) + else: + # Calling a capture group. + def_info = info.defined_groups[call.group] + group = def_info[0] + if def_info[1 : ] != (reverse, fuzzy): + # The group doesn't have the features we want, so we'll + # need to make a copy of it with the desired features. + additional_groups.append((group, reverse, fuzzy)) + + ref = len(call_refs) + call_refs[key] = ref + + call.call_ref = ref + + info.call_refs = call_refs + info.additional_groups = additional_groups + +def _get_required_string(parsed, flags): + "Gets the required string and related info of a parsed pattern." + + req_offset, required = parsed.get_required_string(bool(flags & REVERSE)) + if required: + required.required = True + if req_offset >= UNLIMITED: + req_offset = -1 + + req_flags = required.case_flags + if not (flags & UNICODE): + req_flags &= ~UNICODE + + req_chars = required.folded_characters + else: + req_offset = 0 + req_chars = () + req_flags = 0 + + return req_offset, req_chars, req_flags + +class Scanner: + def __init__(self, lexicon, flags=0): + self.lexicon = lexicon + + # Combine phrases into a compound pattern. + patterns = [] + for phrase, action in lexicon: + # Parse the regular expression. + source = Source(phrase) + info = Info(flags, source.char_type) + source.ignore_space = bool(info.flags & VERBOSE) + parsed = _parse_pattern(source, info) + if not source.at_end(): + raise error("trailing characters at position %d" % source.pos) + + # We want to forbid capture groups within each phrase. + patterns.append(parsed.remove_captures()) + + # Combine all the subpatterns into one pattern. + info = Info(flags) + patterns = [Group(info, g + 1, p) for g, p in enumerate(patterns)] + parsed = Branch(patterns) + + # Optimise the compound pattern. + parsed = parsed.optimise(info) + parsed = parsed.pack_characters(info) + + # Get the required string. + req_offset, req_chars, req_flags = _get_required_string(parsed, + info.flags) + + # Check the features of the groups. + _check_group_features(info, parsed) + + # Complain if there are any group calls. They are not supported by the + # Scanner class. + if info.call_refs: + raise error("recursive regex not supported by Scanner") + + reverse = bool(info.flags & REVERSE) + + # Compile the compound pattern. The result is a list of tuples. + code = parsed.compile(reverse) + [(OP.SUCCESS, )] + + # Flatten the code into a list of ints. + code = _flatten_code(code) + + if not parsed.has_simple_start(): + # Get the first set, if possible. + try: + fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) + fs_code = _flatten_code(fs_code) + code = fs_code + code + except _FirstSetError: + pass + + # Check the global flags for conflicts. + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version not in (0, VERSION0, VERSION1): + raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") + + # Create the PatternObject. + # + # Local flags like IGNORECASE affect the code generation, but aren't + # needed by the PatternObject itself. Conversely, global flags like + # LOCALE _don't_ affect the code generation but _are_ needed by the + # PatternObject. + self.scanner = _regex.compile(None, (flags & GLOBAL_FLAGS) | version, + code, {}, {}, {}, [], req_offset, req_chars, req_flags, + len(patterns)) + + def scan(self, string): + result = [] + append = result.append + match = self.scanner.scanner(string).match + i = 0 + while True: + m = match() + if not m: + break + j = m.end() + if i == j: + break + action = self.lexicon[m.lastindex - 1][1] + if hasattr(action, '__call__'): + self.match = m + action = action(self, m.group()) + if action is not None: + append(action) + i = j + + return result, string[i : ] + +# Get the known properties dict. +PROPERTIES = _regex.get_properties() + +# Build the inverse of the properties dict. +PROPERTY_NAMES = {} +for prop_name, (prop_id, values) in PROPERTIES.items(): + name, prop_values = PROPERTY_NAMES.get(prop_id, ("", {})) + name = max(name, prop_name, key=len) + PROPERTY_NAMES[prop_id] = name, prop_values + + for val_name, val_id in values.items(): + prop_values[val_id] = max(prop_values.get(val_id, ""), val_name, + key=len) + +# Character escape sequences. +CHARACTER_ESCAPES = { + "a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +# Predefined character set escape sequences. +CHARSET_ESCAPES = { + "d": lookup_property(None, "DIGIT", True), + "D": lookup_property(None, "DIGIT", False), + "s": lookup_property(None, "SPACE", True), + "S": lookup_property(None, "SPACE", False), + "w": lookup_property(None, "WORD", True), + "W": lookup_property(None, "WORD", False), +} + +# Positional escape sequences. +POSITION_ESCAPES = { + "A": StartOfString(), + "b": Boundary(), + "B": Boundary(False), + "m": StartOfWord(), + "M": EndOfWord(), + "Z": EndOfString(), +} + +# Positional escape sequences when WORD flag set. +WORD_POSITION_ESCAPES = dict(POSITION_ESCAPES) +WORD_POSITION_ESCAPES.update({ + "b": DefaultBoundary(), + "B": DefaultBoundary(False), + "m": DefaultStartOfWord(), + "M": DefaultEndOfWord(), +}) diff --git a/src/regex/_regex_unicode.c b/src/regex/_regex_unicode.c new file mode 100644 index 000000000000..6b64e0062a01 --- /dev/null +++ b/src/regex/_regex_unicode.c @@ -0,0 +1,11997 @@ +#include "_regex_unicode.h" + +#define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP)) +#define RE_GRAPH_MASK ((1 << RE_PROP_CC) | (1 << RE_PROP_CS) | (1 << RE_PROP_CN)) +#define RE_WORD_MASK (RE_PROP_M_MASK | (1 << RE_PROP_ND) | (1 << RE_PROP_PC)) + +typedef struct RE_AllCases { + RE_INT32 diffs[RE_MAX_CASES - 1]; +} RE_AllCases; + +typedef struct RE_FullCaseFolding { + RE_INT32 diff; + RE_UINT16 codepoints[RE_MAX_FOLDED - 1]; +} RE_FullCaseFolding; + +/* strings. */ + +char* re_strings[] = { + "-1/2", + "0", + "1", + "1/10", + "1/16", + "1/2", + "1/3", + "1/4", + "1/5", + "1/6", + "1/7", + "1/8", + "1/9", + "10", + "100", + "1000", + "10000", + "100000", + "100000000", + "1000000000000", + "103", + "107", + "11", + "11/2", + "118", + "12", + "122", + "129", + "13", + "13/2", + "130", + "132", + "14", + "15", + "15/2", + "16", + "17", + "17/2", + "18", + "19", + "2", + "2/3", + "2/5", + "20", + "200", + "2000", + "20000", + "202", + "21", + "214", + "216", + "216000", + "218", + "22", + "220", + "222", + "224", + "226", + "228", + "23", + "230", + "232", + "233", + "234", + "24", + "240", + "25", + "26", + "27", + "28", + "29", + "3", + "3/16", + "3/2", + "3/4", + "3/5", + "3/8", + "30", + "300", + "3000", + "30000", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "4", + "4/5", + "40", + "400", + "4000", + "40000", + "41", + "42", + "43", + "432000", + "44", + "45", + "46", + "47", + "48", + "49", + "5", + "5/2", + "5/6", + "5/8", + "50", + "500", + "5000", + "50000", + "6", + "60", + "600", + "6000", + "60000", + "7", + "7/2", + "7/8", + "70", + "700", + "7000", + "70000", + "8", + "80", + "800", + "8000", + "80000", + "84", + "9", + "9/2", + "90", + "900", + "9000", + "90000", + "91", + "A", + "ABOVE", + "ABOVELEFT", + "ABOVERIGHT", + "AEGEANNUMBERS", + "AHEX", + "AI", + "AIN", + "AL", + "ALAPH", + "ALCHEMICAL", + "ALCHEMICALSYMBOLS", + "ALEF", + "ALETTER", + "ALNUM", + "ALPHA", + "ALPHABETIC", + "ALPHABETICPF", + "ALPHABETICPRESENTATIONFORMS", + "ALPHANUMERIC", + "AMBIGUOUS", + "AN", + "ANCIENTGREEKMUSIC", + "ANCIENTGREEKMUSICALNOTATION", + "ANCIENTGREEKNUMBERS", + "ANCIENTSYMBOLS", + "ANY", + "AR", + "ARAB", + "ARABIC", + "ARABICEXTA", + "ARABICEXTENDEDA", + "ARABICLETTER", + "ARABICMATH", + "ARABICMATHEMATICALALPHABETICSYMBOLS", + "ARABICNUMBER", + "ARABICPFA", + "ARABICPFB", + "ARABICPRESENTATIONFORMSA", + "ARABICPRESENTATIONFORMSB", + "ARABICSUP", + "ARABICSUPPLEMENT", + "ARMENIAN", + "ARMI", + "ARMN", + "ARROWS", + "ASCII", + "ASCIIHEXDIGIT", + "ASSIGNED", + "AT", + "ATA", + "ATAR", + "ATB", + "ATERM", + "ATTACHEDABOVE", + "ATTACHEDABOVERIGHT", + "ATTACHEDBELOW", + "ATTACHEDBELOWLEFT", + "AVAGRAHA", + "AVESTAN", + "AVST", + "B", + "B2", + "BA", + "BALI", + "BALINESE", + "BAMU", + "BAMUM", + "BAMUMSUP", + "BAMUMSUPPLEMENT", + "BASICLATIN", + "BATAK", + "BATK", + "BB", + "BC", + "BEH", + "BELOW", + "BELOWLEFT", + "BELOWRIGHT", + "BENG", + "BENGALI", + "BETH", + "BIDIC", + "BIDICLASS", + "BIDICONTROL", + "BIDIM", + "BIDIMIRRORED", + "BINDU", + "BK", + "BL", + "BLANK", + "BLK", + "BLOCK", + "BLOCKELEMENTS", + "BN", + "BOPO", + "BOPOMOFO", + "BOPOMOFOEXT", + "BOPOMOFOEXTENDED", + "BOTTOM", + "BOTTOMANDRIGHT", + "BOUNDARYNEUTRAL", + "BOXDRAWING", + "BR", + "BRAH", + "BRAHMI", + "BRAI", + "BRAILLE", + "BRAILLEPATTERNS", + "BREAKAFTER", + "BREAKBEFORE", + "BREAKBOTH", + "BREAKSYMBOLS", + "BUGI", + "BUGINESE", + "BUHD", + "BUHID", + "BURUSHASKIYEHBARREE", + "BYZANTINEMUSIC", + "BYZANTINEMUSICALSYMBOLS", + "C", + "C&", + "CAKM", + "CAN", + "CANADIANABORIGINAL", + "CANADIANSYLLABICS", + "CANONICAL", + "CANONICALCOMBININGCLASS", + "CANS", + "CARI", + "CARIAN", + "CARRIAGERETURN", + "CASED", + "CASEDLETTER", + "CASEIGNORABLE", + "CB", + "CC", + "CCC", + "CCC10", + "CCC103", + "CCC107", + "CCC11", + "CCC118", + "CCC12", + "CCC122", + "CCC129", + "CCC13", + "CCC130", + "CCC132", + "CCC133", + "CCC14", + "CCC15", + "CCC16", + "CCC17", + "CCC18", + "CCC19", + "CCC20", + "CCC21", + "CCC22", + "CCC23", + "CCC24", + "CCC25", + "CCC26", + "CCC27", + "CCC28", + "CCC29", + "CCC30", + "CCC31", + "CCC32", + "CCC33", + "CCC34", + "CCC35", + "CCC36", + "CCC84", + "CCC91", + "CF", + "CHAKMA", + "CHAM", + "CHANGESWHENCASEFOLDED", + "CHANGESWHENCASEMAPPED", + "CHANGESWHENLOWERCASED", + "CHANGESWHENTITLECASED", + "CHANGESWHENUPPERCASED", + "CHER", + "CHEROKEE", + "CI", + "CIRCLE", + "CJ", + "CJK", + "CJKCOMPAT", + "CJKCOMPATFORMS", + "CJKCOMPATIBILITY", + "CJKCOMPATIBILITYFORMS", + "CJKCOMPATIBILITYIDEOGRAPHS", + "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT", + "CJKCOMPATIDEOGRAPHS", + "CJKCOMPATIDEOGRAPHSSUP", + "CJKEXTA", + "CJKEXTB", + "CJKEXTC", + "CJKEXTD", + "CJKRADICALSSUP", + "CJKRADICALSSUPPLEMENT", + "CJKSTROKES", + "CJKSYMBOLS", + "CJKSYMBOLSANDPUNCTUATION", + "CJKUNIFIEDIDEOGRAPHS", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONA", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONB", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONC", + "CJKUNIFIEDIDEOGRAPHSEXTENSIOND", + "CL", + "CLOSE", + "CLOSEPARENTHESIS", + "CLOSEPUNCTUATION", + "CM", + "CN", + "CNTRL", + "CO", + "COM", + "COMBININGDIACRITICALMARKS", + "COMBININGDIACRITICALMARKSFORSYMBOLS", + "COMBININGDIACRITICALMARKSSUPPLEMENT", + "COMBININGHALFMARKS", + "COMBININGMARK", + "COMBININGMARKSFORSYMBOLS", + "COMMON", + "COMMONINDICNUMBERFORMS", + "COMMONSEPARATOR", + "COMPAT", + "COMPATJAMO", + "COMPLEXCONTEXT", + "CONDITIONALJAPANESESTARTER", + "CONNECTORPUNCTUATION", + "CONSONANT", + "CONSONANTDEAD", + "CONSONANTFINAL", + "CONSONANTHEADLETTER", + "CONSONANTMEDIAL", + "CONSONANTPLACEHOLDER", + "CONSONANTREPHA", + "CONSONANTSUBJOINED", + "CONTINGENTBREAK", + "CONTROL", + "CONTROLPICTURES", + "COPT", + "COPTIC", + "COUNTINGROD", + "COUNTINGRODNUMERALS", + "CP", + "CPRT", + "CR", + "CS", + "CUNEIFORM", + "CUNEIFORMNUMBERS", + "CUNEIFORMNUMBERSANDPUNCTUATION", + "CURRENCYSYMBOL", + "CURRENCYSYMBOLS", + "CWCF", + "CWCM", + "CWL", + "CWT", + "CWU", + "CYPRIOT", + "CYPRIOTSYLLABARY", + "CYRILLIC", + "CYRILLICEXTA", + "CYRILLICEXTB", + "CYRILLICEXTENDEDA", + "CYRILLICEXTENDEDB", + "CYRILLICSUP", + "CYRILLICSUPPLEMENT", + "CYRILLICSUPPLEMENTARY", + "CYRL", + "D", + "DA", + "DAL", + "DALATHRISH", + "DASH", + "DASHPUNCTUATION", + "DB", + "DE", + "DECIMAL", + "DECIMALNUMBER", + "DECOMPOSITIONTYPE", + "DEFAULTIGNORABLECODEPOINT", + "DEP", + "DEPRECATED", + "DESERET", + "DEVA", + "DEVANAGARI", + "DEVANAGARIEXT", + "DEVANAGARIEXTENDED", + "DI", + "DIA", + "DIACRITIC", + "DIACRITICALS", + "DIACRITICALSFORSYMBOLS", + "DIACRITICALSSUP", + "DIGIT", + "DINGBATS", + "DOMINO", + "DOMINOTILES", + "DOUBLEABOVE", + "DOUBLEBELOW", + "DOUBLEQUOTE", + "DQ", + "DSRT", + "DT", + "DUALJOINING", + "E", + "EA", + "EASTASIANWIDTH", + "EGYP", + "EGYPTIANHIEROGLYPHS", + "EMOTICONS", + "EN", + "ENC", + "ENCLOSEDALPHANUM", + "ENCLOSEDALPHANUMERICS", + "ENCLOSEDALPHANUMERICSUPPLEMENT", + "ENCLOSEDALPHANUMSUP", + "ENCLOSEDCJK", + "ENCLOSEDCJKLETTERSANDMONTHS", + "ENCLOSEDIDEOGRAPHICSUP", + "ENCLOSEDIDEOGRAPHICSUPPLEMENT", + "ENCLOSINGMARK", + "ES", + "ET", + "ETHI", + "ETHIOPIC", + "ETHIOPICEXT", + "ETHIOPICEXTA", + "ETHIOPICEXTENDED", + "ETHIOPICEXTENDEDA", + "ETHIOPICSUP", + "ETHIOPICSUPPLEMENT", + "EUROPEANNUMBER", + "EUROPEANSEPARATOR", + "EUROPEANTERMINATOR", + "EX", + "EXCLAMATION", + "EXT", + "EXTEND", + "EXTENDER", + "EXTENDNUMLET", + "F", + "FALSE", + "FARSIYEH", + "FE", + "FEH", + "FIN", + "FINAL", + "FINALPUNCTUATION", + "FINALSEMKATH", + "FIRSTSTRONGISOLATE", + "FO", + "FONT", + "FORMAT", + "FRA", + "FRACTION", + "FSI", + "FULLWIDTH", + "GAF", + "GAMAL", + "GC", + "GCB", + "GENERALCATEGORY", + "GENERALPUNCTUATION", + "GEOMETRICSHAPES", + "GEOR", + "GEORGIAN", + "GEORGIANSUP", + "GEORGIANSUPPLEMENT", + "GL", + "GLAG", + "GLAGOLITIC", + "GLUE", + "GOTH", + "GOTHIC", + "GRAPH", + "GRAPHEMEBASE", + "GRAPHEMECLUSTERBREAK", + "GRAPHEMEEXTEND", + "GRAPHEMELINK", + "GRBASE", + "GREEK", + "GREEKANDCOPTIC", + "GREEKEXT", + "GREEKEXTENDED", + "GREK", + "GREXT", + "GRLINK", + "GUJARATI", + "GUJR", + "GURMUKHI", + "GURU", + "H", + "H2", + "H3", + "HAH", + "HALFANDFULLFORMS", + "HALFMARKS", + "HALFWIDTH", + "HALFWIDTHANDFULLWIDTHFORMS", + "HAMZAONHEHGOAL", + "HAN", + "HANG", + "HANGUL", + "HANGULCOMPATIBILITYJAMO", + "HANGULJAMO", + "HANGULJAMOEXTENDEDA", + "HANGULJAMOEXTENDEDB", + "HANGULSYLLABLES", + "HANGULSYLLABLETYPE", + "HANI", + "HANO", + "HANUNOO", + "HE", + "HEBR", + "HEBREW", + "HEBREWLETTER", + "HEH", + "HEHGOAL", + "HETH", + "HEX", + "HEXDIGIT", + "HIGHPRIVATEUSESURROGATES", + "HIGHPUSURROGATES", + "HIGHSURROGATES", + "HIRA", + "HIRAGANA", + "HL", + "HST", + "HY", + "HYPHEN", + "ID", + "IDC", + "IDCONTINUE", + "IDEO", + "IDEOGRAPHIC", + "IDEOGRAPHICDESCRIPTIONCHARACTERS", + "IDS", + "IDSB", + "IDSBINARYOPERATOR", + "IDST", + "IDSTART", + "IDSTRINARYOPERATOR", + "IMPERIALARAMAIC", + "IN", + "INDICMATRACATEGORY", + "INDICNUMBERFORMS", + "INDICSYLLABICCATEGORY", + "INFIXNUMERIC", + "INHERITED", + "INIT", + "INITIAL", + "INITIALPUNCTUATION", + "INMC", + "INSC", + "INSCRIPTIONALPAHLAVI", + "INSCRIPTIONALPARTHIAN", + "INSEPARABLE", + "INSEPERABLE", + "INVISIBLE", + "IOTASUBSCRIPT", + "IPAEXT", + "IPAEXTENSIONS", + "IS", + "ISO", + "ISOLATED", + "ITAL", + "JAMO", + "JAMOEXTA", + "JAMOEXTB", + "JAVA", + "JAVANESE", + "JG", + "JL", + "JOINC", + "JOINCAUSING", + "JOINCONTROL", + "JOININGGROUP", + "JOININGTYPE", + "JT", + "JV", + "KA", + "KAF", + "KAITHI", + "KALI", + "KANA", + "KANASUP", + "KANASUPPLEMENT", + "KANAVOICING", + "KANBUN", + "KANGXI", + "KANGXIRADICALS", + "KANNADA", + "KAPH", + "KATAKANA", + "KATAKANAEXT", + "KATAKANAORHIRAGANA", + "KATAKANAPHONETICEXTENSIONS", + "KAYAHLI", + "KHAPH", + "KHAR", + "KHAROSHTHI", + "KHMER", + "KHMERSYMBOLS", + "KHMR", + "KNDA", + "KNOTTEDHEH", + "KTHI", + "KV", + "L", + "L&", + "LAM", + "LAMADH", + "LANA", + "LAO", + "LAOO", + "LATIN", + "LATIN1", + "LATIN1SUP", + "LATIN1SUPPLEMENT", + "LATINEXTA", + "LATINEXTADDITIONAL", + "LATINEXTB", + "LATINEXTC", + "LATINEXTD", + "LATINEXTENDEDA", + "LATINEXTENDEDADDITIONAL", + "LATINEXTENDEDB", + "LATINEXTENDEDC", + "LATINEXTENDEDD", + "LATN", + "LB", + "LE", + "LEADINGJAMO", + "LEFT", + "LEFTANDRIGHT", + "LEFTJOINING", + "LEFTTORIGHT", + "LEFTTORIGHTEMBEDDING", + "LEFTTORIGHTISOLATE", + "LEFTTORIGHTOVERRIDE", + "LEPC", + "LEPCHA", + "LETTER", + "LETTERLIKESYMBOLS", + "LETTERNUMBER", + "LF", + "LIMB", + "LIMBU", + "LINB", + "LINEARB", + "LINEARBIDEOGRAMS", + "LINEARBSYLLABARY", + "LINEBREAK", + "LINEFEED", + "LINESEPARATOR", + "LISU", + "LL", + "LM", + "LO", + "LOE", + "LOGICALORDEREXCEPTION", + "LOWER", + "LOWERCASE", + "LOWERCASELETTER", + "LOWSURROGATES", + "LRE", + "LRI", + "LRO", + "LT", + "LU", + "LV", + "LVSYLLABLE", + "LVT", + "LVTSYLLABLE", + "LYCI", + "LYCIAN", + "LYDI", + "LYDIAN", + "M", + "M&", + "MAHJONG", + "MAHJONGTILES", + "MALAYALAM", + "MAND", + "MANDAIC", + "MANDATORYBREAK", + "MARK", + "MATH", + "MATHALPHANUM", + "MATHEMATICALALPHANUMERICSYMBOLS", + "MATHEMATICALOPERATORS", + "MATHOPERATORS", + "MATHSYMBOL", + "MB", + "MC", + "ME", + "MED", + "MEDIAL", + "MEEM", + "MEETEIMAYEK", + "MEETEIMAYEKEXT", + "MEETEIMAYEKEXTENSIONS", + "MERC", + "MERO", + "MEROITICCURSIVE", + "MEROITICHIEROGLYPHS", + "MIAO", + "MIDLETTER", + "MIDNUM", + "MIDNUMLET", + "MIM", + "MISCARROWS", + "MISCELLANEOUSMATHEMATICALSYMBOLSA", + "MISCELLANEOUSMATHEMATICALSYMBOLSB", + "MISCELLANEOUSSYMBOLS", + "MISCELLANEOUSSYMBOLSANDARROWS", + "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS", + "MISCELLANEOUSTECHNICAL", + "MISCMATHSYMBOLSA", + "MISCMATHSYMBOLSB", + "MISCPICTOGRAPHS", + "MISCSYMBOLS", + "MISCTECHNICAL", + "ML", + "MLYM", + "MN", + "MODIFIERLETTER", + "MODIFIERLETTERS", + "MODIFIERSYMBOL", + "MODIFIERTONELETTERS", + "MODIFYINGLETTER", + "MONG", + "MONGOLIAN", + "MTEI", + "MUSIC", + "MUSICALSYMBOLS", + "MYANMAR", + "MYANMAREXTA", + "MYANMAREXTENDEDA", + "MYMR", + "N", + "N&", + "NA", + "NAN", + "NAR", + "NARROW", + "NB", + "NCHAR", + "ND", + "NEUTRAL", + "NEWLINE", + "NEWTAILUE", + "NEXTLINE", + "NK", + "NKO", + "NKOO", + "NL", + "NO", + "NOBLOCK", + "NOBREAK", + "NOJOININGGROUP", + "NONCHARACTERCODEPOINT", + "NONE", + "NONJOINING", + "NONSPACINGMARK", + "NONSTARTER", + "NOON", + "NOTAPPLICABLE", + "NOTREORDERED", + "NR", + "NS", + "NSM", + "NT", + "NU", + "NUKTA", + "NUMBER", + "NUMBERFORMS", + "NUMERIC", + "NUMERICTYPE", + "NUMERICVALUE", + "NUN", + "NV", + "NYA", + "OALPHA", + "OCR", + "ODI", + "OGAM", + "OGHAM", + "OGREXT", + "OIDC", + "OIDS", + "OLCHIKI", + "OLCK", + "OLDITALIC", + "OLDPERSIAN", + "OLDSOUTHARABIAN", + "OLDTURKIC", + "OLETTER", + "OLOWER", + "OMATH", + "ON", + "OP", + "OPENPUNCTUATION", + "OPTICALCHARACTERRECOGNITION", + "ORIYA", + "ORKH", + "ORYA", + "OSMA", + "OSMANYA", + "OTHER", + "OTHERALPHABETIC", + "OTHERDEFAULTIGNORABLECODEPOINT", + "OTHERGRAPHEMEEXTEND", + "OTHERIDCONTINUE", + "OTHERIDSTART", + "OTHERLETTER", + "OTHERLOWERCASE", + "OTHERMATH", + "OTHERNEUTRAL", + "OTHERNUMBER", + "OTHERPUNCTUATION", + "OTHERSYMBOL", + "OTHERUPPERCASE", + "OUPPER", + "OV", + "OVERLAY", + "OVERSTRUCK", + "P", + "P&", + "PARAGRAPHSEPARATOR", + "PATSYN", + "PATTERNSYNTAX", + "PATTERNWHITESPACE", + "PATWS", + "PC", + "PD", + "PDF", + "PDI", + "PE", + "PF", + "PHAG", + "PHAGSPA", + "PHAISTOS", + "PHAISTOSDISC", + "PHLI", + "PHNX", + "PHOENICIAN", + "PHONETICEXT", + "PHONETICEXTENSIONS", + "PHONETICEXTENSIONSSUPPLEMENT", + "PHONETICEXTSUP", + "PI", + "PLAYINGCARDS", + "PLRD", + "PO", + "POPDIRECTIONALFORMAT", + "POPDIRECTIONALISOLATE", + "POSTFIXNUMERIC", + "PR", + "PREFIXNUMERIC", + "PREPEND", + "PRINT", + "PRIVATEUSE", + "PRIVATEUSEAREA", + "PRTI", + "PS", + "PUA", + "PUNCT", + "PUNCTUATION", + "QAAC", + "QAAI", + "QAF", + "QAPH", + "QMARK", + "QU", + "QUOTATION", + "QUOTATIONMARK", + "R", + "RADICAL", + "REGIONALINDICATOR", + "REGISTERSHIFTER", + "REH", + "REJANG", + "REVERSEDPE", + "RI", + "RIGHT", + "RIGHTJOINING", + "RIGHTTOLEFT", + "RIGHTTOLEFTEMBEDDING", + "RIGHTTOLEFTISOLATE", + "RIGHTTOLEFTOVERRIDE", + "RJNG", + "RLE", + "RLI", + "RLO", + "ROHINGYAYEH", + "RUMI", + "RUMINUMERALSYMBOLS", + "RUNIC", + "RUNR", + "S", + "S&", + "SA", + "SAD", + "SADHE", + "SAMARITAN", + "SAMR", + "SARB", + "SAUR", + "SAURASHTRA", + "SB", + "SC", + "SCONTINUE", + "SCRIPT", + "SD", + "SE", + "SEEN", + "SEGMENTSEPARATOR", + "SEMKATH", + "SENTENCEBREAK", + "SEP", + "SEPARATOR", + "SG", + "SHARADA", + "SHAVIAN", + "SHAW", + "SHIN", + "SHRD", + "SINGLEQUOTE", + "SINH", + "SINHALA", + "SK", + "SM", + "SMALL", + "SMALLFORMS", + "SMALLFORMVARIANTS", + "SML", + "SO", + "SOFTDOTTED", + "SORA", + "SORASOMPENG", + "SP", + "SPACE", + "SPACESEPARATOR", + "SPACINGMARK", + "SPACINGMODIFIERLETTERS", + "SPECIALS", + "SQ", + "SQR", + "SQUARE", + "ST", + "STERM", + "SUB", + "SUND", + "SUNDANESE", + "SUNDANESESUP", + "SUNDANESESUPPLEMENT", + "SUP", + "SUPARROWSA", + "SUPARROWSB", + "SUPER", + "SUPERANDSUB", + "SUPERSCRIPTSANDSUBSCRIPTS", + "SUPMATHOPERATORS", + "SUPPLEMENTALARROWSA", + "SUPPLEMENTALARROWSB", + "SUPPLEMENTALMATHEMATICALOPERATORS", + "SUPPLEMENTALPUNCTUATION", + "SUPPLEMENTARYPRIVATEUSEAREAA", + "SUPPLEMENTARYPRIVATEUSEAREAB", + "SUPPUAA", + "SUPPUAB", + "SUPPUNCTUATION", + "SURROGATE", + "SWASHKAF", + "SY", + "SYLO", + "SYLOTINAGRI", + "SYMBOL", + "SYRC", + "SYRIAC", + "SYRIACWAW", + "T", + "TAGALOG", + "TAGB", + "TAGBANWA", + "TAGS", + "TAH", + "TAILE", + "TAITHAM", + "TAIVIET", + "TAIXUANJING", + "TAIXUANJINGSYMBOLS", + "TAKR", + "TAKRI", + "TALE", + "TALU", + "TAMIL", + "TAML", + "TAVT", + "TAW", + "TEHMARBUTA", + "TEHMARBUTAGOAL", + "TELU", + "TELUGU", + "TERM", + "TERMINALPUNCTUATION", + "TETH", + "TFNG", + "TGLG", + "THAA", + "THAANA", + "THAI", + "TIBETAN", + "TIBT", + "TIFINAGH", + "TITLECASELETTER", + "TONELETTER", + "TONEMARK", + "TOP", + "TOPANDBOTTOM", + "TOPANDBOTTOMANDRIGHT", + "TOPANDLEFT", + "TOPANDLEFTANDRIGHT", + "TOPANDRIGHT", + "TRAILINGJAMO", + "TRANSPARENT", + "TRANSPORTANDMAP", + "TRANSPORTANDMAPSYMBOLS", + "TRUE", + "U", + "UCAS", + "UCASEXT", + "UGAR", + "UGARITIC", + "UIDEO", + "UNASSIGNED", + "UNIFIEDCANADIANABORIGINALSYLLABICS", + "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED", + "UNIFIEDIDEOGRAPH", + "UNKNOWN", + "UP", + "UPPER", + "UPPERCASE", + "UPPERCASELETTER", + "V", + "VAI", + "VAII", + "VARIATIONSELECTOR", + "VARIATIONSELECTORS", + "VARIATIONSELECTORSSUPPLEMENT", + "VEDICEXT", + "VEDICEXTENSIONS", + "VERT", + "VERTICAL", + "VERTICALFORMS", + "VIRAMA", + "VISARGA", + "VISUALORDERLEFT", + "VOWEL", + "VOWELDEPENDENT", + "VOWELINDEPENDENT", + "VOWELJAMO", + "VR", + "VS", + "VSSUP", + "W", + "WAW", + "WB", + "WHITESPACE", + "WIDE", + "WJ", + "WORD", + "WORDBREAK", + "WORDJOINER", + "WS", + "WSPACE", + "XDIGIT", + "XIDC", + "XIDCONTINUE", + "XIDS", + "XIDSTART", + "XPEO", + "XSUX", + "XX", + "Y", + "YEH", + "YEHBARREE", + "YEHWITHTAIL", + "YES", + "YI", + "YIII", + "YIJING", + "YIJINGHEXAGRAMSYMBOLS", + "YIRADICALS", + "YISYLLABLES", + "YUDH", + "YUDHHE", + "Z", + "Z&", + "ZAIN", + "ZHAIN", + "ZINH", + "ZL", + "ZP", + "ZS", + "ZW", + "ZWSPACE", + "ZYYY", + "ZZZZ", +}; + +/* strings: 10575 bytes. */ + +/* properties. */ + +RE_Property re_properties[] = { + { 506, 0, 0}, + { 504, 0, 0}, + { 231, 1, 1}, + { 230, 1, 1}, + { 958, 2, 2}, + { 956, 2, 2}, + {1118, 3, 3}, + {1113, 3, 3}, + { 521, 4, 4}, + { 505, 4, 4}, + { 964, 5, 5}, + { 955, 5, 5}, + { 732, 6, 6}, + { 155, 7, 6}, + { 154, 7, 6}, + { 707, 8, 6}, + { 706, 8, 6}, + {1088, 9, 6}, + {1087, 9, 6}, + { 271, 10, 6}, + { 273, 11, 6}, + { 324, 11, 6}, + { 319, 12, 6}, + { 399, 12, 6}, + { 321, 13, 6}, + { 401, 13, 6}, + { 320, 14, 6}, + { 400, 14, 6}, + { 317, 15, 6}, + { 397, 15, 6}, + { 318, 16, 6}, + { 398, 16, 6}, + { 585, 17, 6}, + { 581, 17, 6}, + { 577, 18, 6}, + { 576, 18, 6}, + {1126, 19, 6}, + {1125, 19, 6}, + {1124, 20, 6}, + {1123, 20, 6}, + { 424, 21, 6}, + { 432, 21, 6}, + { 522, 22, 6}, + { 530, 22, 6}, + { 520, 23, 6}, + { 524, 23, 6}, + { 523, 24, 6}, + { 531, 24, 6}, + {1114, 25, 6}, + { 987, 25, 6}, + {1121, 25, 6}, + { 223, 26, 6}, + { 221, 26, 6}, + { 620, 27, 6}, + { 618, 27, 6}, + { 417, 28, 6}, + { 574, 29, 6}, + { 921, 30, 6}, + { 918, 30, 6}, + {1051, 31, 6}, + {1050, 31, 6}, + { 862, 32, 6}, + { 844, 32, 6}, + { 565, 33, 6}, + { 564, 33, 6}, + { 186, 34, 6}, + { 144, 34, 6}, + { 855, 35, 6}, + { 828, 35, 6}, + { 579, 36, 6}, + { 578, 36, 6}, + { 434, 37, 6}, + { 433, 37, 6}, + { 483, 38, 6}, + { 481, 38, 6}, + { 861, 39, 6}, + { 843, 39, 6}, + { 867, 40, 6}, + { 868, 40, 6}, + { 806, 41, 6}, + { 792, 41, 6}, + { 857, 42, 6}, + { 833, 42, 6}, + { 583, 43, 6}, + { 582, 43, 6}, + { 586, 44, 6}, + { 584, 44, 6}, + { 923, 45, 6}, + {1084, 46, 6}, + {1080, 46, 6}, + { 856, 47, 6}, + { 830, 47, 6}, + { 426, 48, 6}, + { 425, 48, 6}, + { 983, 49, 6}, + { 959, 49, 6}, + { 705, 50, 6}, + { 704, 50, 6}, + { 859, 51, 6}, + { 835, 51, 6}, + { 858, 52, 6}, + { 834, 52, 6}, + { 996, 53, 6}, + {1093, 54, 6}, + {1109, 54, 6}, + { 877, 55, 6}, + { 878, 55, 6}, + { 876, 56, 6}, + { 875, 56, 6}, + { 553, 57, 7}, + { 572, 57, 7}, + { 222, 58, 8}, + { 213, 58, 8}, + { 266, 59, 9}, + { 276, 59, 9}, + { 423, 60, 10}, + { 447, 60, 10}, + { 451, 61, 11}, + { 450, 61, 11}, + { 621, 62, 12}, + { 616, 62, 12}, + { 622, 63, 13}, + { 623, 63, 13}, + { 697, 64, 14}, + { 675, 64, 14}, + { 823, 65, 15}, + { 817, 65, 15}, + { 824, 66, 16}, + { 826, 66, 16}, + { 225, 67, 6}, + { 224, 67, 6}, + { 589, 68, 17}, + { 597, 68, 17}, + { 591, 69, 18}, + { 598, 69, 18}, + { 158, 70, 6}, + { 153, 70, 6}, + { 165, 71, 6}, + { 185, 72, 6}, + { 187, 73, 6}, + { 229, 74, 6}, + { 519, 75, 6}, + { 906, 76, 6}, + {1117, 77, 6}, + {1122, 78, 6}, +}; + +/* properties: 580 bytes. */ + +/* property values. */ + +RE_PropertyValue re_property_values[] = { + {1081, 0, 0}, + { 355, 0, 0}, + {1089, 0, 1}, + { 714, 0, 1}, + { 708, 0, 2}, + { 701, 0, 2}, + {1061, 0, 3}, + { 713, 0, 3}, + { 771, 0, 4}, + { 702, 0, 4}, + { 860, 0, 5}, + { 703, 0, 5}, + { 809, 0, 6}, + { 770, 0, 6}, + { 465, 0, 7}, + { 740, 0, 7}, + { 989, 0, 8}, + { 739, 0, 8}, + { 422, 0, 9}, + { 438, 0, 9}, + { 793, 0, 9}, + { 689, 0, 10}, + { 801, 0, 10}, + { 864, 0, 11}, + { 802, 0, 11}, + { 988, 0, 12}, + {1150, 0, 12}, + { 699, 0, 13}, + {1148, 0, 13}, + { 874, 0, 14}, + {1149, 0, 14}, + { 382, 0, 15}, + { 356, 0, 15}, + { 275, 0, 15}, + { 497, 0, 16}, + { 314, 0, 16}, + { 907, 0, 17}, + { 357, 0, 17}, + {1018, 0, 18}, + { 391, 0, 18}, + { 418, 0, 19}, + { 880, 0, 19}, + { 847, 0, 20}, + { 910, 0, 20}, + { 353, 0, 21}, + { 883, 0, 21}, + { 372, 0, 22}, + { 879, 0, 22}, + { 865, 0, 23}, + { 899, 0, 23}, + { 737, 0, 24}, + { 977, 0, 24}, + { 395, 0, 25}, + { 956, 0, 25}, + { 773, 0, 26}, + { 976, 0, 26}, + { 866, 0, 27}, + { 982, 0, 27}, + { 596, 0, 28}, + { 896, 0, 28}, + { 492, 0, 29}, + { 884, 0, 29}, + { 854, 0, 30}, + { 260, 0, 30}, + { 259, 0, 30}, + { 687, 0, 31}, + { 653, 0, 31}, + { 654, 0, 31}, + { 731, 0, 32}, + { 723, 0, 32}, + { 724, 0, 32}, + { 363, 0, 32}, + { 820, 0, 33}, + { 785, 0, 33}, + { 786, 0, 33}, + { 913, 0, 34}, + { 912, 0, 34}, + { 873, 0, 34}, + { 872, 0, 34}, + {1023, 0, 35}, + { 946, 0, 35}, + { 945, 0, 35}, + { 966, 0, 36}, + {1144, 0, 36}, + {1143, 0, 36}, + { 272, 0, 37}, + { 803, 1, 0}, + { 791, 1, 0}, + { 209, 1, 1}, + { 185, 1, 1}, + { 663, 1, 2}, + { 661, 1, 2}, + { 662, 1, 2}, + { 669, 1, 3}, + { 664, 1, 3}, + { 671, 1, 4}, + { 666, 1, 4}, + { 606, 1, 5}, + { 605, 1, 5}, + { 990, 1, 6}, + { 772, 1, 6}, + { 359, 1, 7}, + { 435, 1, 7}, + { 526, 1, 8}, + { 525, 1, 8}, + { 404, 1, 9}, + { 410, 1, 10}, + { 409, 1, 10}, + { 411, 1, 10}, + { 181, 1, 11}, + { 559, 1, 12}, + { 168, 1, 13}, + {1025, 1, 14}, + { 180, 1, 15}, + { 179, 1, 15}, + {1056, 1, 16}, + { 799, 1, 17}, + { 950, 1, 18}, + { 729, 1, 19}, + { 170, 1, 20}, + { 169, 1, 20}, + { 429, 1, 21}, + { 219, 1, 22}, + { 534, 1, 23}, + { 532, 1, 24}, + { 849, 1, 25}, + {1042, 1, 26}, + {1049, 1, 27}, + { 636, 1, 28}, + { 727, 1, 29}, + { 975, 1, 30}, + {1057, 1, 31}, + { 658, 1, 32}, + {1058, 1, 33}, + { 781, 1, 34}, + { 510, 1, 35}, + { 549, 1, 36}, + { 611, 1, 36}, + { 469, 1, 37}, + { 475, 1, 38}, + { 474, 1, 38}, + { 323, 1, 39}, + {1082, 1, 40}, + { 264, 1, 40}, + {1076, 1, 40}, + { 832, 1, 41}, + { 943, 1, 42}, + {1028, 1, 43}, + { 556, 1, 44}, + { 255, 1, 45}, + {1030, 1, 46}, + { 646, 1, 47}, + { 777, 1, 48}, + {1083, 1, 49}, + {1077, 1, 49}, + { 692, 1, 50}, + {1033, 1, 51}, + { 796, 1, 52}, + { 647, 1, 53}, + { 253, 1, 54}, + {1034, 1, 55}, + { 204, 1, 56}, + { 999, 1, 57}, + { 210, 1, 58}, + { 686, 1, 59}, + { 836, 1, 60}, + {1001, 1, 61}, + {1000, 1, 61}, + {1097, 1, 62}, + {1096, 1, 62}, + { 893, 1, 63}, + { 892, 1, 63}, + { 894, 1, 64}, + { 895, 1, 64}, + { 361, 1, 65}, + { 437, 1, 65}, + { 670, 1, 66}, + { 665, 1, 66}, + { 528, 1, 67}, + { 527, 1, 67}, + { 507, 1, 68}, + { 913, 1, 68}, + {1007, 1, 69}, + {1006, 1, 69}, + { 396, 1, 70}, + { 360, 1, 71}, + { 436, 1, 71}, + { 364, 1, 71}, + { 688, 1, 72}, + { 821, 1, 73}, + { 184, 1, 74}, + { 735, 1, 75}, + { 736, 1, 75}, + { 762, 1, 76}, + { 767, 1, 76}, + { 383, 1, 77}, + { 848, 1, 78}, + { 829, 1, 78}, + { 458, 1, 79}, + { 457, 1, 79}, + { 241, 1, 80}, + { 232, 1, 81}, + { 508, 1, 82}, + { 759, 1, 83}, + { 766, 1, 83}, + { 439, 1, 84}, + { 757, 1, 85}, + { 763, 1, 85}, + {1009, 1, 86}, + {1003, 1, 86}, + { 247, 1, 87}, + { 246, 1, 87}, + {1010, 1, 88}, + {1004, 1, 88}, + { 758, 1, 89}, + { 764, 1, 89}, + {1011, 1, 90}, + {1008, 1, 90}, + { 760, 1, 91}, + { 756, 1, 91}, + { 515, 1, 92}, + { 672, 1, 93}, + { 667, 1, 93}, + { 385, 1, 94}, + { 512, 1, 95}, + { 511, 1, 95}, + {1060, 1, 96}, + { 472, 1, 97}, + { 470, 1, 97}, + { 407, 1, 98}, + { 405, 1, 98}, + {1012, 1, 99}, + {1017, 1, 99}, + { 341, 1, 100}, + { 340, 1, 100}, + { 635, 1, 101}, + { 634, 1, 101}, + { 580, 1, 102}, + { 576, 1, 102}, + { 344, 1, 103}, + { 343, 1, 103}, + { 570, 1, 104}, + { 638, 1, 105}, + { 235, 1, 106}, + { 548, 1, 107}, + { 369, 1, 107}, + { 633, 1, 108}, + { 237, 1, 109}, + { 236, 1, 109}, + { 342, 1, 110}, + { 641, 1, 111}, + { 639, 1, 111}, + { 462, 1, 112}, + { 461, 1, 112}, + { 330, 1, 113}, + { 328, 1, 113}, + { 346, 1, 114}, + { 336, 1, 114}, + {1138, 1, 115}, + {1137, 1, 115}, + { 345, 1, 116}, + { 327, 1, 116}, + {1140, 1, 117}, + {1139, 1, 118}, + { 700, 1, 119}, + {1091, 1, 120}, + { 408, 1, 121}, + { 406, 1, 121}, + { 206, 1, 122}, + { 774, 1, 123}, + { 673, 1, 124}, + { 668, 1, 124}, + {1022, 1, 125}, + { 366, 1, 126}, + { 590, 1, 126}, + { 886, 1, 127}, + { 954, 1, 128}, + { 431, 1, 129}, + { 430, 1, 129}, + { 642, 1, 130}, + { 927, 1, 131}, + { 550, 1, 132}, + { 612, 1, 132}, + { 615, 1, 133}, + { 316, 1, 134}, + { 783, 1, 135}, + { 782, 1, 135}, + {1035, 1, 136}, + { 746, 1, 137}, + { 745, 1, 137}, + { 473, 1, 138}, + { 471, 1, 138}, + { 744, 1, 139}, + { 552, 1, 140}, + { 547, 1, 140}, + { 551, 1, 141}, + { 613, 1, 141}, + { 568, 1, 142}, + { 566, 1, 143}, + { 567, 1, 143}, + { 709, 1, 144}, + { 908, 1, 145}, + { 911, 1, 145}, + { 907, 1, 145}, + { 332, 1, 146}, + { 334, 1, 146}, + { 157, 1, 147}, + { 156, 1, 147}, + { 177, 1, 148}, + { 175, 1, 148}, + {1094, 1, 149}, + {1109, 1, 149}, + {1100, 1, 150}, + { 362, 1, 151}, + { 541, 1, 151}, + { 331, 1, 152}, + { 329, 1, 152}, + { 980, 1, 153}, + { 979, 1, 153}, + { 178, 1, 154}, + { 176, 1, 154}, + { 543, 1, 155}, + { 540, 1, 155}, + { 991, 1, 156}, + { 696, 1, 157}, + { 695, 1, 158}, + { 143, 1, 159}, + { 163, 1, 160}, + { 164, 1, 161}, + { 888, 1, 162}, + { 887, 1, 162}, + { 720, 1, 163}, + { 269, 1, 164}, + { 838, 1, 165}, + { 518, 1, 166}, + {1079, 1, 167}, + { 839, 1, 168}, + { 427, 1, 169}, + { 969, 1, 170}, + { 853, 1, 171}, + { 403, 1, 172}, + { 587, 1, 173}, + { 891, 1, 174}, + { 722, 1, 175}, + { 750, 1, 176}, + { 749, 1, 177}, + { 645, 1, 178}, + { 840, 1, 179}, + { 198, 1, 180}, + { 600, 1, 181}, + { 599, 1, 182}, + { 841, 1, 183}, + { 942, 1, 184}, + { 941, 1, 184}, + { 244, 1, 185}, + { 627, 1, 186}, + { 985, 1, 187}, + { 315, 1, 188}, + { 968, 1, 189}, + {1039, 1, 190}, + { 392, 1, 191}, + { 394, 1, 192}, + { 393, 1, 192}, + { 453, 1, 193}, + { 208, 1, 194}, + { 207, 1, 194}, + { 751, 1, 195}, + { 631, 1, 196}, + { 630, 1, 196}, + { 258, 1, 197}, + { 257, 1, 197}, + { 780, 1, 198}, + { 779, 1, 198}, + { 162, 1, 199}, + { 161, 1, 199}, + {1037, 1, 200}, + {1036, 1, 200}, + { 387, 1, 201}, + { 386, 1, 201}, + { 734, 1, 202}, + { 733, 1, 202}, + { 173, 1, 203}, + { 172, 1, 203}, + { 726, 1, 204}, + { 725, 1, 204}, + { 441, 1, 205}, + { 440, 1, 205}, + { 897, 1, 206}, + { 459, 1, 207}, + { 460, 1, 207}, + { 464, 1, 208}, + { 463, 1, 208}, + { 761, 1, 209}, + { 765, 1, 209}, + { 454, 1, 210}, + {1073, 1, 211}, + {1072, 1, 211}, + { 150, 1, 212}, + { 149, 1, 212}, + { 347, 1, 213}, + { 337, 1, 213}, + { 348, 1, 214}, + { 338, 1, 214}, + { 349, 1, 215}, + { 339, 1, 215}, + { 333, 1, 216}, + { 335, 1, 216}, + {1031, 1, 217}, + {1095, 1, 218}, + {1110, 1, 218}, + {1013, 1, 219}, + {1015, 1, 219}, + {1014, 1, 220}, + {1016, 1, 220}, + {1085, 2, 0}, + {1154, 2, 0}, + { 365, 2, 1}, + {1153, 2, 1}, + { 660, 2, 2}, + { 674, 2, 2}, + { 525, 2, 3}, + { 529, 2, 3}, + { 404, 2, 4}, + { 412, 2, 4}, + { 181, 2, 5}, + { 183, 2, 5}, + { 559, 2, 6}, + { 558, 2, 6}, + { 168, 2, 7}, + { 167, 2, 7}, + {1025, 2, 8}, + {1024, 2, 8}, + {1056, 2, 9}, + {1055, 2, 9}, + { 429, 2, 10}, + { 428, 2, 10}, + { 219, 2, 11}, + { 218, 2, 11}, + { 534, 2, 12}, + { 535, 2, 12}, + { 532, 2, 13}, + { 533, 2, 13}, + { 849, 2, 14}, + { 851, 2, 14}, + {1042, 2, 15}, + {1043, 2, 15}, + {1049, 2, 16}, + {1048, 2, 16}, + { 636, 2, 17}, + { 649, 2, 17}, + { 727, 2, 18}, + { 769, 2, 18}, + { 975, 2, 19}, + { 974, 2, 19}, + {1057, 2, 20}, + { 658, 2, 21}, + { 659, 2, 21}, + {1058, 2, 22}, + {1059, 2, 22}, + { 781, 2, 23}, + { 784, 2, 23}, + { 510, 2, 24}, + { 509, 2, 24}, + { 547, 2, 25}, + { 546, 2, 25}, + { 469, 2, 26}, + { 468, 2, 26}, + { 323, 2, 27}, + { 322, 2, 27}, + { 263, 2, 28}, + { 267, 2, 28}, + { 832, 2, 29}, + { 831, 2, 29}, + { 943, 2, 30}, + { 944, 2, 30}, + { 646, 2, 31}, + { 648, 2, 31}, + { 777, 2, 32}, + { 776, 2, 32}, + { 570, 2, 33}, + { 569, 2, 33}, + { 638, 2, 34}, + { 629, 2, 34}, + { 235, 2, 35}, + { 234, 2, 35}, + { 545, 2, 36}, + { 554, 2, 36}, + {1135, 2, 37}, + {1136, 2, 37}, + { 838, 2, 38}, + { 610, 2, 38}, + { 518, 2, 39}, + { 517, 2, 39}, + { 427, 2, 40}, + { 446, 2, 40}, + { 593, 2, 41}, + {1147, 2, 41}, + { 915, 2, 41}, + {1028, 2, 42}, + {1054, 2, 42}, + { 556, 2, 43}, + { 555, 2, 43}, + { 255, 2, 44}, + { 254, 2, 44}, + {1030, 2, 45}, + {1029, 2, 45}, + { 692, 2, 46}, + { 691, 2, 46}, + {1033, 2, 47}, + {1040, 2, 47}, + { 694, 2, 48}, + { 693, 2, 48}, + {1079, 2, 49}, + {1078, 2, 49}, + { 969, 2, 50}, + { 970, 2, 50}, + { 853, 2, 51}, + { 852, 2, 51}, + { 402, 2, 52}, + { 389, 2, 52}, + { 246, 2, 53}, + { 245, 2, 53}, + { 253, 2, 54}, + { 252, 2, 54}, + { 385, 2, 55}, + { 384, 2, 55}, + { 914, 2, 55}, + { 796, 2, 56}, + {1041, 2, 56}, + { 515, 2, 57}, + { 514, 2, 57}, + {1060, 2, 58}, + {1053, 2, 58}, + {1022, 2, 59}, + {1021, 2, 59}, + { 839, 2, 60}, + {1127, 2, 60}, + { 645, 2, 61}, + { 644, 2, 61}, + { 204, 2, 62}, + { 203, 2, 62}, + { 392, 2, 63}, + {1128, 2, 63}, + { 891, 2, 64}, + { 890, 2, 64}, + { 886, 2, 65}, + { 885, 2, 65}, + { 799, 2, 66}, + { 800, 2, 66}, + { 999, 2, 67}, + { 998, 2, 67}, + { 686, 2, 68}, + { 685, 2, 68}, + { 836, 2, 69}, + { 837, 2, 69}, + {1091, 2, 70}, + {1092, 2, 70}, + { 954, 2, 71}, + { 953, 2, 71}, + { 642, 2, 72}, + { 628, 2, 72}, + { 927, 2, 73}, + { 936, 2, 73}, + { 720, 2, 74}, + { 719, 2, 74}, + { 269, 2, 75}, + { 268, 2, 75}, + { 722, 2, 76}, + { 721, 2, 76}, + { 316, 2, 77}, + {1034, 2, 78}, + { 657, 2, 78}, + {1035, 2, 79}, + {1044, 2, 79}, + { 198, 2, 80}, + { 199, 2, 80}, + { 453, 2, 81}, + { 452, 2, 81}, + { 950, 2, 82}, + { 951, 2, 82}, + { 700, 2, 83}, + { 206, 2, 84}, + { 205, 2, 84}, + { 615, 2, 85}, + { 614, 2, 85}, + { 744, 2, 86}, + { 778, 2, 86}, + { 587, 2, 87}, + { 182, 2, 87}, + { 840, 2, 88}, + { 952, 2, 88}, + { 600, 2, 89}, + { 909, 2, 89}, + { 599, 2, 90}, + { 889, 2, 90}, + { 841, 2, 91}, + { 850, 2, 91}, + { 627, 2, 92}, + { 651, 2, 92}, + { 210, 2, 93}, + { 211, 2, 93}, + { 244, 2, 94}, + { 243, 2, 94}, + { 729, 2, 95}, + { 728, 2, 95}, + { 315, 2, 96}, + { 261, 2, 96}, + { 749, 2, 97}, + { 747, 2, 97}, + { 750, 2, 98}, + { 748, 2, 98}, + { 751, 2, 99}, + { 898, 2, 99}, + { 968, 2, 100}, + { 972, 2, 100}, + { 985, 2, 101}, + { 984, 2, 101}, + {1039, 2, 102}, + {1038, 2, 102}, + { 640, 2, 103}, + { 854, 3, 0}, + {1129, 3, 0}, + { 444, 3, 1}, + { 445, 3, 1}, + { 973, 3, 2}, + { 992, 3, 2}, + { 560, 3, 3}, + { 571, 3, 3}, + { 390, 3, 4}, + { 690, 3, 5}, + { 795, 3, 6}, + { 801, 3, 6}, + { 482, 3, 7}, + { 924, 3, 8}, + { 929, 3, 8}, + { 497, 3, 9}, + { 495, 3, 9}, + { 638, 3, 10}, + { 625, 3, 10}, + { 152, 3, 11}, + { 676, 3, 11}, + { 752, 3, 12}, + { 768, 3, 12}, + { 753, 3, 13}, + { 770, 3, 13}, + { 754, 3, 14}, + { 738, 3, 14}, + { 822, 3, 15}, + { 818, 3, 15}, + { 484, 3, 16}, + { 479, 3, 16}, + { 854, 4, 0}, + {1129, 4, 0}, + { 390, 4, 1}, + { 690, 4, 2}, + { 382, 4, 3}, + { 355, 4, 3}, + { 482, 4, 4}, + { 479, 4, 4}, + { 924, 4, 5}, + { 929, 4, 5}, + { 989, 4, 6}, + { 977, 4, 6}, + { 653, 4, 7}, + {1090, 4, 8}, + {1027, 4, 9}, + { 715, 4, 10}, + { 717, 4, 11}, + { 905, 4, 12}, + { 854, 5, 0}, + {1129, 5, 0}, + { 390, 5, 1}, + { 690, 5, 2}, + { 482, 5, 3}, + { 479, 5, 3}, + { 965, 5, 4}, + { 960, 5, 4}, + { 497, 5, 5}, + { 495, 5, 5}, + { 986, 5, 6}, + { 706, 5, 7}, + { 703, 5, 7}, + {1087, 5, 8}, + {1086, 5, 8}, + { 842, 5, 9}, + { 676, 5, 9}, + { 822, 5, 10}, + { 818, 5, 10}, + { 192, 5, 11}, + { 188, 5, 11}, + { 996, 5, 12}, + { 995, 5, 12}, + { 351, 5, 13}, + { 350, 5, 13}, + { 957, 5, 14}, + { 956, 5, 14}, + { 802, 6, 0}, + { 785, 6, 0}, + { 485, 6, 0}, + { 486, 6, 0}, + {1134, 6, 1}, + {1130, 6, 1}, + {1027, 6, 1}, + {1074, 6, 1}, + { 812, 7, 0}, + { 787, 7, 0}, + { 677, 7, 1}, + { 653, 7, 1}, + {1107, 7, 2}, + {1090, 7, 2}, + {1070, 7, 3}, + {1027, 7, 3}, + { 716, 7, 4}, + { 715, 7, 4}, + { 718, 7, 5}, + { 717, 7, 5}, + { 681, 8, 0}, + { 653, 8, 0}, + { 932, 8, 1}, + { 922, 8, 1}, + { 476, 8, 2}, + { 455, 8, 2}, + { 477, 8, 3}, + { 466, 8, 3}, + { 478, 8, 4}, + { 467, 8, 4}, + { 174, 8, 5}, + { 160, 8, 5}, + { 367, 8, 6}, + { 391, 8, 6}, + { 874, 8, 7}, + { 200, 8, 7}, + { 962, 8, 8}, + { 945, 8, 8}, + {1114, 8, 9}, + {1120, 8, 9}, + { 863, 8, 10}, + { 845, 8, 10}, + { 240, 8, 11}, + { 233, 8, 11}, + { 809, 8, 12}, + { 816, 8, 12}, + { 171, 8, 13}, + { 147, 8, 13}, + { 684, 8, 14}, + { 712, 8, 14}, + { 935, 8, 15}, + { 939, 8, 15}, + { 682, 8, 16}, + { 710, 8, 16}, + { 933, 8, 17}, + { 937, 8, 17}, + { 900, 8, 18}, + { 881, 8, 18}, + { 683, 8, 19}, + { 711, 8, 19}, + { 934, 8, 20}, + { 938, 8, 20}, + { 494, 8, 21}, + { 500, 8, 21}, + { 901, 8, 22}, + { 882, 8, 22}, + { 813, 9, 0}, + { 814, 9, 0}, + { 1, 9, 0}, + { 870, 9, 1}, + { 2, 9, 1}, + { 869, 9, 1}, + { 819, 9, 2}, + { 798, 9, 2}, + { 119, 9, 2}, + { 632, 9, 3}, + { 652, 9, 3}, + { 126, 9, 3}, + {1101, 9, 4}, + {1108, 9, 4}, + { 132, 9, 4}, + { 277, 9, 5}, + { 13, 9, 5}, + { 280, 9, 6}, + { 22, 9, 6}, + { 282, 9, 7}, + { 25, 9, 7}, + { 285, 9, 8}, + { 28, 9, 8}, + { 289, 9, 9}, + { 32, 9, 9}, + { 290, 9, 10}, + { 33, 9, 10}, + { 291, 9, 11}, + { 35, 9, 11}, + { 292, 9, 12}, + { 36, 9, 12}, + { 293, 9, 13}, + { 38, 9, 13}, + { 294, 9, 14}, + { 39, 9, 14}, + { 295, 9, 15}, + { 43, 9, 15}, + { 296, 9, 16}, + { 48, 9, 16}, + { 297, 9, 17}, + { 53, 9, 17}, + { 298, 9, 18}, + { 59, 9, 18}, + { 299, 9, 19}, + { 64, 9, 19}, + { 300, 9, 20}, + { 66, 9, 20}, + { 301, 9, 21}, + { 67, 9, 21}, + { 302, 9, 22}, + { 68, 9, 22}, + { 303, 9, 23}, + { 69, 9, 23}, + { 304, 9, 24}, + { 70, 9, 24}, + { 305, 9, 25}, + { 77, 9, 25}, + { 306, 9, 26}, + { 81, 9, 26}, + { 307, 9, 27}, + { 82, 9, 27}, + { 308, 9, 28}, + { 83, 9, 28}, + { 309, 9, 29}, + { 84, 9, 29}, + { 310, 9, 30}, + { 85, 9, 30}, + { 311, 9, 31}, + { 86, 9, 31}, + { 312, 9, 32}, + { 131, 9, 32}, + { 313, 9, 33}, + { 138, 9, 33}, + { 278, 9, 34}, + { 20, 9, 34}, + { 279, 9, 35}, + { 21, 9, 35}, + { 281, 9, 36}, + { 24, 9, 36}, + { 283, 9, 37}, + { 26, 9, 37}, + { 284, 9, 38}, + { 27, 9, 38}, + { 286, 9, 39}, + { 30, 9, 39}, + { 287, 9, 40}, + { 31, 9, 40}, + { 195, 9, 41}, + { 191, 9, 41}, + { 47, 9, 41}, + { 193, 9, 42}, + { 189, 9, 42}, + { 49, 9, 42}, + { 194, 9, 43}, + { 190, 9, 43}, + { 50, 9, 43}, + { 216, 9, 44}, + { 52, 9, 44}, + { 228, 9, 44}, + { 215, 9, 45}, + { 200, 9, 45}, + { 54, 9, 45}, + { 217, 9, 46}, + { 242, 9, 46}, + { 55, 9, 46}, + { 678, 9, 47}, + { 653, 9, 47}, + { 56, 9, 47}, + { 930, 9, 48}, + { 57, 9, 48}, + { 922, 9, 48}, + { 141, 9, 49}, + { 147, 9, 49}, + { 58, 9, 49}, + { 140, 9, 50}, + { 139, 9, 50}, + { 60, 9, 50}, + { 142, 9, 51}, + { 166, 9, 51}, + { 61, 9, 51}, + { 443, 9, 52}, + { 419, 9, 52}, + { 62, 9, 52}, + { 442, 9, 53}, + { 63, 9, 53}, + { 414, 9, 53}, + { 604, 9, 54}, + { 607, 9, 54}, + { 65, 9, 54}, + { 288, 9, 55}, + { 196, 9, 56}, + { 807, 10, 0}, + { 265, 10, 1}, + { 262, 10, 1}, + { 368, 10, 2}, + { 358, 10, 2}, + { 496, 10, 3}, + { 804, 10, 4}, + { 791, 10, 4}, + { 595, 10, 5}, + { 594, 10, 5}, + { 742, 10, 6}, + { 741, 10, 6}, + { 491, 10, 7}, + { 490, 10, 7}, + { 609, 10, 8}, + { 608, 10, 8}, + { 325, 10, 9}, + { 456, 10, 9}, + {1005, 10, 10}, + {1002, 10, 10}, + { 997, 10, 11}, + {1099, 10, 12}, + {1098, 10, 12}, + {1115, 10, 13}, + { 790, 10, 14}, + { 789, 10, 14}, + { 978, 10, 15}, + { 981, 10, 15}, + { 994, 10, 16}, + { 993, 10, 16}, + { 499, 10, 17}, + { 498, 10, 17}, + { 794, 11, 0}, + { 785, 11, 0}, + { 159, 11, 1}, + { 139, 11, 1}, + { 542, 11, 2}, + { 536, 11, 2}, + {1115, 11, 3}, + {1111, 11, 3}, + { 501, 11, 4}, + { 485, 11, 4}, + { 790, 11, 5}, + { 787, 11, 5}, + { 805, 12, 0}, + { 146, 12, 1}, + { 148, 12, 2}, + { 151, 12, 3}, + { 214, 12, 4}, + { 220, 12, 5}, + { 415, 12, 6}, + { 416, 12, 7}, + { 449, 12, 8}, + { 489, 12, 9}, + { 493, 12, 10}, + { 502, 12, 11}, + { 503, 12, 12}, + { 539, 12, 13}, + { 544, 12, 14}, + {1047, 12, 14}, + { 557, 12, 15}, + { 561, 12, 16}, + { 562, 12, 17}, + { 563, 12, 18}, + { 626, 12, 19}, + { 637, 12, 20}, + { 650, 12, 21}, + { 655, 12, 22}, + { 656, 12, 23}, + { 743, 12, 24}, + { 755, 12, 25}, + { 811, 12, 26}, + { 825, 12, 27}, + { 883, 12, 28}, + { 916, 12, 29}, + { 917, 12, 30}, + { 926, 12, 31}, + { 928, 12, 32}, + { 948, 12, 33}, + { 949, 12, 34}, + { 961, 12, 35}, + { 963, 12, 36}, + { 971, 12, 37}, + {1019, 12, 38}, + {1032, 12, 39}, + {1045, 12, 40}, + {1046, 12, 41}, + {1052, 12, 42}, + {1112, 12, 43}, + {1026, 12, 44}, + {1131, 12, 45}, + {1132, 12, 46}, + {1133, 12, 47}, + {1141, 12, 48}, + {1142, 12, 49}, + {1145, 12, 50}, + {1146, 12, 51}, + { 643, 12, 52}, + { 488, 12, 53}, + { 256, 12, 54}, + { 487, 12, 55}, + { 827, 12, 56}, + { 940, 12, 57}, + { 808, 13, 0}, + {1075, 13, 0}, + { 619, 13, 1}, + { 259, 13, 1}, + { 448, 13, 2}, + { 413, 13, 2}, + { 931, 13, 3}, + { 922, 13, 3}, + { 680, 13, 4}, + { 653, 13, 4}, + {1071, 13, 5}, + {1027, 13, 5}, + {1085, 14, 0}, + {1129, 14, 0}, + { 847, 14, 1}, + { 846, 14, 1}, + { 353, 14, 2}, + { 350, 14, 2}, + { 920, 14, 3}, + { 919, 14, 3}, + { 516, 14, 4}, + { 513, 14, 4}, + { 810, 14, 5}, + { 815, 14, 5}, + { 480, 14, 6}, + { 479, 14, 6}, + { 251, 14, 7}, + {1020, 14, 7}, + { 592, 14, 8}, + { 607, 14, 8}, + { 904, 14, 9}, + { 903, 14, 9}, + { 902, 14, 10}, + { 899, 14, 10}, + { 822, 14, 11}, + { 818, 14, 11}, + { 155, 14, 12}, + { 147, 14, 12}, + { 579, 14, 13}, + { 575, 14, 13}, + { 601, 14, 14}, + { 602, 14, 14}, + { 588, 14, 14}, + { 574, 14, 15}, + { 573, 14, 15}, + { 363, 14, 16}, + { 354, 14, 16}, + { 249, 14, 17}, + { 212, 14, 17}, + { 248, 14, 18}, + { 202, 14, 18}, + { 987, 14, 19}, + { 986, 14, 19}, + { 730, 14, 20}, + { 227, 14, 20}, + { 270, 14, 21}, + { 390, 14, 21}, + { 698, 14, 22}, + { 690, 14, 22}, + { 381, 14, 23}, + { 274, 14, 23}, + { 370, 14, 24}, + { 947, 14, 24}, + { 159, 14, 25}, + { 145, 14, 25}, + { 250, 14, 26}, + { 201, 14, 26}, + {1018, 14, 27}, + { 967, 14, 27}, + {1152, 14, 28}, + {1151, 14, 28}, + { 797, 14, 29}, + { 801, 14, 29}, + {1119, 14, 30}, + {1116, 14, 30}, + { 617, 14, 31}, + { 624, 14, 32}, + { 623, 14, 33}, + { 537, 14, 34}, + { 538, 14, 35}, + { 352, 14, 36}, + { 388, 14, 36}, + { 560, 14, 37}, + { 571, 14, 37}, + { 371, 14, 38}, + { 326, 14, 38}, + { 924, 14, 39}, + { 929, 14, 39}, + { 807, 15, 0}, + { 822, 15, 1}, + { 818, 15, 1}, + { 438, 15, 2}, + { 432, 15, 2}, + { 421, 15, 3}, + { 420, 15, 3}, + { 788, 16, 0}, + { 0, 16, 1}, + { 1, 16, 2}, + { 4, 16, 3}, + { 3, 16, 4}, + { 12, 16, 5}, + { 11, 16, 6}, + { 10, 16, 7}, + { 9, 16, 8}, + { 72, 16, 9}, + { 8, 16, 10}, + { 7, 16, 11}, + { 6, 16, 12}, + { 76, 16, 13}, + { 42, 16, 14}, + { 5, 16, 15}, + { 75, 16, 16}, + { 109, 16, 17}, + { 41, 16, 18}, + { 74, 16, 19}, + { 91, 16, 20}, + { 108, 16, 21}, + { 121, 16, 22}, + { 2, 16, 23}, + { 73, 16, 24}, + { 40, 16, 25}, + { 107, 16, 26}, + { 71, 16, 27}, + { 120, 16, 28}, + { 90, 16, 29}, + { 133, 16, 30}, + { 106, 16, 31}, + { 23, 16, 32}, + { 114, 16, 33}, + { 29, 16, 34}, + { 119, 16, 35}, + { 34, 16, 36}, + { 126, 16, 37}, + { 37, 16, 38}, + { 132, 16, 39}, + { 13, 16, 40}, + { 22, 16, 41}, + { 25, 16, 42}, + { 28, 16, 43}, + { 32, 16, 44}, + { 33, 16, 45}, + { 35, 16, 46}, + { 36, 16, 47}, + { 38, 16, 48}, + { 39, 16, 49}, + { 43, 16, 50}, + { 48, 16, 51}, + { 53, 16, 52}, + { 59, 16, 53}, + { 64, 16, 54}, + { 66, 16, 55}, + { 67, 16, 56}, + { 68, 16, 57}, + { 69, 16, 58}, + { 70, 16, 59}, + { 77, 16, 60}, + { 81, 16, 61}, + { 82, 16, 62}, + { 83, 16, 63}, + { 84, 16, 64}, + { 85, 16, 65}, + { 86, 16, 66}, + { 87, 16, 67}, + { 88, 16, 68}, + { 89, 16, 69}, + { 92, 16, 70}, + { 96, 16, 71}, + { 97, 16, 72}, + { 98, 16, 73}, + { 100, 16, 74}, + { 101, 16, 75}, + { 102, 16, 76}, + { 103, 16, 77}, + { 104, 16, 78}, + { 105, 16, 79}, + { 110, 16, 80}, + { 115, 16, 81}, + { 122, 16, 82}, + { 127, 16, 83}, + { 134, 16, 84}, + { 14, 16, 85}, + { 44, 16, 86}, + { 78, 16, 87}, + { 93, 16, 88}, + { 111, 16, 89}, + { 116, 16, 90}, + { 123, 16, 91}, + { 128, 16, 92}, + { 135, 16, 93}, + { 15, 16, 94}, + { 45, 16, 95}, + { 79, 16, 96}, + { 94, 16, 97}, + { 112, 16, 98}, + { 117, 16, 99}, + { 124, 16, 100}, + { 129, 16, 101}, + { 136, 16, 102}, + { 16, 16, 103}, + { 46, 16, 104}, + { 80, 16, 105}, + { 95, 16, 106}, + { 113, 16, 107}, + { 118, 16, 108}, + { 125, 16, 109}, + { 130, 16, 110}, + { 137, 16, 111}, + { 17, 16, 112}, + { 51, 16, 113}, + { 99, 16, 114}, + { 18, 16, 115}, + { 19, 16, 116}, + { 787, 17, 0}, + { 930, 17, 1}, + { 678, 17, 2}, + {1103, 17, 3}, + { 679, 17, 4}, + {1064, 17, 5}, + { 238, 17, 6}, + {1065, 17, 7}, + {1069, 17, 8}, + {1067, 17, 9}, + {1068, 17, 10}, + { 239, 17, 11}, + {1066, 17, 12}, + { 871, 17, 13}, + { 603, 17, 14}, + { 854, 18, 0}, + { 226, 18, 1}, + {1102, 18, 2}, + { 197, 18, 3}, + { 819, 18, 4}, + {1101, 18, 5}, + {1106, 18, 6}, + {1105, 18, 7}, + {1104, 18, 8}, + { 378, 18, 9}, + { 373, 18, 10}, + { 374, 18, 11}, + { 379, 18, 12}, + { 380, 18, 13}, + { 377, 18, 14}, + { 375, 18, 15}, + { 376, 18, 16}, + { 775, 18, 17}, + {1062, 18, 18}, + {1063, 18, 19}, + { 925, 18, 20}, +}; + +/* property values: 4976 bytes. */ + +/* Codepoints which expand on full case-folding. */ + +RE_UINT16 re_expand_on_folding[] = { + 223, 304, 329, 496, 912, 944, 1415, 7830, + 7831, 7832, 7833, 7834, 7838, 8016, 8018, 8020, + 8022, 8064, 8065, 8066, 8067, 8068, 8069, 8070, + 8071, 8072, 8073, 8074, 8075, 8076, 8077, 8078, + 8079, 8080, 8081, 8082, 8083, 8084, 8085, 8086, + 8087, 8088, 8089, 8090, 8091, 8092, 8093, 8094, + 8095, 8096, 8097, 8098, 8099, 8100, 8101, 8102, + 8103, 8104, 8105, 8106, 8107, 8108, 8109, 8110, + 8111, 8114, 8115, 8116, 8118, 8119, 8124, 8130, + 8131, 8132, 8134, 8135, 8140, 8146, 8147, 8150, + 8151, 8162, 8163, 8164, 8166, 8167, 8178, 8179, + 8180, 8182, 8183, 8188, 64256, 64257, 64258, 64259, + 64260, 64261, 64262, 64275, 64276, 64277, 64278, 64279, +}; + +/* expand_on_folding: 208 bytes. */ + +/* General_Category. */ + +static RE_UINT8 re_general_category_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 14, 14, 14, 15, + 16, 17, 18, 19, 20, 19, 21, 19, 19, 19, 19, 19, 19, 22, 19, 19, + 19, 19, 19, 19, 19, 19, 23, 19, 19, 19, 24, 19, 19, 25, 26, 19, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 27, 7, 28, 29, 19, 19, 19, 19, 19, 19, 19, 30, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 31, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, +}; + +static RE_UINT8 re_general_category_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 69, 72, 73, + 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83, + 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, + 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 106, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 34, 34, 109, 110, 111, 112, 34, 34, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 76, 123, 124, 125, 126, 127, 76, 76, 76, 76, 76, 76, + 128, 76, 129, 130, 131, 76, 132, 76, 133, 76, 76, 76, 134, 76, 76, 76, + 135, 136, 137, 138, 76, 76, 76, 76, 76, 76, 76, 76, 76, 139, 76, 76, + 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 34, 34, 140, 76, 141, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 34, 34, 34, 34, 142, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 143, 76, 76, 76, 76, 76, 76, 76, 76, 76, 144, 145, + 146, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 69, 147, 148, 149, 150, 76, 151, 76, 152, 153, 154, 155, 156, 157, 158, 159, + 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 160, 161, 76, 76, + 162, 163, 164, 165, 166, 76, 167, 168, 169, 170, 171, 172, 173, 174, 175, 76, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 176, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 177, 34, + 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 179, 76, 180, 181, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 182, +}; + +static RE_UINT16 re_general_category_stage_3[] = { + 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, + 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, + 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, + 13, 13, 42, 43, 9, 44, 45, 11, 46, 47, 32, 48, 49, 50, 51, 52, + 53, 54, 50, 50, 55, 32, 56, 57, 50, 50, 50, 50, 50, 58, 59, 60, + 61, 62, 50, 32, 63, 50, 50, 50, 50, 50, 64, 65, 66, 50, 67, 68, + 50, 69, 70, 71, 50, 72, 73, 73, 73, 73, 74, 73, 73, 73, 75, 76, + 77, 50, 50, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102, + 83, 84, 85, 103, 104, 105, 89, 106, 107, 108, 109, 110, 111, 112, 95, 113, + 114, 115, 85, 116, 117, 118, 89, 119, 120, 115, 85, 121, 122, 123, 89, 124, + 120, 115, 50, 125, 126, 127, 89, 128, 129, 130, 50, 131, 132, 133, 73, 134, + 135, 50, 50, 136, 137, 138, 73, 73, 139, 140, 141, 142, 143, 144, 73, 73, + 145, 146, 147, 148, 149, 50, 150, 151, 152, 153, 32, 154, 155, 156, 73, 73, + 50, 50, 157, 158, 159, 160, 161, 162, 163, 164, 9, 9, 165, 50, 50, 166, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 167, 168, 50, 50, + 167, 50, 50, 169, 170, 171, 50, 50, 50, 170, 50, 50, 50, 172, 173, 174, + 50, 175, 50, 50, 50, 50, 50, 176, 177, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 178, 50, 179, 180, 50, 50, 50, 50, 181, 182, + 183, 184, 50, 185, 50, 186, 183, 187, 50, 50, 50, 188, 189, 190, 191, 192, + 193, 191, 50, 50, 194, 50, 50, 195, 50, 50, 196, 50, 50, 50, 50, 197, + 50, 150, 198, 199, 200, 50, 201, 176, 50, 50, 202, 203, 204, 205, 206, 206, + 50, 207, 50, 50, 50, 208, 209, 210, 191, 191, 211, 73, 73, 73, 73, 73, + 212, 50, 50, 213, 214, 159, 215, 216, 217, 50, 218, 66, 50, 50, 219, 220, + 50, 50, 221, 222, 223, 66, 50, 224, 73, 73, 73, 73, 225, 226, 227, 228, + 11, 11, 229, 27, 27, 27, 230, 231, 11, 232, 27, 27, 32, 32, 233, 234, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 235, 13, 13, 13, 13, 13, 13, + 236, 237, 236, 236, 237, 238, 236, 239, 240, 240, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 73, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 267, 268, 269, 270, 206, 271, 272, 206, 273, + 274, 274, 274, 274, 274, 274, 274, 274, 275, 206, 276, 206, 206, 206, 206, 277, + 206, 278, 274, 279, 206, 280, 281, 282, 206, 206, 283, 73, 284, 73, 266, 266, + 266, 285, 206, 206, 206, 206, 286, 266, 206, 206, 206, 206, 206, 206, 206, 206, + 206, 206, 206, 287, 288, 206, 206, 289, 206, 206, 206, 206, 206, 206, 290, 206, + 291, 206, 206, 206, 206, 206, 292, 293, 266, 294, 206, 206, 295, 274, 296, 274, + 297, 298, 274, 274, 274, 299, 274, 300, 206, 206, 206, 274, 301, 175, 73, 73, + 73, 73, 73, 73, 73, 73, 73, 73, 9, 9, 302, 11, 11, 303, 304, 305, + 13, 13, 13, 13, 13, 13, 306, 307, 11, 11, 308, 50, 50, 50, 309, 310, + 50, 311, 312, 312, 312, 312, 32, 32, 313, 314, 315, 316, 73, 73, 73, 73, + 206, 317, 206, 206, 206, 206, 206, 282, 206, 206, 206, 206, 206, 318, 73, 319, + 320, 321, 322, 323, 135, 50, 50, 50, 50, 324, 177, 50, 50, 50, 50, 325, + 326, 50, 201, 135, 50, 50, 50, 50, 327, 328, 50, 51, 206, 206, 282, 50, + 206, 329, 330, 206, 331, 332, 206, 206, 330, 206, 206, 332, 206, 206, 206, 329, + 50, 50, 50, 197, 206, 206, 206, 206, 50, 50, 50, 50, 150, 73, 73, 73, + 50, 333, 50, 50, 50, 50, 50, 50, 150, 206, 206, 206, 283, 50, 50, 224, + 334, 50, 335, 73, 13, 13, 336, 337, 13, 338, 50, 50, 50, 50, 339, 340, + 31, 341, 342, 343, 13, 13, 13, 344, 345, 346, 347, 73, 73, 73, 73, 348, + 349, 50, 350, 351, 50, 50, 50, 352, 353, 50, 50, 354, 355, 191, 32, 356, + 66, 50, 357, 50, 358, 359, 50, 150, 77, 50, 50, 360, 361, 362, 73, 73, + 50, 50, 363, 364, 365, 366, 50, 367, 50, 50, 50, 368, 369, 370, 371, 372, + 373, 374, 312, 73, 73, 73, 73, 73, 73, 73, 73, 73, 50, 50, 375, 191, + 50, 50, 376, 50, 377, 50, 50, 202, 378, 378, 378, 378, 378, 378, 378, 378, + 379, 379, 379, 379, 379, 379, 379, 379, 50, 50, 50, 50, 50, 50, 201, 50, + 50, 50, 50, 50, 50, 380, 73, 73, 381, 382, 383, 384, 385, 50, 50, 50, + 50, 50, 50, 386, 387, 388, 50, 50, 50, 50, 50, 389, 73, 50, 50, 50, + 50, 390, 50, 50, 195, 73, 73, 391, 32, 392, 233, 393, 394, 395, 396, 397, + 50, 50, 50, 50, 50, 50, 50, 398, 399, 2, 3, 4, 5, 400, 401, 402, + 50, 403, 50, 327, 404, 405, 406, 407, 408, 50, 171, 409, 201, 201, 73, 73, + 50, 50, 50, 50, 50, 50, 50, 51, 410, 266, 266, 411, 267, 267, 267, 412, + 413, 319, 73, 73, 73, 206, 206, 414, 50, 150, 50, 50, 50, 101, 73, 73, + 50, 327, 415, 50, 416, 73, 73, 73, 50, 417, 50, 50, 418, 419, 73, 73, + 9, 9, 420, 11, 11, 50, 50, 50, 50, 201, 191, 73, 73, 73, 73, 73, + 421, 50, 50, 422, 50, 423, 73, 73, 50, 424, 50, 425, 73, 73, 73, 73, + 50, 50, 50, 426, 73, 73, 73, 73, 427, 428, 50, 429, 430, 431, 50, 432, + 50, 50, 50, 433, 50, 434, 50, 435, 50, 50, 50, 50, 436, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 266, 437, 438, 50, 50, 439, 440, 441, 442, 73, + 217, 50, 50, 443, 444, 50, 436, 191, 445, 50, 446, 447, 448, 73, 73, 73, + 217, 50, 50, 449, 450, 191, 73, 73, 50, 50, 451, 452, 191, 73, 73, 73, + 50, 50, 50, 50, 50, 50, 327, 73, 267, 267, 267, 267, 267, 267, 453, 448, + 50, 50, 327, 73, 73, 73, 73, 73, 50, 50, 50, 436, 73, 73, 73, 73, + 50, 50, 50, 50, 176, 454, 203, 455, 456, 457, 73, 73, 73, 73, 73, 73, + 458, 73, 73, 73, 73, 73, 73, 73, 206, 206, 206, 206, 206, 206, 206, 318, + 206, 206, 459, 206, 206, 206, 460, 461, 462, 206, 463, 206, 206, 464, 73, 73, + 206, 206, 206, 206, 465, 73, 73, 73, 206, 206, 206, 206, 206, 283, 266, 466, + 9, 467, 11, 468, 469, 470, 236, 9, 471, 472, 473, 474, 475, 9, 467, 11, + 476, 477, 11, 478, 479, 480, 481, 9, 482, 11, 9, 467, 11, 468, 469, 11, + 236, 9, 471, 481, 9, 482, 11, 9, 467, 11, 483, 9, 484, 485, 486, 487, + 11, 488, 9, 489, 490, 491, 492, 11, 493, 9, 494, 11, 495, 496, 496, 496, + 497, 50, 498, 499, 500, 501, 502, 503, 504, 202, 505, 202, 73, 73, 73, 506, + 206, 206, 319, 206, 206, 206, 206, 206, 206, 282, 329, 507, 291, 291, 73, 73, + 508, 206, 329, 206, 206, 206, 319, 206, 206, 284, 73, 73, 73, 73, 509, 206, + 510, 206, 206, 284, 511, 512, 73, 73, 206, 206, 513, 514, 206, 206, 206, 515, + 206, 282, 206, 206, 516, 73, 206, 513, 206, 206, 206, 329, 517, 206, 206, 206, + 206, 206, 206, 206, 206, 206, 206, 518, 206, 206, 206, 464, 282, 206, 519, 73, + 73, 73, 73, 73, 73, 73, 73, 520, 206, 206, 206, 206, 521, 73, 73, 73, + 206, 206, 206, 206, 318, 73, 73, 73, 206, 206, 206, 206, 206, 206, 206, 282, + 50, 50, 50, 50, 50, 311, 73, 73, 50, 50, 50, 176, 50, 50, 50, 50, + 50, 201, 73, 73, 73, 73, 73, 73, 522, 73, 523, 523, 523, 523, 523, 523, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 73, + 379, 379, 379, 379, 379, 379, 379, 524, +}; + +static RE_UINT8 re_general_category_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, + 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25, + 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32, + 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11, + 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11, + 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34, + 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32, + 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32, + 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, + 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, + 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 46, + 44, 44, 41, 47, 11, 48, 48, 11, 34, 11, 11, 11, 11, 11, 11, 11, + 11, 49, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, + 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 50, 34, 32, 34, 11, + 32, 51, 43, 43, 52, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, + 32, 32, 32, 32, 44, 44, 44, 44, 49, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 48, 53, 2, 2, 2, 54, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 55, 56, 44, 57, 58, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 59, 60, 61, 43, 60, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 44, 44, + 36, 63, 46, 44, 44, 44, 44, 44, 64, 64, 65, 8, 9, 66, 2, 67, + 43, 43, 43, 43, 43, 61, 65, 2, 68, 36, 36, 36, 36, 69, 43, 43, + 7, 7, 7, 7, 7, 2, 2, 36, 70, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 71, 43, 43, 43, 72, 51, 43, 43, 73, 74, 75, 43, 43, 36, + 7, 7, 7, 7, 7, 36, 76, 77, 2, 2, 2, 2, 2, 2, 2, 78, + 69, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 79, 80, 36, + 36, 36, 36, 43, 43, 43, 43, 43, 70, 44, 44, 44, 44, 44, 44, 44, + 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 36, 69, 43, 43, + 43, 43, 40, 21, 2, 81, 44, 44, 36, 36, 36, 43, 43, 74, 43, 43, + 43, 43, 74, 43, 74, 43, 43, 44, 2, 2, 2, 2, 2, 2, 2, 46, + 36, 36, 36, 36, 69, 43, 44, 46, 44, 44, 44, 44, 44, 44, 44, 44, + 62, 36, 36, 36, 36, 36, 62, 44, 44, 44, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 79, 43, 82, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 82, 70, 83, 84, 43, 43, 43, 82, 83, 84, 83, + 69, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7, + 85, 36, 36, 36, 80, 36, 36, 36, 58, 83, 80, 36, 36, 36, 62, 80, + 62, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, + 62, 62, 44, 36, 36, 44, 70, 83, 84, 43, 79, 86, 87, 86, 84, 62, + 44, 44, 44, 86, 44, 44, 36, 80, 36, 43, 44, 7, 7, 7, 7, 7, + 36, 20, 27, 27, 27, 88, 44, 44, 58, 82, 80, 36, 36, 62, 44, 80, + 62, 36, 80, 62, 36, 44, 79, 83, 84, 79, 44, 58, 79, 58, 43, 44, + 58, 44, 44, 44, 80, 36, 62, 62, 44, 44, 44, 7, 7, 7, 7, 7, + 43, 36, 69, 44, 44, 44, 44, 44, 58, 82, 80, 36, 36, 36, 36, 80, + 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, 36, 36, 44, 70, 83, + 84, 43, 43, 58, 82, 86, 84, 44, 62, 44, 44, 44, 44, 44, 44, 44, + 66, 44, 44, 44, 44, 44, 44, 44, 62, 36, 80, 36, 36, 44, 70, 84, + 84, 43, 79, 86, 87, 86, 84, 44, 44, 44, 44, 82, 44, 44, 36, 80, + 77, 27, 27, 27, 44, 44, 44, 44, 44, 70, 80, 36, 36, 62, 44, 36, + 62, 36, 36, 44, 80, 62, 62, 36, 44, 80, 62, 44, 36, 62, 44, 36, + 36, 36, 36, 36, 36, 44, 44, 83, 82, 87, 44, 83, 87, 83, 84, 44, + 62, 44, 44, 86, 44, 44, 44, 44, 27, 89, 67, 67, 88, 90, 44, 44, + 86, 83, 80, 36, 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 80, 36, 36, 44, 80, 43, 82, 83, 87, 43, 79, 43, 43, 44, + 44, 44, 58, 79, 36, 44, 44, 44, 44, 44, 44, 44, 27, 27, 27, 89, + 44, 83, 80, 36, 36, 36, 62, 36, 36, 36, 80, 36, 36, 44, 70, 84, + 83, 83, 87, 82, 87, 83, 43, 44, 44, 44, 86, 87, 44, 44, 44, 62, + 80, 62, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 62, 80, 83, + 84, 43, 79, 83, 87, 83, 84, 62, 44, 44, 44, 86, 44, 44, 44, 44, + 27, 27, 27, 44, 91, 36, 36, 36, 44, 83, 80, 36, 36, 36, 36, 36, + 36, 36, 36, 62, 44, 36, 36, 36, 36, 80, 36, 36, 36, 36, 80, 44, + 36, 36, 36, 62, 44, 79, 44, 86, 83, 43, 79, 79, 83, 83, 83, 83, + 44, 83, 46, 44, 44, 44, 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, + 69, 36, 43, 43, 43, 79, 44, 57, 36, 36, 36, 74, 43, 43, 43, 61, + 7, 7, 7, 7, 7, 2, 44, 44, 80, 62, 62, 80, 62, 62, 80, 44, + 44, 44, 36, 36, 80, 36, 36, 36, 80, 36, 80, 80, 44, 36, 80, 36, + 69, 36, 43, 43, 43, 58, 70, 44, 36, 36, 62, 81, 43, 43, 43, 44, + 7, 7, 7, 7, 7, 44, 36, 36, 76, 67, 2, 2, 2, 2, 2, 2, + 2, 92, 92, 67, 43, 67, 67, 67, 7, 7, 7, 7, 7, 27, 27, 27, + 27, 27, 51, 51, 51, 4, 4, 83, 36, 36, 36, 36, 80, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 62, 44, 58, 43, 43, 43, 43, 43, 43, 82, + 43, 43, 61, 43, 36, 36, 69, 43, 43, 43, 43, 43, 58, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 79, 67, 67, 67, 67, 75, 67, 67, 90, 67, + 2, 2, 92, 67, 21, 46, 44, 44, 36, 36, 36, 36, 36, 93, 84, 43, + 82, 43, 43, 43, 84, 82, 84, 70, 7, 7, 7, 7, 7, 2, 2, 2, + 36, 36, 36, 83, 43, 36, 36, 43, 70, 83, 94, 93, 83, 83, 83, 36, + 69, 43, 70, 36, 36, 36, 36, 36, 36, 82, 84, 82, 83, 83, 84, 93, + 7, 7, 7, 7, 7, 83, 84, 67, 11, 11, 11, 49, 44, 44, 49, 44, + 36, 36, 36, 36, 36, 63, 68, 36, 36, 36, 36, 36, 62, 36, 36, 44, + 36, 36, 36, 62, 62, 36, 36, 44, 62, 36, 36, 44, 36, 36, 36, 62, + 62, 36, 36, 44, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 62, 58, 43, 2, 2, 2, 2, 95, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 96, 44, 67, 67, 67, 67, 67, 44, 44, 44, + 36, 36, 62, 44, 44, 44, 44, 44, 97, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 63, 71, 98, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 99, 100, 44, 36, 36, 36, 36, 36, 63, 2, 101, + 102, 44, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 62, 36, + 36, 43, 79, 44, 44, 44, 44, 44, 36, 43, 61, 46, 44, 44, 44, 44, + 36, 43, 44, 44, 44, 44, 44, 44, 62, 43, 44, 44, 44, 44, 44, 44, + 36, 36, 43, 84, 43, 43, 43, 83, 83, 83, 83, 82, 84, 43, 43, 43, + 43, 43, 2, 85, 2, 66, 69, 44, 7, 7, 7, 7, 7, 44, 44, 44, + 27, 27, 27, 27, 27, 44, 44, 44, 2, 2, 2, 103, 2, 60, 43, 65, + 36, 104, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, + 36, 36, 36, 36, 69, 62, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44, + 43, 82, 83, 84, 82, 83, 44, 44, 83, 82, 83, 83, 84, 43, 44, 44, + 90, 44, 2, 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 44, + 36, 36, 36, 36, 36, 36, 44, 44, 83, 83, 83, 83, 83, 83, 83, 83, + 94, 36, 36, 36, 83, 44, 44, 44, 7, 7, 7, 7, 7, 96, 44, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 36, 36, 36, 69, 82, 84, 44, 2, + 36, 36, 93, 82, 43, 43, 43, 79, 82, 82, 84, 43, 43, 43, 82, 83, + 83, 84, 43, 43, 43, 43, 79, 58, 2, 2, 2, 85, 2, 2, 2, 44, + 43, 43, 94, 36, 36, 36, 36, 36, 36, 36, 82, 43, 43, 82, 82, 83, + 83, 82, 94, 36, 36, 36, 44, 44, 92, 67, 67, 67, 67, 51, 43, 43, + 43, 43, 67, 67, 67, 67, 90, 44, 43, 94, 36, 36, 36, 36, 36, 36, + 93, 43, 43, 83, 43, 84, 83, 36, 36, 36, 36, 82, 43, 83, 84, 84, + 43, 83, 44, 44, 44, 44, 2, 2, 36, 36, 83, 83, 83, 83, 43, 43, + 43, 43, 83, 43, 44, 55, 2, 2, 7, 7, 7, 7, 7, 44, 80, 36, + 36, 36, 36, 36, 40, 40, 40, 2, 2, 2, 2, 2, 44, 44, 44, 44, + 43, 61, 43, 43, 43, 43, 43, 43, 82, 43, 43, 43, 70, 36, 69, 36, + 36, 83, 70, 62, 44, 44, 44, 44, 16, 16, 16, 16, 16, 16, 40, 40, + 40, 40, 40, 40, 40, 45, 16, 16, 16, 16, 16, 16, 45, 16, 16, 16, + 16, 16, 16, 16, 16, 105, 40, 40, 43, 43, 43, 79, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, + 16, 16, 16, 16, 11, 11, 11, 11, 16, 16, 16, 44, 11, 11, 11, 44, + 16, 16, 16, 16, 49, 49, 49, 49, 16, 16, 16, 16, 16, 16, 16, 44, + 16, 16, 16, 16, 106, 106, 106, 106, 16, 16, 107, 16, 11, 11, 108, 109, + 41, 16, 107, 16, 11, 11, 108, 41, 16, 16, 44, 16, 11, 11, 110, 41, + 16, 16, 16, 16, 11, 11, 111, 41, 44, 16, 107, 16, 11, 11, 108, 112, + 113, 113, 113, 113, 113, 114, 64, 64, 115, 115, 115, 2, 116, 117, 116, 117, + 2, 2, 2, 2, 118, 64, 64, 119, 2, 2, 2, 2, 120, 121, 2, 122, + 123, 2, 124, 125, 2, 2, 2, 2, 2, 9, 123, 2, 2, 2, 2, 126, + 64, 64, 65, 64, 64, 64, 64, 64, 127, 44, 27, 27, 27, 8, 124, 128, + 27, 27, 27, 27, 27, 8, 124, 100, 40, 40, 40, 40, 40, 40, 81, 44, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 129, 44, 44, + 43, 43, 43, 43, 43, 43, 130, 52, 131, 52, 131, 43, 43, 43, 43, 43, + 79, 44, 44, 44, 44, 44, 44, 44, 67, 132, 67, 133, 67, 34, 11, 16, + 11, 32, 133, 67, 50, 11, 11, 67, 67, 67, 132, 132, 132, 11, 11, 134, + 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 50, 16, 16, 26, 67, 135, + 27, 27, 27, 27, 27, 27, 27, 27, 101, 101, 101, 101, 101, 101, 101, 101, + 101, 136, 137, 101, 138, 44, 44, 44, 8, 8, 139, 67, 67, 8, 67, 67, + 139, 26, 67, 139, 67, 67, 67, 139, 67, 67, 67, 67, 67, 67, 67, 8, + 67, 139, 139, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 4, 4, 67, 67, + 8, 67, 67, 67, 140, 141, 67, 67, 67, 67, 67, 67, 67, 67, 139, 67, + 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 44, 44, 44, 44, + 67, 67, 67, 67, 67, 90, 44, 44, 27, 27, 27, 27, 27, 27, 67, 67, + 67, 67, 67, 67, 67, 27, 27, 27, 67, 67, 67, 26, 67, 67, 67, 67, + 26, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, + 67, 67, 67, 67, 67, 67, 67, 26, 91, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, + 27, 27, 67, 67, 67, 67, 67, 67, 8, 8, 124, 142, 8, 8, 8, 8, + 8, 8, 8, 4, 4, 4, 4, 4, 8, 124, 143, 143, 143, 143, 143, 143, + 143, 143, 143, 143, 142, 8, 8, 8, 8, 8, 8, 8, 4, 4, 8, 8, + 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 139, 26, 8, 8, 144, 44, + 11, 11, 11, 11, 11, 11, 11, 48, 16, 16, 16, 16, 16, 16, 16, 107, + 32, 11, 32, 34, 34, 34, 34, 11, 32, 32, 34, 16, 16, 16, 40, 11, + 32, 32, 135, 67, 67, 133, 34, 145, 43, 32, 44, 44, 55, 2, 95, 2, + 16, 16, 16, 54, 44, 44, 54, 44, 36, 36, 36, 36, 44, 44, 44, 53, + 46, 44, 44, 44, 44, 44, 44, 58, 36, 36, 36, 62, 44, 44, 44, 44, + 36, 36, 36, 62, 36, 36, 36, 62, 2, 116, 116, 2, 120, 121, 116, 2, + 2, 2, 2, 6, 2, 103, 116, 2, 116, 4, 4, 4, 4, 2, 2, 85, + 2, 2, 2, 2, 2, 115, 44, 44, 67, 67, 67, 67, 67, 91, 67, 67, + 67, 67, 67, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 44, 44, + 1, 2, 146, 147, 4, 4, 4, 4, 4, 67, 4, 4, 4, 4, 148, 149, + 150, 101, 101, 101, 101, 43, 43, 83, 151, 40, 40, 67, 101, 152, 63, 67, + 36, 36, 36, 62, 58, 153, 154, 68, 36, 36, 36, 36, 36, 63, 40, 68, + 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, + 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, + 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, + 155, 27, 27, 27, 27, 27, 27, 27, 36, 36, 104, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 156, 2, 7, 7, 7, 7, 7, 36, 44, 44, + 32, 32, 32, 32, 32, 32, 32, 69, 52, 157, 43, 43, 43, 43, 43, 85, + 32, 32, 32, 32, 44, 44, 44, 58, 36, 36, 36, 101, 101, 101, 101, 101, + 43, 2, 2, 2, 44, 44, 44, 44, 41, 41, 41, 154, 40, 40, 40, 40, + 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, + 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42, 158, 34, 107, + 32, 32, 44, 44, 44, 44, 44, 44, 32, 32, 32, 32, 32, 48, 44, 44, + 44, 44, 44, 44, 40, 35, 36, 36, 36, 70, 36, 70, 36, 69, 36, 36, + 36, 93, 84, 82, 67, 67, 44, 44, 27, 27, 27, 67, 159, 44, 44, 44, + 36, 36, 2, 2, 44, 44, 44, 44, 83, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 83, 83, 83, 83, 83, 83, 83, 83, 79, 44, 44, 44, 44, 2, + 43, 36, 36, 36, 2, 71, 44, 44, 36, 36, 36, 43, 43, 43, 43, 2, + 36, 36, 36, 69, 43, 43, 43, 43, 43, 83, 44, 44, 44, 44, 44, 55, + 36, 69, 83, 43, 43, 83, 82, 83, 160, 2, 2, 2, 2, 2, 2, 53, + 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 36, 36, 69, 43, 43, 82, + 84, 82, 84, 79, 44, 44, 44, 44, 36, 69, 36, 36, 36, 36, 82, 44, + 7, 7, 7, 7, 7, 44, 2, 2, 68, 36, 36, 76, 67, 93, 44, 44, + 70, 43, 70, 69, 70, 36, 36, 43, 69, 62, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 80, 104, 2, 36, 36, 36, 36, 36, 93, 43, 83, + 2, 104, 161, 79, 44, 44, 44, 44, 80, 36, 36, 62, 80, 36, 36, 62, + 80, 36, 36, 62, 44, 44, 44, 44, 36, 93, 84, 83, 82, 160, 84, 44, + 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 62, 44, 80, 36, 36, + 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, + 36, 36, 36, 36, 36, 44, 44, 44, 16, 16, 16, 107, 44, 44, 44, 44, + 44, 54, 16, 16, 44, 44, 80, 70, 36, 36, 36, 36, 164, 36, 36, 36, + 36, 36, 36, 62, 36, 36, 62, 62, 36, 80, 62, 36, 36, 36, 36, 36, + 36, 41, 41, 41, 41, 41, 41, 41, 41, 44, 44, 44, 44, 44, 44, 44, + 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 4, + 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 159, 44, + 2, 2, 2, 165, 125, 44, 44, 44, 6, 166, 167, 143, 143, 143, 143, 143, + 143, 143, 125, 165, 125, 2, 122, 168, 2, 46, 2, 2, 148, 143, 143, 125, + 2, 169, 8, 144, 66, 2, 44, 44, 36, 36, 62, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 62, 78, 55, 2, 3, 2, 4, 5, 6, 2, + 16, 16, 16, 16, 16, 17, 18, 124, 125, 4, 2, 36, 36, 36, 36, 36, + 68, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40, + 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 62, 44, + 20, 170, 88, 129, 26, 8, 139, 90, 44, 44, 44, 44, 78, 64, 67, 44, + 36, 36, 36, 36, 36, 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, + 2, 46, 44, 171, 27, 27, 27, 27, 27, 27, 44, 91, 67, 67, 67, 67, + 101, 101, 138, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 96, 44, 44, + 67, 67, 67, 67, 67, 67, 51, 44, 27, 27, 44, 44, 44, 44, 44, 44, + 147, 36, 36, 36, 36, 102, 44, 44, 36, 36, 36, 36, 36, 36, 36, 55, + 36, 36, 44, 44, 36, 36, 36, 36, 172, 101, 101, 44, 44, 44, 44, 44, + 11, 11, 11, 11, 16, 16, 16, 16, 36, 36, 36, 44, 62, 36, 36, 36, + 36, 36, 36, 80, 62, 44, 62, 80, 36, 36, 36, 55, 27, 27, 27, 27, + 36, 36, 36, 27, 27, 27, 44, 55, 36, 36, 36, 36, 36, 44, 44, 55, + 36, 36, 36, 36, 44, 44, 44, 36, 69, 43, 58, 79, 44, 44, 43, 43, + 36, 36, 80, 36, 80, 36, 36, 36, 36, 36, 44, 44, 43, 79, 44, 58, + 27, 27, 27, 27, 44, 44, 44, 44, 2, 2, 2, 2, 46, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 173, 30, 36, 36, 36, 44, 55, 2, 2, 2, + 36, 36, 36, 44, 27, 27, 27, 27, 36, 62, 44, 44, 27, 27, 27, 27, + 36, 36, 36, 36, 62, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 96, + 84, 94, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, + 43, 43, 43, 61, 2, 2, 2, 44, 44, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 7, 7, 7, 7, 7, 83, 84, 43, 82, 84, 61, 174, 2, + 2, 44, 44, 44, 44, 44, 44, 44, 43, 70, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 69, 43, 43, 84, 43, 43, 43, 79, 7, 7, 7, 7, 7, + 2, 2, 44, 44, 44, 44, 44, 44, 36, 93, 83, 43, 43, 43, 43, 82, + 94, 36, 63, 2, 46, 44, 44, 44, 36, 36, 36, 36, 36, 69, 84, 83, + 43, 43, 43, 84, 44, 44, 44, 44, 101, 102, 44, 44, 44, 44, 44, 44, + 93, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 87, + 44, 44, 44, 44, 44, 44, 44, 58, 43, 73, 40, 40, 40, 40, 40, 40, + 36, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 91, 67, 67, 67, + 67, 67, 175, 84, 43, 67, 175, 83, 83, 176, 64, 64, 64, 177, 43, 43, + 43, 75, 51, 43, 43, 43, 67, 67, 67, 67, 67, 67, 67, 43, 43, 67, + 67, 67, 67, 67, 67, 67, 67, 44, 67, 43, 75, 44, 44, 44, 44, 44, + 27, 44, 44, 44, 44, 44, 44, 44, 11, 11, 11, 11, 11, 16, 16, 16, + 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, + 16, 16, 107, 16, 16, 16, 16, 16, 11, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 48, 11, 44, 48, 49, 48, 49, 11, 48, 11, + 11, 11, 11, 16, 16, 54, 54, 16, 16, 16, 54, 16, 16, 16, 16, 16, + 16, 16, 11, 49, 11, 48, 49, 11, 11, 11, 48, 11, 11, 11, 48, 16, + 16, 16, 16, 16, 11, 49, 11, 48, 11, 11, 48, 48, 44, 11, 11, 11, + 48, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 11, + 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 44, 11, 11, 11, 11, + 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, + 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, + 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, + 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 32, 44, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 36, 36, 80, 36, 36, 36, 36, 36, + 80, 62, 62, 80, 80, 36, 36, 36, 36, 62, 36, 36, 80, 80, 44, 44, + 44, 62, 44, 80, 80, 80, 80, 36, 80, 62, 62, 80, 80, 80, 80, 80, + 80, 62, 62, 80, 36, 62, 36, 36, 36, 62, 36, 36, 80, 36, 62, 62, + 36, 36, 36, 36, 36, 80, 36, 36, 80, 36, 80, 36, 36, 80, 36, 36, + 8, 44, 44, 44, 44, 44, 44, 44, 91, 67, 67, 67, 67, 67, 67, 90, + 27, 27, 27, 27, 27, 96, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, + 67, 90, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 90, 44, 44, 44, + 67, 44, 44, 44, 44, 44, 44, 44, 90, 44, 44, 44, 44, 44, 44, 44, + 67, 67, 67, 91, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, 44, + 67, 67, 90, 67, 67, 90, 44, 44, 90, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 91, 67, 90, 44, 67, 67, 67, 67, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 91, 67, 67, 90, 44, 91, 67, 67, 67, 67, 67, + 78, 44, 44, 44, 44, 44, 44, 44, 64, 64, 64, 64, 64, 64, 64, 64, + 163, 163, 163, 163, 163, 163, 163, 44, +}; + +static RE_UINT8 re_general_category_stage_5[] = { + 15, 15, 12, 23, 23, 23, 25, 23, 20, 21, 23, 24, 23, 19, 9, 9, + 24, 24, 24, 23, 23, 1, 1, 1, 1, 20, 23, 21, 26, 22, 26, 2, + 2, 2, 2, 20, 24, 21, 24, 15, 25, 25, 27, 23, 26, 27, 5, 28, + 24, 16, 27, 26, 27, 24, 11, 11, 26, 11, 5, 29, 11, 23, 1, 24, + 1, 2, 2, 24, 2, 1, 2, 5, 5, 5, 1, 3, 3, 2, 5, 2, + 4, 4, 26, 26, 4, 26, 6, 6, 0, 0, 4, 2, 23, 0, 1, 23, + 1, 0, 0, 1, 24, 1, 27, 6, 7, 7, 0, 4, 0, 2, 0, 23, + 19, 0, 0, 25, 0, 6, 19, 6, 23, 6, 6, 23, 5, 0, 5, 23, + 16, 16, 16, 0, 23, 25, 27, 27, 4, 5, 5, 6, 6, 5, 23, 5, + 6, 16, 6, 4, 4, 6, 6, 27, 5, 27, 27, 5, 0, 16, 6, 0, + 0, 5, 4, 0, 6, 8, 8, 8, 8, 6, 23, 4, 0, 8, 8, 0, + 27, 25, 11, 27, 27, 0, 0, 27, 23, 27, 5, 8, 8, 5, 23, 11, + 11, 0, 19, 5, 12, 5, 5, 20, 21, 0, 10, 10, 10, 0, 19, 23, + 5, 4, 2, 4, 3, 3, 2, 0, 3, 26, 2, 26, 0, 26, 1, 26, + 26, 0, 12, 12, 12, 16, 19, 19, 28, 29, 20, 28, 13, 14, 16, 12, + 23, 28, 29, 23, 23, 22, 22, 23, 24, 20, 21, 23, 23, 12, 11, 4, + 21, 4, 25, 0, 6, 7, 7, 6, 1, 27, 27, 1, 27, 2, 2, 27, + 10, 1, 2, 10, 10, 11, 24, 27, 27, 20, 21, 27, 21, 24, 21, 20, + 24, 0, 2, 6, 27, 4, 5, 10, 19, 20, 21, 21, 27, 10, 19, 4, + 10, 4, 6, 26, 26, 4, 27, 11, 4, 23, 7, 23, 26, 1, 25, 27, + 8, 23, 4, 8, 18, 18, 17, 17, 5, 24, 23, 20, 19, 22, 22, 20, + 22, 22, 24, 19, 24, 26, 0, 11, 23, 10, 5, 11, 23, 16, 27, 8, + 8, 16, 16, 6, +}; + +/* General_Category: 8556 bytes. */ + +RE_UINT32 re_get_general_category(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 11; + code = ch ^ (f << 11); + pos = (RE_UINT32)re_general_category_stage_1[f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_general_category_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_general_category_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_general_category_stage_4[pos + f] << 1; + value = re_general_category_stage_5[pos + code]; + + return value; +} + +/* Block. */ + +static RE_UINT8 re_block_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 14, 15, 15, 15, 16, + 17, 18, 19, 20, 21, 20, 22, 20, 20, 20, 20, 20, 20, 23, 20, 20, + 20, 20, 20, 20, 20, 20, 24, 20, 20, 20, 25, 20, 20, 26, 27, 20, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 29, 30, 31, 32, 20, 20, 20, 20, 20, 20, 20, 33, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 34, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, +}; + +static RE_UINT8 re_block_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, + 29, 30, 31, 31, 32, 32, 32, 33, 34, 34, 34, 34, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 50, 51, 51, + 52, 53, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, + 65, 65, 66, 67, 68, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 89, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, + 104, 104, 104, 104, 104, 104, 104, 105, 106, 106, 106, 106, 106, 106, 106, 106, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 107, 107, 108, 108, 108, 108, 109, 110, 110, 110, 110, 110, 111, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 119, 119, 119, 119, 119, 119, + 125, 119, 126, 127, 128, 119, 129, 119, 130, 119, 119, 119, 131, 119, 119, 119, + 132, 133, 134, 135, 119, 119, 119, 119, 119, 119, 119, 119, 119, 136, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 137, 137, 137, 137, 137, 137, 137, 137, 138, 119, 119, 119, 119, 119, 119, 119, + 139, 139, 139, 139, 139, 139, 139, 139, 140, 119, 119, 119, 119, 119, 119, 119, + 141, 141, 141, 141, 142, 119, 119, 119, 119, 119, 119, 119, 119, 119, 143, 144, + 145, 145, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 146, 146, 147, 147, 148, 119, 149, 119, 150, 150, 150, 150, 150, 150, 150, 150, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 151, 151, 119, 119, + 152, 153, 154, 154, 155, 155, 156, 156, 156, 156, 156, 156, 157, 158, 159, 119, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 161, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 163, 164, + 165, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 166, 166, 166, 166, 167, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 168, 119, 169, 170, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, + 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, +}; + +static RE_UINT8 re_block_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, + 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 17, 17, 17, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, + 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, + 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, + 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 42, 42, 42, 42, 42, 42, + 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, + 50, 50, 50, 50, 50, 51, 51, 51, 52, 52, 52, 52, 52, 52, 53, 53, + 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 19, 19, 19, 19, 19, + 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, + 59, 59, 59, 59, 59, 60, 60, 60, 19, 19, 19, 19, 61, 62, 62, 62, + 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, + 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, + 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, + 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, + 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, + 77, 77, 77, 77, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 82, 82, 82, 82, 82, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 85, 85, 85, 86, 87, 87, 87, 87, 87, 87, 87, 87, + 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, + 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, + 92, 92, 92, 92, 92, 92, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, + 95, 95, 95, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 98, 98, + 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, + 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 19, 102, + 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, + 106, 106, 106, 107, 107, 107, 107, 107, 107, 108, 109, 109, 110, 110, 110, 111, + 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, + 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, + 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, + 117, 118, 118, 118, 118, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 121, 121, 121, 121, 121, 121, 122, 122, 122, 122, 122, 122, + 123, 123, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 125, 125, 125, 126, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 129, 129, + 130, 130, 130, 131, 131, 131, 132, 132, 133, 133, 133, 133, 133, 133, 19, 19, + 134, 134, 134, 134, 134, 134, 135, 135, 136, 136, 136, 136, 136, 136, 137, 137, + 138, 138, 138, 19, 19, 19, 19, 19, 19, 19, 19, 19, 139, 139, 139, 139, + 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 141, 141, 141, 141, 141, + 142, 142, 142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 143, 143, 143, + 144, 144, 144, 144, 144, 144, 144, 144, 145, 145, 145, 145, 145, 145, 145, 145, + 146, 146, 146, 146, 146, 146, 146, 146, 147, 147, 147, 147, 147, 148, 148, 148, + 148, 148, 148, 148, 148, 148, 148, 148, 149, 150, 151, 152, 152, 153, 153, 154, + 154, 154, 154, 154, 154, 154, 154, 154, 155, 155, 155, 155, 155, 155, 155, 155, + 155, 155, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, + 158, 158, 158, 158, 158, 158, 158, 158, 159, 159, 159, 159, 160, 160, 160, 160, + 160, 161, 161, 161, 161, 162, 162, 162, 19, 19, 19, 19, 19, 19, 19, 19, + 163, 163, 164, 164, 164, 164, 19, 19, 165, 165, 165, 166, 166, 19, 19, 19, + 167, 167, 168, 168, 168, 168, 19, 19, 169, 169, 169, 169, 169, 170, 170, 170, + 171, 171, 171, 19, 19, 19, 19, 19, 172, 172, 172, 172, 173, 173, 19, 19, + 174, 174, 175, 175, 19, 19, 19, 19, 176, 176, 177, 177, 177, 177, 177, 177, + 178, 178, 178, 178, 178, 178, 179, 179, 180, 180, 180, 180, 181, 181, 182, 182, + 183, 183, 183, 183, 183, 19, 19, 19, 19, 19, 19, 19, 19, 19, 184, 184, + 185, 185, 185, 185, 185, 185, 185, 185, 186, 186, 186, 186, 186, 187, 187, 187, + 188, 188, 188, 188, 188, 19, 19, 19, 189, 189, 189, 189, 189, 189, 19, 19, + 190, 190, 190, 190, 190, 19, 19, 19, 191, 191, 191, 191, 191, 191, 191, 191, + 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, + 193, 193, 193, 19, 19, 19, 19, 19, 194, 194, 194, 194, 194, 194, 194, 194, + 194, 194, 194, 194, 19, 19, 19, 19, 195, 195, 195, 195, 195, 195, 195, 195, + 195, 195, 19, 19, 19, 19, 19, 19, 196, 196, 196, 196, 196, 196, 196, 196, + 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, + 199, 199, 199, 199, 199, 19, 19, 19, 200, 200, 200, 200, 200, 200, 201, 201, + 202, 202, 202, 202, 202, 202, 202, 202, 203, 203, 203, 203, 203, 203, 203, 203, + 204, 204, 204, 205, 205, 205, 205, 205, 205, 205, 206, 206, 206, 206, 206, 206, + 207, 207, 207, 207, 207, 207, 207, 207, 208, 208, 208, 208, 208, 208, 208, 208, + 209, 209, 209, 209, 209, 209, 209, 209, 210, 210, 210, 210, 210, 19, 19, 19, + 211, 211, 211, 211, 211, 211, 211, 211, 212, 212, 212, 212, 212, 212, 212, 212, + 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 19, 19, + 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 19, 19, 19, 19, 19, 19, + 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 19, 19, 19, 19, 19, 19, + 217, 217, 217, 217, 217, 217, 217, 217, 218, 218, 218, 218, 218, 218, 218, 218, + 218, 218, 218, 218, 218, 218, 218, 19, 219, 219, 219, 219, 219, 219, 219, 219, + 220, 220, 220, 220, 220, 220, 220, 220, +}; + +static RE_UINT8 re_block_stage_4[] = { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, + 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, + 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, + 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, + 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, + 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, + 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, + 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, + 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, + 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, + 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, + 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, + 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, + 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, + 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, + 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, + 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, + 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, + 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 139, 139, 139, 139, + 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, + 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, + 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, + 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, + 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, + 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, + 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, + 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, + 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, + 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, + 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, + 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, + 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, + 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, + 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, + 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, + 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, + 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, + 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, + 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, + 220, 220, 220, 220, +}; + +static RE_UINT8 re_block_stage_5[] = { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 0, 0, 0, 0, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, + 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, + 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, + 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, + 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, + 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, + 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, + 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, + 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, + 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, + 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, + 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, + 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, + 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, + 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, + 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, + 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, + 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, + 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 139, 139, 139, 139, + 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, + 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, + 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, + 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, + 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, + 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, + 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, + 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, + 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, + 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, + 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, + 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, + 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, + 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, + 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, + 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, + 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, + 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, + 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, + 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, + 220, 220, 220, 220, +}; + +/* Block: 4288 bytes. */ + +RE_UINT32 re_get_block(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 11; + code = ch ^ (f << 11); + pos = (RE_UINT32)re_block_stage_1[f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_block_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_block_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_block_stage_4[pos + f] << 2; + value = re_block_stage_5[pos + code]; + + return value; +} + +/* Script. */ + +static RE_UINT8 re_script_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, + 16, 17, 18, 14, 19, 14, 20, 14, 14, 14, 14, 14, 14, 21, 14, 14, + 14, 14, 14, 14, 14, 14, 22, 14, 14, 14, 23, 14, 14, 24, 25, 14, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 26, 7, 27, 28, 14, 14, 14, 14, 14, 14, 14, 29, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +}; + +static RE_UINT8 re_script_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, + 55, 56, 57, 58, 59, 59, 59, 60, 61, 59, 59, 59, 59, 59, 62, 59, + 63, 63, 59, 59, 59, 59, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, + 74, 75, 76, 77, 78, 79, 80, 59, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 81, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 85, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 98, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 72, 72, 99, 100, 101, 102, 103, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 65, 114, 115, 116, 117, 118, 65, 65, 65, 65, 65, 65, + 119, 65, 120, 121, 122, 65, 123, 65, 124, 65, 65, 65, 125, 65, 65, 65, + 126, 127, 128, 129, 65, 65, 65, 65, 65, 65, 65, 65, 65, 130, 65, 65, + 131, 131, 131, 131, 131, 131, 132, 65, 133, 65, 65, 65, 65, 65, 65, 65, + 134, 134, 134, 134, 134, 134, 134, 134, 135, 65, 65, 65, 65, 65, 65, 65, + 136, 136, 136, 136, 137, 65, 65, 65, 65, 65, 65, 65, 65, 65, 138, 139, + 140, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 59, 141, 142, 143, 144, 65, 145, 65, 146, 147, 148, 59, 59, 149, 59, 150, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 151, 152, 65, 65, + 153, 154, 155, 156, 157, 65, 158, 159, 160, 161, 162, 163, 164, 165, 60, 65, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 166, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 167, 72, + 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 72, 72, 72, 72, 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 169, 65, 170, 171, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, +}; + +static RE_UINT16 re_script_stage_3[] = { + 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, + 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11, + 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 16, 17, 18, 19, 17, 18, 20, 21, 22, 22, 23, 22, 24, 25, + 26, 27, 28, 28, 29, 30, 31, 32, 28, 28, 28, 28, 28, 33, 28, 28, + 34, 35, 35, 35, 36, 28, 28, 28, 37, 37, 37, 38, 39, 39, 39, 40, + 41, 41, 42, 43, 44, 45, 46, 46, 46, 46, 47, 46, 46, 46, 48, 49, + 50, 50, 50, 50, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 46, 124, + 125, 126, 126, 127, 126, 128, 46, 46, 129, 130, 131, 132, 133, 134, 46, 46, + 135, 135, 135, 135, 136, 135, 137, 138, 135, 136, 135, 139, 139, 140, 46, 46, + 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 142, 143, 142, 142, 144, + 145, 145, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146, 147, 148, 146, 146, + 147, 146, 146, 149, 150, 151, 146, 146, 146, 150, 146, 146, 146, 152, 146, 153, + 146, 154, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, + 158, 159, 160, 160, 160, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, + 171, 171, 171, 171, 171, 172, 173, 173, 174, 175, 176, 176, 176, 176, 176, 177, + 176, 176, 178, 157, 157, 157, 157, 179, 180, 181, 182, 182, 183, 184, 185, 186, + 187, 187, 188, 187, 189, 190, 171, 171, 191, 192, 193, 193, 193, 194, 193, 195, + 196, 196, 197, 46, 46, 46, 46, 46, 198, 198, 198, 198, 199, 198, 198, 200, + 201, 201, 201, 201, 202, 202, 202, 203, 204, 204, 204, 205, 206, 207, 207, 207, + 46, 46, 46, 46, 208, 209, 210, 211, 4, 4, 212, 4, 4, 213, 214, 215, + 4, 4, 4, 216, 8, 8, 217, 218, 11, 219, 11, 11, 219, 220, 11, 221, + 11, 11, 11, 222, 222, 223, 11, 224, 225, 0, 0, 0, 0, 0, 226, 227, + 228, 229, 0, 230, 46, 8, 8, 231, 0, 0, 232, 233, 234, 0, 4, 4, + 235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 236, 0, 0, 237, 46, 230, 46, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 239, 239, 239, 239, 239, 239, 239, 239, + 0, 0, 0, 0, 240, 241, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 242, 242, 243, 242, 242, 243, 4, 4, 244, 244, 244, 244, 244, 244, 244, 245, + 142, 142, 143, 246, 246, 246, 247, 248, 146, 249, 250, 250, 250, 250, 14, 14, + 0, 0, 0, 251, 46, 46, 46, 46, 252, 253, 252, 252, 252, 252, 252, 254, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 255, 46, 251, + 256, 0, 257, 258, 259, 260, 260, 260, 260, 261, 262, 263, 263, 263, 263, 264, + 265, 266, 267, 268, 145, 145, 145, 145, 269, 0, 266, 270, 0, 0, 236, 263, + 145, 269, 0, 0, 0, 0, 145, 271, 0, 0, 0, 0, 0, 263, 263, 272, + 263, 263, 263, 263, 263, 273, 0, 0, 252, 252, 252, 255, 0, 0, 0, 0, + 252, 252, 252, 252, 274, 46, 46, 46, 275, 275, 275, 275, 275, 275, 275, 275, + 276, 275, 275, 275, 277, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, + 279, 279, 280, 46, 14, 14, 14, 14, 14, 281, 282, 282, 282, 282, 282, 283, + 0, 0, 284, 4, 4, 4, 4, 4, 285, 286, 287, 46, 46, 46, 46, 288, + 289, 289, 290, 241, 291, 291, 291, 292, 293, 293, 293, 293, 294, 295, 50, 296, + 297, 297, 297, 298, 298, 299, 145, 300, 301, 301, 301, 301, 302, 303, 46, 46, + 304, 304, 304, 305, 306, 307, 141, 308, 309, 309, 309, 309, 310, 311, 312, 313, + 314, 315, 250, 46, 46, 46, 46, 46, 46, 46, 46, 46, 312, 312, 316, 317, + 145, 145, 318, 145, 319, 145, 145, 320, 252, 252, 252, 252, 252, 252, 321, 252, + 252, 252, 252, 252, 252, 322, 46, 46, 323, 324, 22, 325, 326, 28, 28, 28, + 28, 28, 28, 28, 327, 328, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 329, 46, 28, 28, 28, 28, 330, 28, 28, 331, 46, 46, 332, + 8, 241, 217, 0, 0, 333, 334, 335, 28, 28, 28, 28, 28, 28, 28, 336, + 238, 0, 1, 2, 1, 2, 337, 262, 263, 338, 145, 269, 339, 340, 341, 342, + 343, 344, 345, 346, 347, 347, 46, 46, 344, 344, 344, 344, 344, 344, 344, 348, + 349, 0, 0, 350, 11, 11, 11, 11, 351, 251, 46, 46, 46, 0, 0, 352, + 353, 354, 355, 355, 355, 356, 46, 46, 357, 358, 359, 360, 361, 46, 46, 46, + 362, 363, 364, 364, 365, 366, 46, 46, 367, 367, 367, 367, 367, 368, 368, 368, + 369, 370, 371, 46, 46, 46, 46, 46, 372, 373, 373, 374, 375, 376, 46, 46, + 377, 378, 379, 380, 46, 46, 46, 46, 381, 381, 382, 383, 46, 46, 46, 46, + 384, 385, 386, 387, 388, 389, 390, 390, 391, 391, 391, 392, 393, 394, 395, 396, + 397, 397, 397, 397, 398, 46, 46, 46, 46, 46, 46, 46, 46, 46, 28, 49, + 399, 399, 399, 399, 400, 401, 399, 46, 402, 402, 402, 402, 403, 404, 405, 406, + 407, 407, 407, 408, 409, 46, 46, 46, 410, 410, 410, 410, 411, 412, 46, 46, + 413, 413, 413, 414, 415, 46, 46, 46, 416, 416, 416, 416, 416, 416, 416, 416, + 416, 416, 416, 416, 416, 416, 417, 46, 416, 416, 416, 416, 416, 416, 418, 419, + 420, 420, 420, 420, 420, 420, 420, 420, 420, 420, 421, 46, 46, 46, 46, 46, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 422, 46, 46, 46, 46, + 423, 423, 423, 423, 424, 423, 423, 425, 426, 423, 46, 46, 46, 46, 46, 46, + 427, 46, 46, 46, 46, 46, 46, 46, 0, 0, 0, 0, 0, 0, 0, 428, + 0, 0, 429, 0, 0, 0, 430, 431, 432, 0, 433, 0, 0, 434, 46, 46, + 11, 11, 11, 11, 435, 46, 46, 46, 0, 0, 0, 0, 0, 237, 0, 436, + 0, 0, 0, 0, 0, 226, 0, 0, 0, 437, 438, 439, 440, 0, 0, 0, + 441, 442, 0, 443, 444, 445, 0, 0, 0, 0, 446, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 447, 0, 0, 0, 448, 28, 449, 450, 451, 452, 453, 454, + 455, 456, 457, 456, 46, 46, 46, 327, 0, 0, 251, 0, 0, 0, 0, 0, + 0, 236, 228, 458, 238, 238, 46, 46, 230, 0, 228, 0, 0, 0, 251, 0, + 0, 230, 46, 46, 46, 46, 459, 0, 460, 0, 0, 230, 461, 436, 46, 46, + 0, 0, 462, 463, 0, 0, 0, 240, 0, 236, 0, 0, 464, 46, 0, 462, + 0, 0, 0, 228, 445, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 465, + 0, 0, 0, 434, 236, 0, 466, 46, 46, 46, 46, 46, 46, 46, 46, 467, + 0, 0, 0, 0, 468, 46, 46, 46, 0, 0, 0, 0, 428, 46, 46, 46, + 252, 252, 252, 252, 252, 469, 46, 46, 252, 252, 252, 470, 252, 252, 252, 252, + 252, 321, 46, 46, 46, 46, 46, 46, 471, 46, 0, 0, 0, 0, 0, 0, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 46, +}; + +static RE_UINT8 re_script_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, + 2, 2, 2, 2, 3, 0, 0, 0, 2, 2, 3, 0, 0, 4, 0, 0, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 6, 8, 6, 6, 9, + 8, 8, 10, 10, 6, 11, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 16, 14, 14, 14, 14, + 14, 14, 14, 14, 8, 8, 8, 8, 17, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 17, 18, 18, 18, + 18, 18, 18, 18, 20, 19, 8, 17, 21, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 8, 8, 8, 8, + 22, 22, 22, 22, 22, 23, 8, 8, 22, 22, 23, 8, 8, 8, 8, 8, + 24, 24, 25, 24, 24, 24, 26, 24, 24, 24, 24, 24, 24, 27, 25, 27, + 24, 24, 24, 24, 24, 24, 24, 24, 26, 24, 24, 24, 24, 28, 5, 5, + 5, 5, 5, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0, 24, 24, 24, + 29, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 27, 24, + 30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 32, 31, 30, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 8, 8, 8, 8, 8, 8, 8, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 35, 8, 8, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 8, 36, 36, 36, 36, 36, 36, 36, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 8, 39, + 8, 8, 8, 8, 8, 8, 8, 8, 25, 24, 24, 24, 24, 24, 25, 8, + 8, 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, + 40, 40, 40, 40, 40, 40, 40, 40, 41, 42, 40, 40, 40, 40, 40, 40, + 40, 40, 0, 40, 40, 40, 40, 40, 40, 40, 40, 40, 43, 40, 40, 40, + 44, 45, 44, 45, 45, 45, 46, 44, 46, 44, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 46, 45, 45, 45, 46, 46, 8, 45, 45, 8, 45, 45, + 45, 45, 46, 44, 46, 44, 45, 46, 8, 8, 8, 44, 8, 8, 45, 44, + 45, 45, 8, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 8, 8, + 47, 48, 47, 48, 48, 49, 8, 47, 49, 47, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 49, 48, 48, 48, 49, 48, 47, 49, 48, 8, 49, 48, + 48, 49, 8, 47, 49, 47, 48, 8, 47, 8, 8, 8, 47, 48, 49, 49, + 8, 8, 8, 48, 48, 48, 48, 48, 48, 48, 48, 8, 8, 8, 8, 8, + 50, 51, 50, 51, 51, 51, 51, 50, 51, 50, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 52, 51, 51, 51, 52, 51, 50, 51, 51, 8, 51, 51, + 51, 51, 51, 50, 51, 50, 51, 8, 52, 8, 8, 8, 8, 8, 8, 8, + 51, 51, 8, 51, 51, 51, 51, 51, 51, 8, 8, 8, 8, 8, 8, 8, + 53, 54, 53, 54, 54, 54, 55, 53, 55, 53, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 55, 54, 54, 54, 55, 54, 53, 54, 54, 8, 54, 54, + 54, 54, 55, 53, 55, 53, 54, 8, 8, 8, 8, 54, 8, 8, 54, 53, + 54, 54, 8, 54, 54, 54, 54, 54, 54, 54, 54, 54, 8, 8, 8, 8, + 8, 56, 57, 56, 56, 58, 8, 56, 58, 56, 56, 8, 57, 58, 58, 56, + 8, 57, 58, 8, 56, 58, 8, 56, 56, 56, 56, 56, 56, 8, 8, 56, + 56, 58, 8, 56, 58, 56, 56, 8, 58, 8, 8, 57, 8, 8, 8, 8, + 8, 8, 8, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 58, 8, 8, + 59, 60, 59, 60, 60, 60, 61, 60, 61, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 61, 60, 60, 60, 60, 60, 59, 60, 60, 8, 59, 60, + 60, 60, 61, 60, 61, 60, 60, 8, 8, 8, 59, 61, 60, 8, 8, 8, + 60, 60, 8, 60, 60, 60, 60, 60, 8, 8, 8, 8, 60, 60, 60, 60, + 8, 62, 63, 62, 62, 62, 64, 62, 64, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 64, 62, 62, 62, 62, 62, 63, 62, 62, 8, 62, 62, + 62, 62, 64, 62, 64, 62, 62, 8, 8, 8, 63, 64, 8, 8, 8, 64, + 62, 62, 8, 62, 62, 62, 62, 62, 63, 64, 8, 8, 8, 8, 8, 8, + 8, 65, 66, 65, 65, 65, 67, 65, 67, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 67, 66, 65, + 65, 65, 67, 65, 67, 65, 65, 67, 8, 8, 8, 66, 8, 8, 8, 8, + 65, 65, 8, 65, 65, 65, 65, 65, 65, 65, 65, 8, 66, 65, 65, 65, + 8, 68, 69, 68, 68, 68, 68, 68, 68, 68, 68, 70, 8, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 68, 68, 68, 68, 69, 8, + 68, 68, 68, 70, 8, 70, 8, 69, 68, 68, 70, 70, 68, 68, 68, 68, + 8, 68, 70, 8, 8, 8, 8, 8, 71, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 8, 20, + 72, 72, 72, 72, 72, 72, 8, 8, 74, 75, 75, 74, 75, 75, 74, 8, + 8, 8, 76, 76, 74, 76, 76, 76, 74, 76, 74, 74, 8, 76, 74, 76, + 76, 76, 76, 76, 76, 74, 76, 8, 76, 76, 75, 75, 76, 76, 76, 8, + 76, 76, 76, 76, 76, 8, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 8, + 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 77, + 77, 77, 80, 0, 81, 79, 8, 8, 82, 82, 82, 82, 82, 82, 82, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 8, 8, 84, 8, + 83, 83, 83, 83, 83, 85, 83, 83, 86, 86, 86, 86, 86, 86, 86, 86, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 8, + 87, 87, 87, 88, 88, 87, 87, 8, 88, 87, 87, 8, 87, 87, 87, 88, + 88, 87, 87, 8, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 88, 89, 87, 87, 87, 87, 87, 87, 87, 88, 8, + 87, 87, 87, 87, 87, 8, 8, 8, 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 91, 8, 8, 8, 8, 8, 92, 92, 92, 92, 92, 92, 92, 92, + 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 8, + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 96, 0, 95, + 97, 8, 8, 8, 8, 8, 8, 8, 98, 98, 98, 98, 98, 98, 99, 98, + 98, 98, 99, 8, 8, 8, 8, 8, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 101, 9, 8, 8, 8, 8, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 8, 8, 8, 8, 8, 8, 103, 103, 103, 103, 103, 103, 104, 103, + 104, 103, 8, 8, 8, 8, 8, 8, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 8, 105, 105, 105, 105, 105, 8, 8, 8, + 106, 0, 107, 106, 106, 106, 106, 108, 106, 106, 106, 106, 106, 8, 8, 8, + 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 8, 8, 8, 8, + 106, 106, 106, 106, 106, 108, 8, 8, 92, 92, 92, 8, 8, 8, 8, 8, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 110, 8, + 109, 109, 109, 109, 109, 109, 8, 8, 110, 8, 109, 109, 109, 109, 109, 109, + 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 8, + 111, 111, 112, 8, 8, 8, 8, 8, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 8, 8, 113, 113, 113, 113, 113, 8, 8, 8, + 113, 113, 113, 113, 113, 114, 8, 113, 115, 115, 115, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 115, 115, 8, 115, 116, 116, 116, 116, 116, 116, 116, 116, + 116, 116, 116, 116, 116, 116, 116, 117, 116, 116, 116, 116, 116, 116, 117, 118, + 116, 116, 116, 116, 116, 8, 8, 8, 116, 116, 116, 116, 116, 116, 116, 8, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 8, 8, + 119, 119, 119, 119, 119, 119, 120, 8, 121, 121, 121, 121, 121, 121, 121, 121, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 8, 8, 8, 8, 122, 122, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 8, 124, 123, 123, + 123, 123, 123, 123, 123, 8, 124, 123, 125, 125, 125, 125, 125, 125, 125, 125, + 121, 121, 121, 121, 8, 8, 8, 8, 5, 126, 5, 5, 5, 5, 5, 5, + 126, 5, 5, 5, 126, 0, 127, 0, 0, 0, 126, 9, 8, 8, 8, 8, + 2, 2, 2, 6, 6, 128, 2, 2, 2, 2, 2, 2, 2, 2, 129, 6, + 6, 2, 2, 6, 6, 130, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 129, 5, 5, 5, 132, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 5, 5, 6, 6, 6, 8, 6, 6, 6, 8, + 6, 6, 6, 6, 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 8, + 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 12, 6, + 8, 6, 11, 6, 6, 6, 6, 11, 0, 0, 0, 0, 0, 0, 5, 0, + 0, 0, 9, 0, 0, 0, 0, 0, 1, 8, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 9, 2, 2, 2, 2, 2, 2, 133, 8, + 0, 0, 0, 0, 0, 9, 8, 8, 132, 8, 8, 8, 8, 8, 8, 8, + 0, 0, 0, 10, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, 2, 2, 2, 2, 3, 8, 8, 8, + 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0, 9, 8, 8, 8, 8, + 20, 0, 0, 0, 0, 0, 0, 0, 134, 134, 134, 134, 134, 134, 134, 134, + 0, 0, 0, 0, 0, 0, 9, 8, 0, 0, 0, 0, 0, 8, 8, 8, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 136, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 8, 8, 137, 13, 13, 13, + 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 8, 8, 8, 139, + 140, 8, 8, 8, 8, 8, 8, 139, 87, 87, 87, 88, 8, 8, 8, 8, + 87, 87, 87, 88, 87, 87, 87, 88, 0, 0, 0, 0, 0, 0, 8, 8, + 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 141, 141, + 141, 141, 8, 8, 8, 8, 8, 8, 141, 141, 141, 8, 8, 8, 8, 8, + 0, 0, 143, 143, 0, 0, 0, 0, 143, 141, 141, 141, 141, 5, 5, 86, + 0, 0, 0, 0, 141, 141, 0, 0, 144, 145, 145, 145, 145, 145, 145, 145, + 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 146, 147, 126, 148, 145, + 149, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 151, 149, 150, 8, 8, 152, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, + 153, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 154, + 4, 4, 4, 4, 4, 155, 8, 8, 86, 86, 86, 86, 86, 86, 86, 156, + 150, 150, 150, 150, 150, 150, 150, 157, 150, 150, 150, 150, 0, 0, 0, 0, + 141, 141, 141, 141, 141, 141, 158, 8, 159, 159, 159, 159, 159, 159, 159, 159, + 159, 159, 159, 159, 159, 159, 160, 8, 159, 159, 159, 160, 8, 8, 8, 8, + 161, 161, 161, 161, 161, 161, 161, 161, 162, 162, 162, 162, 162, 162, 162, 162, + 162, 162, 162, 162, 162, 162, 8, 8, 14, 14, 14, 14, 8, 8, 8, 163, + 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 8, 8, 8, 8, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 133, + 2, 2, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 133, 8, 8, + 8, 8, 8, 8, 2, 2, 2, 2, 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 8, 8, 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 167, 167, + 167, 167, 168, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 8, 8, 8, + 40, 40, 40, 40, 40, 40, 8, 8, 169, 169, 169, 169, 169, 169, 169, 169, + 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 8, 8, 8, 8, 8, 171, + 86, 86, 86, 86, 86, 86, 154, 8, 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 20, 172, 172, 172, 172, 172, 8, 8, 172, + 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 174, 8, 8, 8, 8, + 173, 173, 173, 173, 173, 173, 173, 8, 173, 173, 173, 173, 173, 8, 173, 173, + 82, 82, 82, 82, 82, 82, 8, 8, 175, 175, 175, 175, 175, 175, 175, 175, + 175, 176, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 177, 175, 175, + 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 179, 8, 8, 8, 8, + 89, 87, 87, 88, 89, 87, 87, 88, 89, 87, 87, 88, 8, 8, 8, 8, + 178, 178, 178, 178, 178, 178, 178, 8, 178, 178, 178, 178, 178, 8, 8, 8, + 86, 86, 8, 8, 8, 8, 8, 8, 86, 86, 86, 154, 8, 153, 86, 86, + 86, 86, 86, 86, 86, 86, 8, 8, 141, 141, 141, 141, 141, 141, 141, 8, + 141, 141, 141, 141, 141, 8, 8, 8, 2, 2, 2, 133, 8, 8, 8, 8, + 8, 17, 18, 18, 8, 8, 21, 22, 22, 22, 22, 23, 22, 22, 23, 23, + 22, 21, 23, 22, 22, 22, 22, 22, 24, 8, 8, 8, 8, 8, 8, 8, + 8, 180, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, + 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 8, 8, 8, 8, + 24, 24, 24, 24, 24, 24, 27, 8, 0, 9, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 0, 0, 8, 8, 24, 24, 25, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 25, 20, 0, 0, 0, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 0, 8, 86, 86, 86, 8, 86, 86, 86, + 8, 86, 86, 86, 8, 86, 154, 8, 0, 0, 0, 9, 0, 0, 0, 9, + 8, 8, 8, 8, 20, 0, 0, 8, 181, 181, 181, 181, 181, 181, 182, 181, + 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 183, 181, 181, 181, 181, + 181, 181, 181, 181, 181, 183, 181, 182, 181, 181, 181, 181, 181, 181, 181, 8, + 181, 181, 181, 181, 181, 183, 8, 8, 0, 9, 8, 20, 0, 0, 0, 0, + 0, 0, 8, 20, 0, 0, 0, 0, 6, 6, 6, 6, 6, 11, 8, 8, + 0, 0, 0, 0, 0, 0, 127, 8, 184, 184, 184, 184, 184, 184, 184, 184, + 184, 184, 184, 184, 184, 184, 185, 8, 186, 186, 186, 186, 186, 186, 186, 186, + 187, 8, 8, 8, 8, 8, 8, 8, 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 189, 188, 188, 8, 8, 8, 8, 8, 8, + 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 191, 8, 8, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 193, + 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 8, 8, 194, 194, 194, 194, + 194, 194, 194, 8, 8, 8, 8, 8, 195, 195, 195, 195, 195, 195, 195, 195, + 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, + 197, 197, 197, 197, 197, 197, 197, 8, 197, 197, 197, 197, 197, 8, 8, 8, + 198, 198, 198, 8, 199, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, + 198, 198, 198, 200, 199, 8, 199, 200, 201, 201, 201, 201, 201, 201, 201, 201, + 201, 201, 201, 202, 201, 201, 201, 201, 203, 203, 203, 203, 203, 203, 203, 203, + 203, 203, 203, 203, 203, 203, 8, 204, 205, 205, 205, 205, 205, 205, 205, 205, + 205, 205, 205, 205, 205, 8, 8, 206, 207, 207, 207, 207, 207, 207, 207, 207, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 8, 8, 8, 208, + 209, 209, 210, 211, 8, 8, 209, 209, 209, 209, 210, 209, 210, 209, 209, 209, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 8, 8, 209, 211, 8, 210, + 209, 209, 209, 209, 8, 8, 8, 8, 209, 209, 209, 209, 211, 8, 8, 8, + 212, 212, 212, 212, 212, 212, 212, 212, 213, 213, 213, 213, 213, 213, 213, 213, + 213, 213, 213, 8, 214, 213, 213, 213, 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 8, 215, 215, 215, 215, 216, 216, 216, 216, 216, 216, 216, 216, + 216, 217, 8, 8, 216, 216, 216, 216, 218, 218, 218, 218, 218, 218, 218, 218, + 218, 218, 218, 218, 219, 8, 8, 8, 220, 220, 220, 220, 220, 220, 220, 220, + 220, 220, 220, 220, 220, 220, 220, 8, 8, 220, 220, 220, 220, 220, 220, 220, + 221, 221, 221, 221, 221, 221, 221, 221, 221, 8, 8, 8, 8, 8, 8, 8, + 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 223, 8, 8, 8, + 222, 222, 222, 222, 222, 8, 8, 8, 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 225, 224, 224, 224, 224, 224, 224, 224, 8, 8, 8, 8, 8, 8, + 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 227, 8, 8, 8, + 226, 226, 226, 226, 226, 8, 8, 8, 228, 228, 228, 228, 228, 228, 228, 228, + 228, 228, 228, 228, 8, 8, 8, 8, 228, 228, 228, 228, 228, 8, 8, 8, + 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 230, + 229, 230, 8, 8, 8, 8, 8, 8, 229, 229, 8, 8, 8, 8, 8, 8, + 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 232, + 164, 164, 164, 164, 233, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 235, 8, 8, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 235, + 8, 8, 8, 8, 8, 8, 8, 236, 237, 8, 8, 8, 8, 8, 8, 8, + 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, 0, 9, 20, 0, 0, 0, + 0, 0, 0, 127, 5, 0, 0, 0, 0, 0, 0, 0, 0, 127, 5, 5, + 5, 126, 127, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 8, 6, 6, 6, 8, 8, 8, 8, 8, + 0, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 9, 0, + 8, 9, 20, 9, 20, 0, 9, 0, 0, 0, 0, 0, 0, 20, 20, 0, + 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 9, 20, 0, + 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 20, 0, 9, + 0, 0, 9, 9, 8, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, + 24, 24, 180, 24, 24, 24, 24, 24, 180, 25, 25, 180, 180, 24, 24, 24, + 24, 25, 24, 24, 180, 180, 8, 8, 8, 25, 8, 180, 180, 180, 180, 24, + 180, 25, 25, 180, 180, 180, 180, 180, 180, 25, 25, 180, 24, 25, 24, 24, + 24, 25, 24, 24, 180, 24, 25, 25, 24, 24, 24, 24, 24, 180, 24, 24, + 24, 24, 24, 24, 24, 24, 8, 8, 180, 24, 180, 24, 24, 180, 24, 24, + 20, 0, 0, 0, 0, 0, 0, 9, 8, 8, 8, 0, 0, 0, 0, 0, + 238, 9, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 9, 8, 8, 8, + 9, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 20, 0, 0, 0, 0, + 0, 0, 9, 0, 0, 9, 8, 8, 0, 0, 0, 0, 20, 0, 9, 8, + 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 20, 0, 0, + 9, 8, 20, 0, 0, 0, 0, 0, 141, 141, 141, 158, 8, 8, 8, 8, + 141, 141, 158, 8, 8, 8, 8, 8, 20, 8, 8, 8, 8, 8, 8, 8, +}; + +static RE_UINT8 re_script_stage_5[] = { + 1, 1, 1, 2, 2, 2, 2, 1, 35, 35, 41, 41, 3, 3, 1, 3, + 0, 0, 1, 0, 3, 1, 3, 0, 0, 3, 55, 55, 4, 4, 4, 41, + 41, 4, 0, 5, 5, 5, 5, 0, 0, 1, 0, 6, 6, 6, 6, 0, + 7, 7, 7, 0, 1, 7, 7, 1, 7, 41, 41, 7, 8, 8, 0, 8, + 8, 0, 9, 9, 66, 66, 66, 0, 82, 82, 82, 0, 95, 95, 95, 0, + 10, 10, 10, 41, 41, 10, 0, 10, 0, 11, 11, 11, 11, 0, 0, 12, + 12, 12, 12, 0, 0, 13, 13, 13, 13, 0, 0, 14, 14, 14, 14, 0, + 15, 15, 0, 15, 15, 0, 0, 16, 16, 16, 16, 0, 17, 17, 0, 17, + 17, 0, 18, 18, 0, 18, 18, 0, 19, 19, 0, 19, 19, 0, 0, 20, + 20, 20, 20, 0, 0, 21, 21, 0, 21, 21, 22, 22, 0, 22, 22, 0, + 22, 1, 1, 22, 23, 23, 24, 24, 0, 24, 24, 1, 25, 25, 26, 26, + 26, 0, 0, 26, 27, 27, 27, 0, 28, 28, 29, 29, 29, 0, 30, 30, + 30, 1, 30, 0, 42, 42, 42, 0, 43, 43, 43, 1, 44, 44, 45, 45, + 45, 0, 31, 31, 32, 32, 32, 1, 32, 0, 46, 46, 46, 0, 47, 47, + 47, 0, 56, 56, 56, 0, 54, 54, 78, 78, 78, 0, 0, 78, 62, 62, + 62, 0, 67, 67, 93, 93, 68, 68, 0, 68, 69, 69, 41, 1, 1, 41, + 3, 4, 2, 3, 3, 2, 4, 2, 41, 0, 2, 0, 53, 53, 57, 57, + 57, 0, 0, 55, 58, 58, 0, 58, 58, 0, 36, 36, 0, 36, 1, 36, + 0, 33, 33, 33, 33, 0, 0, 41, 1, 33, 1, 34, 34, 34, 34, 1, + 0, 35, 0, 25, 25, 0, 35, 0, 25, 1, 34, 0, 36, 0, 37, 37, + 37, 0, 83, 83, 70, 70, 0, 4, 84, 84, 59, 59, 65, 65, 71, 71, + 71, 0, 72, 72, 73, 73, 0, 73, 85, 85, 77, 77, 77, 0, 79, 79, + 79, 0, 0, 79, 86, 86, 86, 0, 0, 7, 48, 48, 0, 48, 48, 0, + 74, 74, 74, 0, 75, 75, 75, 0, 38, 38, 38, 0, 39, 39, 39, 0, + 49, 49, 0, 49, 60, 60, 40, 40, 50, 50, 51, 51, 52, 52, 52, 0, + 0, 52, 87, 87, 0, 87, 64, 64, 0, 64, 76, 76, 0, 76, 98, 98, + 97, 97, 61, 61, 0, 61, 61, 0, 88, 88, 80, 80, 0, 80, 89, 89, + 90, 90, 90, 0, 91, 91, 91, 0, 94, 94, 92, 92, 101, 101, 101, 0, + 96, 96, 96, 0, 100, 100, 100, 0, 102, 102, 63, 63, 63, 0, 81, 81, + 81, 0, 84, 0, 99, 99, 99, 0, 0, 99, 34, 33, 33, 1, +}; + +/* Script: 8046 bytes. */ + +RE_UINT32 re_get_script(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 11; + code = ch ^ (f << 11); + pos = (RE_UINT32)re_script_stage_1[f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_script_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_script_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_script_stage_4[pos + f] << 1; + value = re_script_stage_5[pos + code]; + + return value; +} + +/* Word_Break. */ + +static RE_UINT8 re_word_break_stage_1[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 5, 6, 6, 7, 4, 8, + 9, 10, 11, 12, 4, 4, 13, 4, 4, 4, 4, 14, 4, 15, 16, 17, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 18, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_word_break_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 2, 2, 31, 32, 33, 34, 35, 2, 2, 2, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 2, 2, 51, 52, + 53, 54, 55, 56, 57, 57, 57, 57, 57, 58, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 59, 60, 61, 62, 63, 57, 57, 57, + 64, 65, 66, 67, 57, 68, 69, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 83, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 84, 85, 2, 2, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 57, 96, 97, 98, 2, 99, 57, 57, 57, 57, 57, 57, + 100, 57, 101, 102, 103, 57, 104, 57, 105, 57, 57, 57, 57, 57, 57, 57, + 106, 107, 108, 109, 57, 57, 57, 57, 57, 57, 57, 57, 57, 110, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 111, 57, 112, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 2, 2, 113, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 114, 57, 57, 57, 57, 57, 57, 57, 57, 57, 115, 116, + 117, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 118, 119, 120, 57, 57, 57, 121, 122, 123, 2, 2, 124, 125, 126, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 127, 128, 57, 57, + 57, 57, 57, 129, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 130, 57, 131, 132, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, +}; + +static RE_UINT8 re_word_break_stage_3[] = { + 0, 1, 0, 0, 2, 3, 4, 5, 6, 7, 7, 8, 6, 7, 7, 9, + 10, 0, 0, 0, 0, 11, 12, 13, 7, 7, 14, 7, 7, 7, 14, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 16, 0, 17, 18, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 21, + 22, 23, 7, 7, 24, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 7, + 26, 27, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 0, 6, 7, 7, 7, 14, 28, 6, 7, 7, 7, + 7, 29, 30, 19, 19, 19, 19, 31, 32, 0, 33, 33, 33, 34, 35, 0, + 36, 37, 19, 38, 7, 7, 7, 7, 7, 39, 19, 19, 4, 40, 41, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 42, 43, 44, 45, 4, 46, + 0, 47, 48, 7, 7, 7, 19, 19, 19, 49, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 50, 19, 51, 0, 4, 52, 7, 7, 7, 39, 53, 54, + 7, 7, 50, 55, 56, 57, 0, 0, 7, 7, 7, 58, 0, 0, 0, 0, + 0, 0, 0, 0, 59, 17, 0, 0, 0, 0, 0, 0, 60, 19, 19, 61, + 62, 7, 7, 7, 7, 7, 7, 63, 19, 19, 64, 7, 65, 4, 6, 6, + 66, 67, 68, 7, 7, 59, 69, 70, 71, 72, 73, 74, 65, 4, 75, 0, + 66, 76, 68, 7, 7, 59, 77, 78, 79, 80, 81, 82, 83, 4, 84, 0, + 66, 25, 24, 7, 7, 59, 85, 70, 31, 86, 87, 0, 65, 4, 0, 0, + 66, 67, 68, 7, 7, 59, 85, 70, 71, 80, 88, 74, 65, 4, 28, 0, + 89, 90, 91, 92, 93, 90, 7, 94, 95, 96, 97, 0, 83, 4, 0, 0, + 66, 20, 59, 7, 7, 59, 98, 99, 100, 96, 101, 75, 65, 4, 0, 0, + 102, 20, 59, 7, 7, 59, 98, 70, 100, 96, 101, 103, 65, 4, 104, 0, + 102, 20, 59, 7, 7, 7, 7, 105, 100, 106, 73, 0, 65, 4, 0, 107, + 102, 7, 14, 107, 7, 7, 24, 108, 14, 109, 110, 19, 0, 0, 111, 0, + 0, 0, 0, 0, 0, 0, 112, 113, 73, 61, 4, 114, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 112, 115, 0, 116, 4, 114, 0, 0, 0, 0, + 87, 0, 0, 117, 4, 114, 118, 119, 7, 6, 7, 7, 7, 17, 30, 19, + 100, 120, 19, 30, 19, 19, 19, 121, 122, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 123, 19, 61, 4, 114, 88, 124, 125, 116, 126, 0, + 127, 31, 4, 128, 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 130, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 91, 14, 91, 7, 7, 7, 7, + 7, 91, 7, 7, 7, 7, 91, 14, 91, 7, 14, 7, 7, 7, 7, 7, + 7, 7, 91, 7, 7, 7, 7, 7, 7, 7, 7, 131, 0, 0, 0, 0, + 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 67, 7, 7, + 6, 7, 7, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 90, 87, 0, + 7, 20, 132, 0, 7, 7, 132, 0, 7, 7, 133, 0, 7, 20, 134, 0, + 0, 0, 0, 0, 0, 0, 60, 19, 19, 19, 135, 136, 4, 114, 0, 0, + 0, 137, 4, 114, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, + 7, 7, 7, 7, 7, 138, 7, 7, 7, 7, 7, 7, 7, 7, 139, 0, + 7, 7, 7, 17, 19, 135, 19, 135, 83, 4, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 140, 117, 4, 114, 0, 0, 0, 0, + 7, 7, 141, 135, 0, 0, 0, 0, 0, 0, 142, 61, 19, 19, 19, 71, + 4, 114, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 143, 7, 7, 7, 7, 7, 144, 19, 143, 145, 4, 114, 0, 123, 135, 0, + 146, 7, 7, 7, 64, 147, 4, 52, 7, 7, 7, 7, 50, 19, 135, 0, + 7, 7, 7, 7, 144, 19, 19, 0, 4, 148, 4, 52, 7, 7, 7, 139, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 149, 19, 19, 150, 151, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 19, 19, 19, 19, 61, 0, 0, 60, + 7, 7, 139, 139, 7, 7, 7, 7, 139, 139, 7, 152, 7, 7, 7, 139, + 7, 7, 7, 7, 7, 7, 20, 153, 154, 17, 155, 145, 7, 17, 154, 17, + 0, 156, 0, 157, 158, 159, 0, 160, 161, 0, 162, 0, 163, 164, 28, 165, + 0, 0, 7, 17, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 140, 0, + 166, 107, 108, 167, 18, 168, 7, 169, 170, 171, 0, 0, 7, 7, 7, 7, + 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 172, 7, 7, 7, 7, 7, 7, 75, 0, 0, + 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 173, 174, 0, + 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 7, 7, 165, 0, 73, + 7, 7, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 19, 19, 19, 19, + 0, 0, 0, 0, 0, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 129, 0, 0, 0, 0, 127, 175, 93, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 176, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 178, + 170, 7, 7, 7, 7, 139, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, 177, 177, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 179, + 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 0, 0, 0, 0, 0, + 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 139, + 7, 17, 7, 7, 4, 180, 0, 0, 7, 7, 7, 7, 7, 141, 149, 181, + 7, 7, 7, 73, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 117, 0, + 0, 0, 165, 7, 107, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 182, 145, 0, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 183, 184, 7, 7, 39, 0, 0, 0, 7, 7, 7, 7, 7, 7, 145, 0, + 27, 7, 7, 7, 7, 7, 144, 19, 121, 0, 4, 114, 19, 19, 27, 185, + 4, 52, 7, 7, 50, 116, 7, 7, 141, 19, 135, 0, 7, 7, 7, 17, + 62, 7, 7, 7, 7, 7, 39, 19, 140, 165, 4, 114, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 64, 61, 0, 184, 186, 4, 114, 0, 0, 0, 187, + 0, 0, 0, 0, 0, 0, 125, 188, 81, 0, 0, 0, 7, 39, 189, 0, + 190, 190, 190, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 39, 191, 4, 114, + 7, 7, 7, 7, 145, 0, 7, 7, 14, 192, 7, 7, 7, 7, 7, 145, + 14, 0, 192, 193, 33, 194, 195, 196, 197, 33, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 75, 0, 0, 0, 192, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 139, 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 107, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 7, 145, + 19, 19, 198, 0, 61, 0, 199, 0, 0, 200, 201, 0, 0, 0, 20, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 202, + 203, 3, 0, 204, 6, 7, 7, 8, 6, 7, 7, 9, 205, 177, 177, 177, + 177, 177, 177, 206, 7, 7, 7, 14, 107, 107, 107, 207, 0, 0, 0, 208, + 7, 98, 7, 7, 14, 7, 7, 209, 7, 139, 7, 139, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 17, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, + 7, 7, 7, 17, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, + 7, 7, 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 139, 7, 7, 7, 7, 145, 7, 167, 0, 0, 0, 0, 0, + 7, 7, 7, 139, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 139, 59, 7, 7, 7, 7, 25, 210, 7, 7, 139, 0, 0, 0, 0, 0, + 7, 7, 139, 0, 7, 7, 7, 75, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 172, 0, 0, 0, 0, 0, 0, 0, 0, + 211, 60, 98, 6, 7, 7, 145, 79, 0, 0, 0, 0, 7, 7, 7, 17, + 7, 7, 7, 7, 7, 7, 139, 0, 7, 7, 139, 0, 7, 7, 9, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, + 146, 7, 7, 7, 7, 7, 7, 19, 61, 0, 0, 0, 83, 4, 0, 0, + 146, 7, 7, 7, 7, 7, 19, 212, 0, 0, 7, 7, 7, 87, 4, 114, + 146, 7, 7, 7, 141, 19, 213, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 146, 7, 7, 7, 7, 7, 39, 19, 214, 0, 4, 114, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 39, 19, 0, 4, 114, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 0, 0, 0, + 7, 7, 7, 7, 7, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, 64, 19, 19, 19, 19, 61, + 0, 73, 146, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 215, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142, 216, 217, 218, + 219, 135, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 20, 7, 7, 7, 7, 7, + 7, 7, 7, 20, 222, 223, 7, 224, 98, 7, 7, 7, 7, 7, 7, 7, + 25, 225, 20, 20, 7, 7, 7, 226, 153, 107, 59, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 139, 7, 7, 7, 59, 7, 7, 130, 7, 7, 7, 130, + 7, 7, 20, 7, 7, 7, 20, 7, 7, 14, 7, 7, 7, 14, 7, 7, + 7, 59, 7, 7, 7, 59, 7, 7, 130, 227, 4, 4, 4, 4, 4, 4, + 98, 7, 7, 7, 228, 6, 130, 229, 166, 230, 228, 152, 228, 130, 130, 82, + 7, 24, 7, 145, 231, 24, 7, 145, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232, 233, 233, 233, + 234, 0, 0, 0, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, +}; + +static RE_UINT8 re_word_break_stage_4[] = { + 0, 0, 1, 2, 3, 4, 0, 5, 6, 6, 7, 0, 8, 9, 9, 9, + 10, 11, 10, 0, 0, 12, 13, 14, 0, 15, 13, 0, 9, 10, 16, 17, + 16, 18, 9, 19, 0, 20, 21, 21, 9, 22, 17, 23, 0, 24, 10, 22, + 25, 9, 9, 25, 26, 21, 27, 9, 28, 0, 29, 0, 30, 21, 21, 31, + 32, 31, 33, 33, 34, 0, 35, 36, 37, 38, 0, 39, 40, 38, 41, 21, + 42, 43, 44, 9, 9, 45, 21, 46, 21, 47, 48, 27, 49, 50, 0, 51, + 52, 9, 40, 8, 9, 53, 54, 0, 49, 9, 21, 16, 55, 0, 56, 21, + 21, 57, 57, 58, 57, 0, 22, 9, 0, 21, 21, 40, 21, 9, 53, 59, + 57, 21, 53, 60, 30, 8, 9, 50, 50, 9, 20, 17, 16, 59, 21, 61, + 61, 62, 0, 63, 0, 25, 16, 0, 10, 64, 22, 65, 16, 48, 40, 63, + 61, 58, 66, 0, 8, 20, 0, 60, 27, 67, 22, 8, 31, 58, 19, 0, + 0, 68, 69, 8, 10, 17, 22, 16, 65, 22, 64, 19, 16, 68, 40, 68, + 48, 58, 19, 63, 9, 8, 16, 45, 21, 48, 0, 32, 68, 8, 0, 13, + 65, 0, 10, 45, 48, 62, 17, 9, 9, 28, 70, 63, 21, 71, 68, 0, + 66, 21, 40, 0, 72, 0, 31, 73, 21, 58, 58, 0, 0, 74, 66, 68, + 9, 57, 21, 73, 0, 70, 63, 21, 58, 68, 48, 61, 30, 73, 68, 21, + 75, 58, 0, 28, 10, 9, 10, 30, 53, 73, 53, 0, 76, 0, 21, 0, + 0, 66, 63, 77, 78, 0, 9, 16, 73, 0, 9, 41, 0, 30, 21, 44, + 9, 21, 9, 0, 79, 9, 21, 27, 72, 8, 40, 21, 44, 52, 53, 80, + 81, 81, 9, 20, 17, 22, 9, 17, 0, 82, 83, 0, 0, 84, 85, 86, + 0, 11, 87, 88, 0, 87, 37, 89, 37, 37, 0, 64, 13, 64, 8, 16, + 22, 25, 16, 9, 0, 8, 16, 13, 0, 17, 64, 41, 27, 0, 90, 91, + 92, 93, 94, 94, 95, 94, 94, 95, 49, 0, 21, 96, 50, 10, 97, 97, + 41, 9, 64, 0, 9, 58, 63, 0, 73, 68, 17, 98, 8, 10, 40, 58, + 64, 9, 0, 99, 100, 33, 33, 34, 33, 101, 102, 100, 103, 88, 11, 87, + 0, 104, 5, 105, 9, 106, 0, 107, 108, 0, 0, 109, 94, 110, 17, 19, + 111, 0, 10, 25, 19, 50, 57, 32, 40, 14, 21, 112, 44, 19, 93, 0, + 58, 30, 113, 37, 114, 21, 40, 30, 68, 58, 68, 73, 13, 65, 8, 22, + 25, 8, 10, 8, 25, 10, 9, 60, 65, 50, 81, 0, 81, 8, 8, 8, + 0, 115, 116, 116, 14, 0, +}; + +static RE_UINT8 re_word_break_stage_5[] = { + 0, 0, 0, 0, 0, 0, 5, 6, 6, 4, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 2, 13, 0, 14, 0, 15, 15, 15, 15, 15, 15, 12, 13, + 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 16, + 0, 6, 0, 0, 0, 0, 11, 0, 0, 9, 0, 0, 0, 11, 0, 12, + 11, 11, 0, 0, 0, 0, 11, 11, 0, 0, 0, 12, 11, 0, 0, 0, + 11, 0, 11, 0, 7, 7, 7, 7, 11, 0, 11, 11, 11, 11, 13, 0, + 0, 0, 11, 12, 11, 11, 0, 11, 11, 11, 0, 7, 7, 7, 11, 11, + 0, 11, 0, 0, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 0, 7, + 0, 7, 7, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 11, + 12, 0, 0, 0, 9, 9, 9, 9, 9, 0, 0, 0, 13, 13, 0, 0, + 7, 7, 7, 0, 11, 11, 11, 7, 15, 15, 0, 15, 13, 0, 11, 11, + 7, 11, 11, 11, 0, 11, 7, 7, 7, 9, 0, 7, 7, 11, 11, 7, + 7, 0, 7, 7, 15, 15, 11, 11, 11, 0, 0, 11, 0, 0, 0, 9, + 11, 7, 11, 11, 11, 11, 7, 7, 7, 11, 0, 0, 13, 0, 11, 0, + 7, 7, 11, 7, 11, 7, 7, 7, 7, 7, 0, 0, 7, 11, 7, 7, + 0, 0, 15, 15, 7, 0, 0, 7, 7, 7, 11, 0, 0, 0, 0, 7, + 0, 0, 0, 11, 0, 11, 11, 0, 0, 7, 0, 0, 11, 7, 0, 0, + 0, 0, 7, 7, 0, 0, 7, 11, 0, 0, 7, 0, 7, 0, 7, 0, + 15, 15, 0, 0, 7, 0, 0, 0, 0, 7, 0, 7, 15, 15, 7, 7, + 11, 0, 7, 7, 7, 7, 9, 0, 11, 7, 11, 0, 7, 7, 7, 11, + 7, 11, 11, 0, 0, 11, 0, 11, 7, 7, 9, 9, 14, 14, 0, 0, + 14, 0, 0, 12, 6, 6, 9, 9, 9, 9, 9, 0, 16, 0, 0, 0, + 13, 0, 0, 0, 9, 0, 9, 9, 0, 10, 10, 10, 10, 10, 0, 0, + 0, 7, 7, 10, 10, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 0, + 7, 7, 0, 11, 11, 11, 7, 11, 11, 7, 7, 0, 0, 3, 7, 3, + 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 13, 0, 0, 12, + 0, 16, 16, 16, 13, 12, 0, 0, 11, 0, 0, 9, 0, 0, 0, 14, + 0, 0, 12, 13, 0, 0, 10, 10, 10, 10, 7, 7, 0, 9, 9, 9, + 7, 0, 15, 15, 7, 7, 7, 9, 9, 9, 9, 7, 0, 0, 8, 8, + 8, 8, 8, 8, +}; + +/* Word_Break: 3946 bytes. */ + +RE_UINT32 re_get_word_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_word_break_stage_1[f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_word_break_stage_2[pos + f] << 4; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_word_break_stage_3[pos + f] << 1; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_word_break_stage_4[pos + f] << 2; + value = re_word_break_stage_5[pos + code]; + + return value; +} + +/* Grapheme_Cluster_Break. */ + +static RE_UINT8 re_grapheme_cluster_break_stage_1[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 7, 2, 2, 8, 9, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_2[] = { + 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 1, + 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 27, 1, 1, + 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 29, 1, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 34, 35, 36, 37, 38, 39, 40, 34, 35, 36, 37, 38, 39, + 40, 34, 35, 36, 37, 38, 39, 40, 34, 35, 36, 37, 38, 39, 40, 34, + 35, 36, 37, 38, 39, 40, 34, 41, 42, 42, 42, 42, 42, 42, 42, 42, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43, 1, 1, 44, 45, + 1, 46, 1, 1, 1, 1, 1, 1, 1, 1, 47, 1, 1, 1, 1, 1, + 48, 49, 1, 1, 1, 1, 50, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 52, 53, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 54, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 42, 55, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_3[] = { + 0, 1, 2, 2, 2, 2, 2, 3, 1, 1, 4, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 5, 8, 9, 2, 2, 2, + 10, 11, 2, 2, 12, 5, 2, 13, 2, 2, 2, 2, 2, 14, 15, 2, + 3, 16, 2, 5, 17, 2, 2, 2, 2, 2, 18, 13, 2, 2, 12, 19, + 2, 20, 21, 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 23, 24, + 25, 2, 2, 26, 27, 28, 29, 2, 30, 2, 2, 31, 32, 33, 29, 2, + 34, 2, 2, 35, 36, 16, 2, 37, 34, 2, 2, 35, 38, 2, 29, 2, + 30, 2, 2, 39, 32, 40, 29, 2, 41, 2, 2, 42, 43, 33, 2, 2, + 44, 2, 2, 45, 46, 47, 29, 2, 48, 2, 2, 49, 50, 47, 29, 2, + 48, 2, 2, 42, 51, 33, 29, 2, 48, 2, 2, 2, 52, 53, 2, 48, + 2, 2, 2, 54, 55, 2, 2, 2, 2, 2, 2, 56, 57, 2, 2, 2, + 2, 58, 2, 59, 2, 2, 2, 60, 61, 62, 5, 63, 64, 2, 2, 2, + 2, 2, 65, 66, 2, 67, 13, 68, 69, 70, 2, 2, 2, 2, 2, 2, + 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73, 74, 74, 74, 74, 74, + 2, 2, 2, 2, 2, 65, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 75, 2, 75, 2, 29, 2, 29, 2, 2, 2, 76, 77, 78, 2, 2, + 79, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80, 2, 2, 2, 2, 2, + 2, 2, 81, 82, 2, 2, 2, 2, 2, 2, 2, 83, 2, 2, 2, 2, + 2, 84, 2, 2, 2, 85, 86, 87, 2, 2, 2, 2, 2, 2, 2, 2, + 88, 2, 2, 89, 90, 2, 12, 19, 91, 2, 92, 2, 2, 2, 93, 94, + 2, 2, 95, 96, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97, 98, 99, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 100, 101, + 102, 2, 103, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5, 5, 13, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 104, 105, + 2, 2, 2, 2, 2, 2, 2, 104, 2, 2, 2, 2, 2, 2, 5, 5, + 2, 2, 106, 2, 2, 2, 2, 2, 2, 107, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 104, 108, 2, 104, 2, 2, 2, 2, 2, 105, + 109, 2, 110, 2, 2, 2, 2, 2, 111, 2, 2, 112, 113, 2, 5, 105, + 2, 2, 114, 2, 115, 94, 71, 116, 25, 2, 2, 117, 118, 2, 2, 2, + 2, 2, 119, 120, 121, 2, 2, 2, 2, 2, 2, 122, 16, 2, 123, 124, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125, 2, + 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, + 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, + 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, + 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, + 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, + 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, + 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, + 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 131, 72, 132, 74, 74, 133, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 134, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 2, 100, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 45, 2, 2, 2, 2, 2, 135, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, + 136, 2, 2, 137, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 138, 2, 2, 139, 100, 2, 2, 2, 91, 2, 2, 140, 2, 2, 2, 2, + 141, 2, 142, 143, 2, 2, 2, 2, 91, 2, 2, 144, 118, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 145, 146, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 147, 148, 149, 104, 141, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 150, 151, 152, 2, 153, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 75, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 154, 155, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_4[] = { + 0, 0, 1, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 4, + 3, 3, 3, 5, 6, 6, 6, 6, 7, 6, 8, 3, 9, 6, 6, 6, + 6, 6, 6, 10, 11, 10, 3, 3, 0, 12, 3, 3, 6, 6, 13, 12, + 3, 3, 7, 6, 14, 3, 3, 3, 3, 15, 6, 16, 6, 17, 18, 8, + 19, 3, 3, 3, 6, 6, 13, 3, 3, 15, 6, 6, 6, 3, 3, 3, + 3, 15, 10, 6, 6, 9, 9, 8, 3, 3, 9, 3, 3, 6, 6, 6, + 6, 6, 6, 13, 20, 3, 3, 3, 3, 3, 21, 22, 23, 6, 24, 25, + 9, 6, 3, 3, 15, 3, 3, 3, 26, 3, 3, 3, 3, 3, 3, 27, + 23, 28, 29, 30, 3, 7, 3, 3, 31, 3, 3, 3, 3, 3, 3, 22, + 32, 7, 17, 8, 8, 19, 3, 3, 23, 10, 33, 30, 3, 3, 3, 18, + 3, 15, 3, 3, 34, 3, 3, 3, 3, 3, 3, 21, 35, 36, 37, 30, + 38, 3, 3, 3, 3, 3, 3, 15, 24, 39, 18, 8, 3, 11, 3, 3, + 36, 3, 3, 3, 3, 3, 3, 40, 41, 42, 37, 8, 23, 22, 37, 30, + 3, 3, 34, 7, 43, 44, 45, 46, 47, 6, 13, 3, 3, 7, 6, 13, + 47, 6, 10, 14, 3, 3, 6, 8, 3, 3, 8, 3, 3, 48, 19, 36, + 9, 6, 6, 20, 6, 18, 3, 9, 6, 6, 9, 6, 6, 6, 6, 14, + 3, 34, 3, 3, 3, 3, 3, 9, 49, 6, 31, 32, 3, 36, 8, 15, + 9, 14, 3, 3, 34, 32, 3, 19, 3, 3, 3, 19, 50, 50, 50, 50, + 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 15, 14, 3, 3, + 3, 53, 6, 54, 45, 41, 23, 6, 6, 3, 3, 19, 3, 3, 7, 55, + 3, 3, 19, 3, 20, 46, 24, 3, 41, 45, 23, 3, 3, 38, 56, 3, + 3, 7, 57, 3, 3, 58, 6, 13, 44, 9, 6, 24, 46, 6, 6, 17, + 6, 59, 3, 3, 3, 49, 20, 24, 41, 59, 3, 3, 60, 3, 3, 3, + 61, 54, 53, 62, 3, 21, 54, 63, 54, 3, 3, 3, 3, 45, 45, 6, + 6, 43, 3, 3, 13, 6, 6, 6, 49, 6, 14, 19, 36, 14, 3, 3, + 6, 13, 3, 3, 3, 3, 3, 6, 3, 3, 4, 64, 3, 3, 0, 65, + 3, 3, 3, 7, 8, 3, 3, 3, 3, 3, 15, 6, 3, 3, 11, 3, + 13, 6, 6, 8, 34, 34, 7, 3, 66, 67, 3, 3, 62, 3, 3, 3, + 3, 45, 45, 45, 45, 14, 3, 3, 3, 15, 6, 8, 3, 7, 6, 6, + 50, 50, 50, 68, 7, 43, 54, 24, 59, 3, 3, 3, 3, 3, 9, 20, + 67, 32, 3, 3, 7, 3, 3, 69, 18, 17, 14, 15, 3, 3, 66, 54, + 3, 70, 3, 3, 66, 25, 35, 30, 71, 72, 72, 72, 72, 72, 72, 71, + 72, 72, 72, 72, 72, 72, 71, 72, 72, 71, 72, 72, 72, 3, 3, 3, + 51, 73, 74, 52, 52, 52, 52, 3, 3, 3, 3, 34, 0, 0, 0, 3, + 9, 11, 3, 6, 3, 3, 13, 7, 75, 3, 3, 3, 3, 3, 6, 6, + 46, 20, 32, 5, 13, 3, 3, 3, 3, 7, 6, 23, 6, 14, 3, 3, + 66, 43, 6, 20, 3, 3, 7, 25, 6, 53, 3, 3, 38, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 76, 3, 77, 8, 61, 78, 0, 79, 6, + 13, 9, 6, 3, 3, 3, 15, 8, 3, 80, 81, 81, 81, 81, 81, 81, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_5[] = { + 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, 3, 3, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 3, 0, 0, 4, 4, 4, 4, 0, 0, 0, 4, + 4, 4, 0, 0, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 0, + 3, 0, 0, 0, 4, 4, 4, 0, 4, 0, 0, 0, 0, 0, 4, 4, + 4, 3, 0, 4, 4, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, + 4, 4, 4, 6, 0, 0, 4, 6, 4, 0, 6, 6, 6, 4, 4, 4, + 4, 6, 6, 6, 6, 4, 6, 6, 0, 4, 6, 6, 4, 0, 4, 6, + 4, 0, 0, 6, 6, 0, 0, 6, 6, 4, 0, 0, 0, 4, 4, 6, + 6, 4, 4, 0, 4, 6, 0, 6, 0, 0, 4, 0, 4, 6, 6, 0, + 0, 0, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 0, 4, 4, + 4, 0, 6, 4, 6, 6, 4, 6, 6, 0, 4, 6, 6, 6, 4, 4, + 4, 0, 4, 0, 6, 6, 6, 6, 6, 6, 6, 4, 0, 4, 0, 6, + 0, 4, 0, 4, 4, 6, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, + 9, 9, 9, 9, 4, 4, 6, 4, 4, 4, 6, 6, 4, 4, 3, 0, + 0, 0, 6, 0, 4, 6, 6, 4, 0, 6, 4, 6, 6, 0, 0, 0, + 4, 4, 6, 0, 0, 6, 4, 4, 6, 6, 0, 0, 6, 4, 6, 4, + 4, 4, 3, 3, 3, 3, 3, 0, 0, 0, 0, 6, 6, 4, 4, 6, + 7, 0, 0, 0, 4, 6, 0, 0, 0, 6, 4, 0, 10, 11, 11, 11, + 11, 11, 11, 11, 8, 8, 8, 0, 0, 0, 0, 9, 6, 4, 6, 0, + 6, 6, 6, 0, 0, 4, 6, 4, 4, 4, 4, 3, 3, 3, 3, 4, + 0, 0, 5, 5, 5, 5, 5, 5, +}; + +/* Grapheme_Cluster_Break: 2336 bytes. */ + +RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_grapheme_cluster_break_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_cluster_break_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_grapheme_cluster_break_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_grapheme_cluster_break_stage_4[pos + f] << 2; + value = re_grapheme_cluster_break_stage_5[pos + code]; + + return value; +} + +/* Sentence_Break. */ + +static RE_UINT8 re_sentence_break_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, + 11, 12, 13, 14, 9, 9, 15, 9, 9, 9, 9, 16, 9, 17, 18, 9, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 19, 20, 9, 9, 9, 21, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, +}; + +static RE_UINT8 re_sentence_break_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 33, 33, 36, 33, 37, 33, 33, 38, 39, 40, 33, + 41, 42, 33, 33, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 43, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 44, + 17, 17, 17, 17, 45, 17, 46, 47, 48, 49, 50, 51, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 52, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 17, 53, 54, 17, 55, 56, 57, + 58, 59, 60, 61, 62, 33, 33, 33, 63, 64, 65, 66, 67, 33, 33, 33, + 68, 69, 33, 33, 33, 33, 70, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 17, 17, 17, 71, 72, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 17, 17, 17, 17, 73, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 74, 33, 33, 33, 33, 75, + 76, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 77, 78, 33, 79, 80, 81, 82, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 83, 33, + 17, 17, 17, 17, 17, 17, 84, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 85, 86, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 86, 33, 33, 33, 33, 33, + 87, 88, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, +}; + +static RE_UINT16 re_sentence_break_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 8, 16, 17, 18, 19, 20, 21, 22, 23, 23, 23, 24, 25, 26, 27, 28, + 29, 30, 18, 8, 31, 8, 32, 8, 8, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 41, 41, 44, 45, 46, 47, 48, 41, 41, 49, 50, 51, + 52, 53, 54, 55, 55, 56, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 62, 71, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 73, 83, 84, 85, 86, 83, 87, 88, 89, 90, 91, 92, 93, + 94, 95, 96, 55, 97, 98, 99, 55, 100, 101, 102, 103, 104, 105, 106, 55, + 41, 107, 108, 109, 110, 29, 111, 112, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 113, 41, 114, 115, 116, 41, 117, 41, 118, 119, 120, 41, 41, 121, + 94, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 122, 123, 41, 41, 124, + 125, 126, 127, 128, 41, 129, 130, 131, 132, 41, 41, 133, 41, 134, 41, 135, + 136, 137, 138, 139, 41, 140, 141, 55, 142, 41, 143, 144, 145, 146, 55, 55, + 147, 129, 148, 149, 150, 151, 41, 152, 41, 153, 154, 155, 55, 55, 156, 157, + 18, 18, 18, 18, 18, 18, 23, 158, 8, 8, 8, 8, 159, 8, 8, 8, + 160, 161, 162, 163, 161, 164, 165, 166, 167, 168, 169, 170, 171, 55, 172, 173, + 174, 175, 176, 30, 177, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 178, 179, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 180, 30, 181, + 55, 55, 182, 183, 55, 55, 184, 185, 55, 55, 55, 55, 186, 55, 187, 188, + 29, 189, 190, 191, 8, 8, 8, 192, 18, 193, 41, 194, 195, 196, 196, 23, + 197, 198, 55, 55, 55, 55, 55, 55, 199, 200, 94, 41, 201, 94, 41, 112, + 202, 203, 41, 41, 204, 205, 55, 206, 41, 41, 41, 41, 41, 135, 55, 55, + 41, 41, 41, 41, 41, 41, 207, 55, 41, 41, 41, 41, 207, 55, 206, 208, + 209, 210, 8, 211, 212, 41, 41, 213, 214, 215, 8, 216, 217, 218, 55, 219, + 220, 221, 41, 222, 223, 129, 224, 225, 50, 226, 227, 136, 58, 228, 229, 55, + 41, 230, 231, 232, 41, 233, 234, 235, 236, 237, 55, 55, 55, 55, 41, 238, + 41, 41, 41, 41, 41, 239, 240, 241, 41, 41, 41, 242, 41, 41, 243, 55, + 244, 245, 246, 41, 41, 247, 248, 41, 41, 249, 206, 41, 250, 41, 251, 252, + 253, 254, 255, 256, 41, 41, 41, 257, 258, 2, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 55, 41, 41, 41, 205, 55, 55, 41, 121, 55, 55, 55, 268, + 55, 55, 55, 55, 136, 41, 269, 55, 262, 206, 270, 55, 271, 41, 272, 55, + 29, 273, 274, 41, 271, 131, 55, 55, 275, 276, 135, 55, 55, 55, 55, 55, + 135, 243, 55, 55, 41, 277, 55, 55, 278, 279, 280, 136, 55, 55, 55, 55, + 41, 135, 135, 281, 55, 55, 55, 55, 41, 41, 282, 55, 55, 55, 55, 55, + 150, 283, 284, 79, 150, 285, 286, 287, 150, 288, 289, 55, 150, 228, 290, 55, + 55, 55, 55, 55, 41, 291, 131, 55, 41, 41, 41, 204, 55, 55, 55, 55, + 41, 41, 41, 292, 55, 55, 55, 55, 41, 204, 55, 55, 55, 55, 55, 55, + 41, 293, 55, 55, 55, 55, 55, 55, 41, 41, 294, 295, 296, 55, 55, 55, + 297, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 298, 299, 300, 55, 55, + 55, 55, 301, 55, 55, 55, 55, 55, 302, 303, 304, 305, 306, 307, 308, 309, + 310, 311, 312, 313, 314, 302, 303, 315, 305, 316, 317, 318, 309, 319, 320, 321, + 322, 323, 324, 189, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 55, 55, + 41, 41, 41, 41, 41, 41, 195, 55, 41, 121, 41, 41, 41, 41, 41, 41, + 271, 55, 55, 55, 55, 55, 55, 55, 335, 336, 336, 336, 55, 55, 55, 55, + 23, 23, 23, 23, 23, 23, 23, 337, +}; + +static RE_UINT8 re_sentence_break_stage_4[] = { + 0, 0, 1, 2, 0, 0, 0, 0, 3, 4, 5, 6, 7, 7, 8, 9, + 10, 11, 11, 11, 11, 11, 12, 13, 14, 15, 15, 15, 15, 15, 16, 13, + 0, 17, 0, 0, 0, 0, 0, 0, 18, 0, 19, 20, 0, 21, 19, 0, + 11, 11, 11, 11, 11, 22, 11, 23, 15, 15, 15, 15, 15, 24, 15, 15, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, + 26, 26, 27, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 29, + 30, 31, 32, 33, 28, 31, 34, 28, 25, 31, 29, 31, 32, 26, 35, 34, + 36, 28, 31, 26, 26, 26, 26, 27, 25, 25, 25, 25, 30, 31, 25, 25, + 25, 25, 25, 25, 25, 15, 33, 30, 26, 23, 25, 25, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 37, 15, 15, + 15, 15, 15, 15, 15, 15, 38, 36, 39, 40, 36, 36, 41, 0, 0, 0, + 15, 42, 0, 43, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 25, 45, 46, 39, 0, 47, 22, 48, 32, 11, 11, 11, + 49, 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 50, 33, 34, 25, 25, + 25, 25, 25, 25, 15, 51, 30, 32, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 15, 15, 15, 15, 52, 44, 53, 25, 25, 25, 25, 25, + 28, 26, 26, 29, 25, 25, 25, 25, 25, 25, 0, 0, 10, 11, 11, 11, + 11, 11, 11, 11, 11, 22, 54, 55, 14, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 56, 0, 57, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 58, + 59, 58, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, 36, 0, 0, 0, + 61, 62, 0, 63, 44, 44, 64, 65, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 66, 44, 44, 44, 44, 44, 7, 7, 67, 68, 69, 36, 36, 36, + 36, 36, 36, 36, 36, 70, 44, 71, 44, 72, 73, 74, 7, 7, 75, 76, + 77, 0, 0, 78, 79, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, + 44, 44, 64, 80, 36, 36, 36, 36, 36, 81, 44, 44, 82, 0, 0, 0, + 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 66, 44, 44, 41, 83, 0, + 36, 36, 36, 36, 36, 81, 84, 44, 44, 85, 85, 86, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 87, 36, 36, 88, 0, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 64, + 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, 89, + 44, 44, 44, 44, 85, 44, 36, 36, 81, 90, 7, 7, 80, 36, 80, 36, + 57, 80, 36, 76, 76, 36, 36, 36, 36, 36, 87, 36, 43, 40, 41, 89, + 44, 91, 91, 92, 0, 93, 0, 94, 81, 95, 7, 7, 41, 0, 0, 0, + 57, 80, 60, 96, 76, 36, 36, 36, 36, 36, 87, 36, 87, 97, 41, 73, + 64, 93, 91, 86, 98, 0, 80, 43, 0, 95, 7, 7, 74, 99, 0, 0, + 57, 80, 36, 94, 94, 36, 36, 36, 36, 36, 87, 36, 87, 80, 41, 89, + 44, 58, 58, 86, 88, 0, 0, 0, 81, 95, 7, 7, 0, 0, 0, 0, + 44, 91, 91, 86, 0, 100, 0, 94, 81, 95, 7, 7, 54, 0, 0, 0, + 101, 80, 60, 40, 87, 41, 97, 87, 96, 88, 60, 40, 36, 36, 41, 100, + 64, 100, 73, 86, 88, 93, 0, 0, 0, 95, 7, 7, 0, 0, 0, 0, + 57, 80, 36, 87, 87, 36, 36, 36, 36, 36, 87, 36, 36, 80, 41, 102, + 44, 73, 73, 86, 0, 59, 41, 0, 100, 80, 36, 87, 87, 36, 36, 36, + 36, 36, 87, 36, 36, 80, 41, 89, 44, 73, 73, 86, 0, 59, 0, 103, + 81, 95, 7, 7, 97, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 102, + 44, 73, 73, 92, 0, 93, 0, 0, 81, 95, 7, 7, 0, 0, 40, 36, + 100, 80, 36, 36, 36, 60, 40, 36, 36, 36, 36, 36, 94, 36, 36, 54, + 36, 60, 104, 93, 44, 105, 44, 44, 0, 0, 0, 0, 100, 0, 0, 0, + 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 79, 44, 64, 0, + 36, 66, 44, 64, 7, 7, 106, 0, 97, 76, 43, 54, 0, 36, 80, 36, + 80, 107, 40, 80, 79, 44, 58, 82, 36, 43, 44, 86, 7, 7, 106, 36, + 88, 0, 0, 0, 0, 0, 86, 0, 7, 7, 106, 0, 0, 108, 109, 110, + 36, 36, 80, 36, 36, 36, 36, 36, 36, 36, 36, 88, 57, 44, 44, 44, + 44, 73, 36, 85, 44, 44, 57, 44, 44, 44, 44, 44, 44, 44, 44, 111, + 0, 104, 0, 0, 0, 0, 0, 0, 36, 36, 66, 44, 44, 44, 44, 112, + 7, 7, 113, 0, 36, 81, 74, 81, 89, 72, 44, 74, 85, 69, 36, 36, + 81, 44, 44, 84, 7, 7, 114, 86, 11, 49, 0, 115, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 60, 36, 36, 36, 87, 41, 36, 60, 87, 41, + 36, 36, 87, 41, 36, 36, 36, 36, 36, 36, 36, 36, 87, 41, 36, 60, + 87, 41, 36, 36, 36, 60, 36, 36, 36, 36, 36, 36, 87, 41, 36, 36, + 36, 36, 36, 36, 36, 36, 60, 57, 116, 9, 117, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 0, 0, 0, 0, 36, 36, 36, 36, 36, 88, 0, 0, + 36, 36, 36, 118, 36, 36, 36, 36, 119, 36, 36, 36, 36, 36, 120, 121, + 36, 36, 60, 40, 88, 0, 0, 0, 36, 36, 36, 87, 81, 111, 0, 0, + 36, 36, 36, 36, 81, 122, 0, 0, 36, 36, 36, 36, 81, 0, 0, 0, + 36, 36, 36, 87, 123, 0, 0, 0, 36, 36, 36, 36, 36, 44, 44, 44, + 44, 44, 44, 44, 44, 96, 0, 99, 7, 7, 106, 0, 0, 0, 0, 0, + 124, 0, 125, 126, 7, 7, 106, 0, 36, 36, 36, 36, 36, 36, 0, 0, + 36, 36, 127, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 88, 44, 44, 44, 0, 44, 44, 44, 0, + 0, 90, 7, 7, 36, 36, 36, 36, 36, 36, 36, 41, 36, 88, 0, 0, + 36, 36, 36, 0, 44, 44, 44, 44, 69, 36, 86, 0, 7, 7, 106, 0, + 36, 36, 36, 36, 36, 66, 44, 0, 36, 36, 36, 36, 36, 85, 44, 64, + 44, 44, 44, 44, 44, 44, 44, 91, 7, 7, 106, 0, 7, 7, 106, 0, + 0, 96, 128, 0, 0, 0, 0, 0, 44, 69, 36, 36, 36, 36, 36, 36, + 44, 69, 36, 0, 7, 7, 113, 129, 0, 0, 93, 44, 44, 0, 0, 0, + 112, 36, 36, 36, 36, 36, 36, 36, 85, 44, 44, 74, 7, 7, 75, 36, + 36, 81, 44, 44, 44, 0, 0, 0, 36, 44, 44, 44, 44, 44, 9, 117, + 7, 7, 106, 80, 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 36, 130, + 0, 0, 0, 0, 64, 44, 44, 44, 44, 44, 69, 79, 81, 131, 0, 0, + 44, 64, 0, 0, 0, 0, 0, 44, 25, 25, 25, 25, 25, 34, 15, 27, + 15, 15, 11, 11, 15, 39, 11, 132, 15, 15, 11, 11, 15, 15, 11, 11, + 15, 39, 11, 132, 15, 15, 133, 133, 15, 15, 11, 11, 15, 15, 15, 39, + 15, 15, 11, 11, 15, 134, 11, 135, 46, 134, 11, 136, 15, 46, 11, 0, + 15, 15, 11, 136, 46, 134, 11, 136, 137, 137, 138, 139, 140, 141, 142, 142, + 0, 143, 144, 145, 0, 0, 146, 147, 0, 148, 147, 0, 0, 0, 0, 149, + 61, 150, 61, 61, 21, 0, 0, 151, 0, 0, 0, 146, 15, 15, 15, 42, + 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, 111, 0, 0, 0, + 47, 152, 153, 154, 23, 115, 10, 132, 0, 155, 48, 156, 11, 38, 157, 33, + 0, 158, 39, 159, 0, 0, 0, 0, 160, 38, 88, 0, 0, 0, 0, 0, + 0, 0, 142, 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 161, 11, 11, 15, 15, 39, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 162, 0, 0, 142, 142, 142, 5, 0, 0, + 0, 146, 0, 0, 0, 0, 0, 0, 0, 163, 142, 142, 0, 0, 0, 0, + 4, 142, 142, 142, 142, 142, 121, 0, 0, 0, 0, 0, 0, 0, 142, 0, + 0, 0, 0, 0, 0, 0, 0, 5, 11, 11, 11, 22, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 24, 31, 164, 26, 32, 25, 29, 15, 33, + 25, 42, 152, 165, 53, 0, 0, 0, 15, 166, 0, 21, 36, 36, 36, 36, + 36, 36, 0, 96, 0, 0, 0, 93, 36, 36, 36, 36, 36, 60, 0, 0, + 36, 60, 36, 60, 36, 60, 36, 60, 142, 142, 142, 5, 0, 0, 0, 5, + 142, 142, 5, 167, 0, 0, 0, 0, 168, 80, 142, 142, 5, 142, 142, 169, + 80, 36, 81, 44, 80, 41, 36, 88, 36, 36, 36, 36, 36, 60, 59, 80, + 0, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 80, 36, 36, 36, + 36, 36, 36, 60, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, + 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 88, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 170, 36, 36, 36, 171, 36, 36, 36, 36, + 7, 7, 75, 0, 0, 0, 0, 0, 25, 25, 25, 172, 64, 44, 44, 173, + 25, 25, 25, 25, 25, 25, 0, 93, 36, 36, 36, 36, 174, 9, 0, 0, + 0, 0, 0, 0, 0, 96, 36, 36, 175, 25, 25, 25, 27, 25, 25, 25, + 25, 25, 25, 25, 15, 15, 26, 30, 25, 25, 176, 177, 25, 0, 0, 0, + 25, 25, 178, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179, 36, + 180, 180, 66, 36, 36, 36, 36, 36, 66, 44, 0, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 129, 0, 0, 74, 36, 36, 36, 36, 36, 36, 36, + 44, 111, 0, 129, 7, 7, 106, 0, 44, 44, 44, 44, 74, 36, 96, 0, + 36, 81, 44, 174, 36, 36, 36, 36, 36, 66, 44, 44, 44, 0, 0, 0, + 36, 36, 36, 36, 66, 44, 44, 44, 111, 0, 147, 96, 7, 7, 106, 0, + 36, 36, 85, 44, 44, 64, 0, 0, 66, 36, 36, 86, 7, 7, 106, 181, + 36, 36, 36, 36, 36, 60, 182, 0, 36, 36, 36, 36, 89, 72, 69, 81, + 127, 0, 0, 0, 0, 0, 96, 41, 36, 36, 66, 44, 183, 184, 0, 0, + 80, 60, 80, 60, 80, 60, 0, 0, 36, 60, 36, 60, 0, 0, 0, 0, + 66, 44, 185, 86, 7, 7, 106, 0, 36, 0, 0, 0, 36, 36, 36, 36, + 36, 60, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, + 36, 36, 36, 41, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, + 15, 24, 0, 0, 186, 15, 0, 187, 36, 36, 87, 36, 36, 60, 36, 43, + 94, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, 0, + 0, 0, 0, 0, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 188, + 36, 36, 36, 36, 40, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 36, 36, 36, 0, 44, 44, 44, 44, 189, 4, 121, 0, + 44, 64, 0, 0, 190, 169, 142, 142, 142, 191, 121, 0, 6, 192, 193, 162, + 140, 0, 0, 0, 36, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 194, + 56, 0, 5, 6, 0, 0, 195, 9, 14, 15, 15, 15, 15, 15, 16, 196, + 197, 198, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, + 36, 36, 36, 36, 36, 36, 36, 60, 40, 36, 40, 36, 40, 36, 40, 88, + 0, 0, 0, 0, 0, 0, 199, 0, 36, 36, 36, 80, 36, 36, 36, 36, + 36, 60, 36, 36, 36, 36, 60, 94, 36, 36, 36, 41, 36, 36, 36, 41, + 0, 0, 0, 0, 0, 0, 0, 98, 36, 36, 36, 36, 88, 0, 0, 0, + 36, 36, 60, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 41, + 36, 0, 36, 36, 80, 41, 0, 0, 11, 11, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 36, 36, 36, 36, 36, 41, 87, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 94, 88, 76, 36, 36, 36, 36, 36, 36, 0, 40, + 85, 59, 0, 44, 36, 80, 80, 36, 36, 36, 36, 36, 36, 0, 64, 93, + 0, 0, 0, 0, 0, 129, 0, 0, 36, 36, 36, 36, 60, 0, 0, 0, + 36, 36, 88, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 44, 44, + 44, 185, 117, 0, 0, 0, 0, 0, 36, 36, 36, 36, 44, 44, 64, 200, + 147, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, 7, 7, 106, 0, + 36, 66, 44, 44, 44, 201, 7, 7, 181, 0, 0, 0, 0, 0, 0, 0, + 69, 202, 0, 0, 7, 7, 106, 0, 36, 36, 66, 44, 44, 44, 0, 0, + 60, 0, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, + 36, 88, 0, 0, 85, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 64, + 0, 0, 0, 93, 112, 36, 36, 36, 41, 0, 0, 0, 0, 0, 0, 0, + 0, 57, 86, 57, 203, 61, 204, 44, 64, 57, 44, 0, 0, 0, 0, 0, + 0, 0, 100, 86, 0, 0, 0, 0, 100, 111, 0, 0, 0, 0, 0, 0, + 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, + 11, 11, 11, 154, 15, 134, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, + 154, 15, 15, 15, 15, 15, 15, 48, 47, 205, 10, 48, 11, 154, 166, 14, + 15, 14, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, + 15, 49, 22, 10, 11, 48, 11, 206, 15, 15, 15, 15, 15, 15, 49, 22, + 11, 155, 161, 11, 206, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, + 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 154, 15, 15, 15, 15, + 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, + 15, 15, 15, 15, 11, 11, 11, 11, 15, 39, 11, 11, 11, 11, 11, 11, + 206, 15, 15, 15, 15, 15, 24, 15, 33, 11, 11, 11, 11, 11, 22, 15, + 15, 15, 15, 15, 15, 134, 15, 11, 11, 11, 11, 11, 11, 206, 15, 15, + 15, 15, 15, 24, 15, 33, 11, 11, 15, 15, 134, 15, 11, 11, 11, 11, + 11, 11, 206, 15, 15, 15, 15, 15, 24, 15, 27, 95, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 36, 80, 36, 36, 36, 36, 36, 36, + 97, 76, 80, 36, 60, 36, 107, 0, 103, 96, 107, 80, 97, 76, 107, 107, + 97, 76, 60, 36, 60, 36, 80, 43, 36, 36, 94, 36, 36, 36, 36, 0, + 80, 80, 94, 36, 36, 36, 36, 0, 20, 0, 0, 0, 0, 0, 0, 0, + 61, 61, 61, 61, 61, 61, 61, 61, 44, 44, 44, 44, 0, 0, 0, 0, +}; + +static RE_UINT8 re_sentence_break_stage_5[] = { + 0, 0, 0, 0, 0, 6, 2, 6, 6, 1, 0, 0, 6, 12, 13, 0, + 0, 0, 0, 13, 13, 13, 0, 0, 14, 14, 11, 0, 10, 10, 10, 10, + 10, 10, 14, 0, 0, 0, 0, 12, 0, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 13, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 13, 0, 4, 0, 0, 6, 0, 0, 0, 0, 0, 7, 13, + 0, 5, 0, 0, 0, 7, 0, 0, 8, 8, 8, 0, 8, 8, 8, 7, + 7, 7, 7, 0, 8, 7, 8, 7, 7, 8, 7, 8, 7, 7, 8, 7, + 8, 8, 7, 8, 7, 8, 7, 7, 7, 8, 8, 7, 8, 7, 8, 8, + 7, 8, 8, 8, 7, 7, 8, 8, 8, 7, 7, 7, 8, 7, 7, 9, + 9, 9, 9, 9, 9, 7, 7, 7, 7, 9, 9, 9, 7, 7, 0, 0, + 0, 0, 9, 9, 9, 9, 0, 0, 7, 0, 0, 0, 9, 0, 9, 0, + 3, 3, 3, 3, 9, 0, 8, 7, 0, 0, 7, 7, 0, 0, 8, 0, + 8, 0, 8, 8, 8, 8, 0, 8, 7, 7, 7, 8, 8, 7, 0, 8, + 8, 7, 0, 3, 3, 3, 8, 7, 0, 9, 0, 0, 12, 14, 12, 0, + 0, 12, 0, 0, 0, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, + 9, 9, 9, 0, 5, 5, 5, 5, 5, 0, 0, 0, 14, 14, 0, 0, + 3, 3, 3, 0, 5, 0, 0, 12, 9, 9, 9, 3, 10, 10, 0, 10, + 10, 0, 9, 9, 3, 9, 9, 9, 12, 9, 3, 3, 3, 5, 0, 3, + 3, 9, 9, 3, 3, 0, 3, 3, 3, 3, 9, 9, 10, 10, 9, 9, + 9, 0, 0, 9, 12, 12, 12, 0, 0, 0, 0, 5, 9, 3, 9, 9, + 0, 9, 9, 9, 9, 9, 3, 3, 3, 9, 0, 0, 14, 12, 9, 0, + 3, 3, 9, 3, 9, 3, 3, 3, 3, 3, 0, 0, 9, 0, 9, 9, + 9, 0, 0, 0, 3, 9, 3, 3, 12, 12, 10, 10, 3, 0, 0, 3, + 3, 3, 9, 0, 0, 0, 0, 3, 9, 9, 0, 9, 0, 0, 10, 10, + 0, 0, 0, 9, 0, 9, 9, 0, 0, 3, 0, 0, 9, 3, 0, 0, + 0, 0, 3, 3, 0, 0, 3, 9, 0, 9, 3, 3, 0, 0, 9, 0, + 0, 0, 3, 0, 3, 0, 3, 0, 10, 10, 0, 0, 0, 9, 0, 9, + 0, 3, 0, 3, 0, 3, 13, 13, 13, 13, 3, 3, 3, 0, 0, 0, + 3, 3, 3, 9, 10, 10, 12, 12, 10, 10, 3, 3, 0, 8, 0, 0, + 0, 0, 12, 0, 12, 0, 0, 0, 9, 0, 12, 9, 6, 9, 9, 9, + 9, 9, 9, 13, 13, 0, 0, 0, 3, 12, 12, 0, 9, 0, 3, 3, + 0, 0, 14, 12, 14, 12, 0, 3, 3, 3, 5, 0, 9, 3, 9, 0, + 12, 12, 12, 12, 0, 0, 12, 12, 9, 9, 12, 12, 3, 9, 9, 0, + 8, 8, 0, 0, 0, 8, 0, 8, 7, 0, 7, 7, 8, 0, 7, 0, + 8, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 5, 3, 3, 5, 5, + 0, 0, 0, 14, 14, 0, 0, 0, 13, 13, 13, 13, 11, 0, 0, 0, + 4, 4, 5, 5, 5, 5, 5, 6, 0, 13, 13, 0, 12, 12, 0, 0, + 0, 13, 13, 12, 0, 0, 0, 6, 5, 0, 5, 5, 0, 13, 13, 7, + 0, 0, 0, 8, 0, 0, 7, 8, 8, 8, 7, 7, 8, 0, 8, 0, + 8, 8, 0, 7, 9, 7, 0, 0, 0, 8, 7, 7, 0, 0, 7, 0, + 9, 9, 9, 8, 0, 0, 8, 8, 13, 13, 13, 0, 0, 0, 13, 13, + 8, 7, 7, 8, 7, 8, 7, 3, 7, 7, 0, 7, 0, 0, 12, 9, + 6, 14, 12, 0, 0, 13, 13, 13, 9, 9, 0, 12, 9, 0, 12, 12, + 8, 7, 9, 3, 3, 3, 0, 9, 3, 3, 0, 12, 0, 0, 8, 7, + 9, 0, 0, 8, 7, 8, 7, 0, 8, 7, 8, 0, 7, 7, 7, 9, + 9, 9, 3, 9, 0, 12, 12, 12, 0, 0, 9, 3, 12, 12, 9, 9, + 9, 3, 3, 0, 3, 3, 3, 12, 0, 0, 0, 7, 0, 9, 3, 9, + 9, 9, 13, 13, 14, 14, 0, 14, 0, 14, 14, 0, 13, 0, 0, 13, + 0, 14, 12, 12, 14, 13, 13, 13, 9, 0, 0, 5, 0, 0, 14, 0, + 0, 13, 0, 13, 13, 12, 13, 13, 14, 0, 9, 9, 0, 5, 5, 5, + 0, 5, 12, 12, 3, 0, 10, 10, 9, 12, 12, 0, 3, 3, 3, 5, + 5, 5, 5, 3, 0, 8, 8, 0, 8, 0, 7, 7, +}; + +/* Sentence_Break: 5596 bytes. */ + +RE_UINT32 re_get_sentence_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_sentence_break_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_sentence_break_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_sentence_break_stage_3[pos + f] << 3; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_sentence_break_stage_4[pos + f] << 2; + value = re_sentence_break_stage_5[pos + code]; + + return value; +} + +/* Math. */ + +static RE_UINT8 re_math_stage_1[] = { + 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, +}; + +static RE_UINT8 re_math_stage_2[] = { + 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, +}; + +static RE_UINT8 re_math_stage_3[] = { + 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 4, 5, 6, 7, 1, 8, 9, 10, 1, 6, 6, 11, 1, 1, 1, 1, + 1, 1, 1, 12, 1, 1, 13, 14, 1, 1, 1, 1, 15, 16, 17, 18, + 1, 1, 1, 1, 1, 1, 19, 1, +}; + +static RE_UINT8 re_math_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, + 9, 10, 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 19, 20, 21, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 25, 0, 26, 27, 28, 29, 30, + 0, 0, 0, 0, 0, 31, 32, 33, 34, 0, 35, 36, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 23, 23, 0, 19, 37, 0, 0, 0, 0, 0, + 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, + 1, 3, 3, 0, 0, 0, 0, 40, 23, 23, 41, 23, 42, 43, 44, 23, + 45, 46, 47, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 48, 23, 23, + 23, 23, 23, 23, 23, 23, 49, 23, 44, 50, 51, 52, 53, 54, 0, 55, +}; + +static RE_UINT8 re_math_stage_5[] = { + 0, 0, 0, 0, 0, 8, 0, 112, 0, 0, 0, 64, 0, 0, 0, 80, + 0, 16, 2, 0, 0, 0, 128, 0, 0, 0, 39, 0, 0, 0, 115, 0, + 192, 1, 0, 0, 0, 0, 64, 0, 0, 0, 28, 0, 17, 0, 4, 0, + 30, 0, 0, 124, 0, 124, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, + 132, 252, 47, 63, 16, 179, 251, 241, 255, 11, 0, 0, 0, 0, 255, 255, + 255, 126, 195, 240, 255, 255, 255, 47, 48, 0, 240, 255, 255, 255, 255, 255, + 0, 15, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 248, + 255, 255, 191, 0, 0, 0, 1, 240, 7, 0, 0, 0, 3, 192, 255, 240, + 195, 140, 15, 0, 148, 31, 0, 255, 96, 0, 0, 0, 5, 0, 0, 0, + 15, 224, 0, 0, 159, 31, 0, 0, 0, 2, 0, 0, 126, 1, 0, 0, + 4, 30, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, + 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, + 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, + 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, 0, 0, 3, 0, +}; + +/* Math: 538 bytes. */ + +RE_UINT32 re_get_math(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_math_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_math_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_math_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_math_stage_4[pos + f] << 5; + pos += code; + value = (re_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Alphabetic. */ + +static RE_UINT8 re_alphabetic_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_alphabetic_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_alphabetic_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, + 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, + 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, + 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, + 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, + 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_alphabetic_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 7, 8, 9, 10, 4, 11, + 4, 4, 4, 4, 12, 4, 4, 4, 4, 13, 14, 15, 16, 17, 18, 19, + 20, 4, 21, 22, 4, 4, 23, 24, 25, 4, 26, 4, 4, 27, 28, 29, + 30, 31, 32, 0, 0, 33, 0, 34, 4, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 38, 47, 50, 51, 52, 53, 54, 0, + 55, 56, 57, 49, 58, 56, 59, 60, 58, 61, 62, 63, 64, 65, 66, 67, + 15, 68, 69, 0, 70, 71, 72, 0, 73, 0, 74, 75, 76, 77, 0, 0, + 4, 78, 25, 79, 80, 4, 81, 82, 4, 4, 83, 4, 84, 85, 86, 4, + 87, 4, 88, 0, 89, 4, 4, 90, 15, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 91, 1, 4, 4, 92, 93, 94, 94, 95, 4, 96, 97, 0, + 0, 4, 4, 98, 4, 99, 4, 100, 77, 101, 25, 102, 4, 103, 104, 0, + 105, 4, 106, 107, 0, 108, 0, 0, 4, 109, 110, 0, 4, 111, 4, 112, + 4, 100, 113, 114, 0, 0, 0, 115, 4, 4, 4, 4, 4, 4, 0, 0, + 116, 4, 117, 114, 4, 118, 119, 120, 0, 0, 0, 121, 122, 0, 0, 0, + 123, 124, 125, 4, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 127, 4, 104, 4, 128, 106, 4, 4, 4, 4, 129, + 4, 81, 4, 130, 131, 132, 132, 4, 0, 133, 0, 0, 0, 0, 0, 0, + 134, 135, 15, 4, 136, 15, 4, 82, 137, 138, 4, 4, 139, 68, 0, 25, + 4, 4, 4, 4, 4, 100, 0, 0, 4, 4, 4, 4, 4, 4, 31, 0, + 4, 4, 4, 4, 31, 0, 25, 114, 140, 141, 4, 142, 143, 4, 4, 89, + 144, 145, 4, 4, 146, 147, 0, 148, 149, 16, 4, 94, 4, 4, 49, 150, + 28, 99, 151, 77, 4, 152, 133, 0, 4, 131, 153, 154, 4, 106, 155, 156, + 157, 158, 0, 0, 0, 0, 4, 147, 4, 4, 4, 4, 4, 159, 160, 105, + 4, 4, 4, 161, 4, 4, 162, 0, 163, 164, 165, 4, 4, 27, 166, 4, + 4, 114, 25, 4, 167, 4, 16, 168, 0, 0, 0, 169, 4, 4, 4, 77, + 0, 1, 1, 170, 4, 106, 171, 0, 172, 173, 174, 0, 4, 4, 4, 68, + 0, 0, 4, 90, 0, 0, 0, 0, 0, 0, 0, 0, 77, 4, 175, 0, + 106, 25, 147, 0, 114, 4, 176, 0, 4, 4, 4, 4, 114, 0, 0, 0, + 177, 178, 100, 0, 0, 0, 0, 0, 100, 162, 0, 0, 4, 179, 0, 0, + 180, 94, 0, 77, 0, 0, 0, 0, 4, 100, 100, 151, 0, 0, 0, 0, + 4, 4, 126, 0, 0, 0, 0, 0, 4, 4, 181, 0, 145, 32, 25, 126, + 4, 151, 0, 0, 4, 4, 182, 0, 0, 0, 0, 0, 4, 100, 0, 0, + 4, 4, 4, 139, 0, 0, 0, 0, 4, 4, 4, 183, 0, 0, 0, 0, + 4, 139, 0, 0, 0, 0, 0, 0, 4, 32, 0, 0, 0, 0, 0, 0, + 4, 4, 184, 106, 166, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 186, 4, 187, 188, 189, 4, 190, 191, 192, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 193, 194, 82, 186, 186, 128, 128, 195, 195, 196, 0, + 189, 197, 198, 199, 200, 201, 0, 0, 4, 4, 4, 4, 4, 4, 131, 0, + 4, 90, 4, 4, 4, 4, 4, 4, 114, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_alphabetic_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 32, 0, 0, 0, + 0, 0, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, + 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, + 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 255, 255, 254, 0, 192, 255, 255, 255, 255, 239, 31, + 254, 225, 0, 156, 0, 0, 255, 255, 0, 224, 255, 255, 255, 255, 3, 0, + 0, 252, 255, 255, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, + 255, 255, 255, 1, 253, 31, 0, 0, 240, 3, 255, 127, 255, 255, 255, 239, + 255, 223, 225, 255, 15, 0, 254, 254, 238, 159, 249, 255, 255, 253, 197, 227, + 159, 89, 128, 176, 15, 0, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, + 135, 25, 2, 94, 0, 0, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, + 191, 27, 1, 0, 15, 0, 0, 0, 159, 25, 192, 176, 15, 0, 2, 0, + 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 29, 96, 3, 236, 223, 253, 255, 223, 29, 96, 64, + 15, 0, 6, 0, 255, 255, 255, 231, 223, 93, 128, 0, 15, 0, 0, 252, + 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, 0, 0, 12, 0, + 255, 255, 255, 7, 127, 32, 0, 0, 150, 37, 240, 254, 174, 236, 255, 59, + 95, 32, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 254, 255, + 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, 231, 193, 255, 255, + 127, 64, 0, 48, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, + 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, + 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, + 255, 199, 1, 0, 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, + 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, + 255, 255, 63, 0, 255, 15, 255, 1, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 0, 0, 255, 255, 255, 15, 255, 255, 255, 127, 254, 255, 31, 0, + 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 0, 0, 255, 243, 0, 252, + 191, 255, 3, 0, 0, 224, 0, 252, 255, 255, 255, 63, 0, 222, 111, 0, + 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, + 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, + 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, + 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, + 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, + 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, + 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 240, 143, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 0, 0, 252, 8, 255, 255, 7, 0, + 255, 255, 247, 255, 255, 63, 0, 0, 255, 255, 127, 4, 5, 0, 0, 56, + 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, + 127, 248, 255, 255, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, + 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, + 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, + 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 111, 240, 239, 254, 63, 0, 0, 0, 30, 0, 0, 0, 7, 0, 0, 0, + 31, 0, 255, 255, 3, 0, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, + 247, 15, 0, 0, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* Alphabetic: 1817 bytes. */ + +RE_UINT32 re_get_alphabetic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_alphabetic_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_alphabetic_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_alphabetic_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_alphabetic_stage_4[pos + f] << 5; + pos += code; + value = (re_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Lowercase. */ + +static RE_UINT8 re_lowercase_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_lowercase_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, + 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_lowercase_stage_3[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 5, + 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, 3, 11, + 3, 3, 12, 3, 3, 3, 3, 3, 3, 3, 13, 14, 3, 3, 3, 3, +}; + +static RE_UINT8 re_lowercase_stage_4[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 5, 13, 14, 15, 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, 24, 25, + 0, 26, 15, 5, 27, 5, 28, 5, 5, 29, 0, 30, 31, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, + 5, 5, 5, 5, 32, 5, 5, 5, 33, 34, 35, 36, 34, 37, 38, 39, + 0, 0, 0, 40, 41, 0, 0, 0, 42, 43, 44, 26, 45, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 26, 46, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 26, 47, 48, 5, 5, 5, 49, 15, 50, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 51, 52, 0, 0, 0, 0, 53, 5, 54, 55, 56, 0, 57, + 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 59, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 61, 62, 63, 31, 64, 65, 66, 67, 68, 69, 70, 71, 72, 61, 62, 73, + 31, 64, 74, 60, 67, 75, 76, 77, 78, 74, 79, 26, 80, 67, 81, 0, +}; + +static RE_UINT8 re_lowercase_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 0, 0, 0, 128, + 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 85, 85, 171, 170, 170, + 170, 170, 170, 212, 41, 49, 36, 78, 42, 45, 81, 230, 64, 82, 85, 181, + 170, 170, 41, 170, 170, 170, 250, 147, 133, 170, 255, 255, 255, 255, 255, 255, + 255, 255, 239, 255, 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 138, 60, 0, 0, 1, 0, 0, 240, 255, 255, + 255, 127, 227, 170, 170, 170, 47, 25, 0, 0, 255, 255, 2, 168, 170, 170, + 84, 213, 170, 170, 170, 0, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, + 170, 170, 234, 191, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, + 255, 0, 255, 63, 255, 0, 223, 64, 220, 0, 207, 0, 255, 0, 220, 0, + 0, 0, 2, 128, 0, 0, 255, 31, 0, 196, 8, 0, 0, 128, 16, 50, + 192, 67, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, + 98, 21, 218, 63, 26, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, + 170, 170, 170, 0, 168, 170, 171, 170, 170, 170, 255, 149, 170, 80, 10, 0, + 170, 2, 0, 0, 0, 0, 0, 7, 127, 0, 248, 0, 0, 255, 255, 255, + 255, 255, 0, 0, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 223, 255, + 252, 255, 255, 15, 0, 0, 192, 235, 239, 255, 0, 0, 0, 252, 255, 255, + 15, 0, 0, 192, 255, 255, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, + 192, 255, 255, 255, 0, 192, 255, 255, 0, 0, 192, 255, 63, 0, 0, 0, + 252, 255, 255, 247, 3, 0, 0, 240, 255, 255, 223, 15, 255, 127, 63, 0, + 255, 253, 0, 0, 247, 11, 0, 0, +}; + +/* Lowercase: 697 bytes. */ + +RE_UINT32 re_get_lowercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_lowercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_lowercase_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_lowercase_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_lowercase_stage_4[pos + f] << 5; + pos += code; + value = (re_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Uppercase. */ + +static RE_UINT8 re_uppercase_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_uppercase_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, + 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_uppercase_stage_3[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 5, + 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, + 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 12, 13, 3, 3, 3, 3, +}; + +static RE_UINT8 re_uppercase_stage_4[] = { + 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, + 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, + 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 18, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 0, 30, 31, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 33, 34, 35, 19, 36, 0, 0, 0, + 0, 0, 0, 0, 0, 37, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 18, 38, 0, 39, 3, 3, 3, 40, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 41, 42, 0, 0, 0, 0, 43, 3, 44, 45, 46, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 18, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 48, 49, 50, + 51, 61, 62, 54, 55, 51, 63, 64, 65, 66, 37, 38, 54, 67, 68, 0, +}; + +static RE_UINT8 re_uppercase_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, + 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, + 213, 210, 174, 17, 144, 164, 170, 74, 85, 85, 210, 85, 85, 85, 5, 108, + 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, + 0, 128, 28, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, + 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, + 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, + 0, 63, 0, 170, 0, 255, 0, 0, 0, 0, 0, 15, 0, 15, 0, 15, + 0, 31, 0, 15, 132, 56, 39, 62, 80, 61, 15, 192, 32, 0, 0, 0, + 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, + 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, 84, 85, 84, 85, + 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, 255, 0, 0, 0, + 255, 255, 255, 3, 0, 0, 240, 255, 255, 63, 0, 0, 0, 255, 255, 255, + 3, 0, 0, 208, 100, 222, 63, 0, 0, 0, 255, 255, 255, 3, 0, 0, + 176, 231, 223, 31, 0, 0, 0, 123, 95, 252, 1, 0, 0, 240, 255, 255, + 63, 0, 0, 0, 3, 0, 0, 240, 255, 255, 63, 0, 1, 0, 0, 0, + 252, 255, 255, 7, 0, 0, 0, 240, 255, 255, 31, 0, 255, 1, 0, 0, + 0, 4, 0, 0, +}; + +/* Uppercase: 629 bytes. */ + +RE_UINT32 re_get_uppercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_uppercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_uppercase_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_uppercase_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_uppercase_stage_4[pos + f] << 5; + pos += code; + value = (re_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Cased. */ + +static RE_UINT8 re_cased_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_cased_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, + 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_cased_stage_3[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 6, + 7, 3, 8, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 3, 12, + 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, 14, 15, 3, 3, 3, 3, +}; + +static RE_UINT8 re_cased_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 5, 6, 4, + 4, 4, 4, 4, 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 4, 15, + 4, 4, 4, 4, 16, 4, 4, 4, 4, 17, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, 22, 4, 23, 24, 4, 25, 26, 27, + 0, 0, 0, 28, 29, 0, 0, 0, 30, 31, 32, 4, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 34, 4, 35, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 36, 37, 4, 4, 4, 4, 38, 4, 21, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 39, 40, 0, 0, 0, 0, 41, 4, 4, 42, 43, 0, 44, + 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, + 4, 4, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 47, 4, 48, 49, 50, 4, 51, 52, 53, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 54, 55, 5, 47, 47, 36, 36, 56, 56, 57, 0, +}; + +static RE_UINT8 re_cased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 255, 255, 255, 247, 240, 255, 255, 255, 255, 255, 239, 255, + 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, + 0, 0, 207, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, + 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, + 255, 0, 0, 0, 191, 32, 0, 0, 255, 255, 63, 63, 63, 63, 255, 170, + 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 31, 242, + 224, 67, 0, 0, 24, 0, 0, 0, 0, 0, 192, 255, 255, 3, 0, 0, + 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 63, 0, 0, + 255, 255, 255, 0, 252, 255, 255, 255, 255, 120, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 7, 127, 0, 248, 0, 255, 255, 0, 0, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 247, 15, 0, 0, +}; + +/* Cased: 617 bytes. */ + +RE_UINT32 re_get_cased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_cased_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_cased_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_cased_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_cased_stage_4[pos + f] << 5; + pos += code; + value = (re_cased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Case_Ignorable. */ + +static RE_UINT8 re_case_ignorable_stage_1[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, + 4, 4, +}; + +static RE_UINT8 re_case_ignorable_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, + 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +static RE_UINT8 re_case_ignorable_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 25, + 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, 29, 1, + 30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 31, 1, 1, 1, 32, 1, 33, 34, 35, 36, 37, 38, 1, 1, 1, 1, + 1, 1, 1, 39, 1, 1, 40, 41, 1, 42, 1, 1, 1, 1, 1, 1, + 1, 1, 43, 1, 1, 1, 1, 1, 44, 45, 1, 1, 1, 1, 46, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 1, 48, 49, 1, 1, 1, 1, 1, + 50, 51, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_case_ignorable_stage_4[] = { + 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 6, 6, 6, 6, 6, 7, 8, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 14, + 15, 0, 16, 17, 0, 0, 18, 19, 20, 5, 21, 0, 0, 22, 0, 23, + 24, 25, 26, 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 33, 37, 38, 36, 33, 39, 35, 32, 40, 41, 35, 42, 0, 43, 0, + 0, 44, 45, 35, 0, 40, 46, 35, 0, 0, 34, 35, 0, 0, 47, 0, + 0, 48, 49, 0, 0, 50, 51, 0, 52, 53, 0, 54, 55, 56, 57, 0, + 0, 58, 59, 60, 61, 0, 0, 33, 0, 0, 62, 0, 0, 0, 0, 0, + 63, 63, 64, 64, 0, 65, 66, 0, 67, 0, 68, 0, 0, 69, 0, 0, + 0, 70, 0, 0, 0, 0, 0, 0, 71, 0, 72, 73, 0, 74, 0, 0, + 75, 76, 42, 77, 78, 79, 0, 80, 0, 81, 0, 82, 0, 0, 83, 84, + 0, 85, 6, 86, 87, 6, 6, 88, 0, 0, 0, 0, 0, 89, 90, 91, + 92, 93, 0, 94, 95, 0, 5, 96, 0, 0, 0, 97, 0, 0, 0, 98, + 0, 0, 0, 99, 0, 0, 0, 6, 0, 100, 0, 0, 0, 0, 0, 0, + 101, 102, 0, 0, 103, 0, 0, 104, 105, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 82, 106, 0, 0, 107, 108, 0, 0, 109, + 6, 78, 0, 17, 110, 0, 0, 52, 111, 112, 0, 0, 0, 0, 113, 114, + 0, 115, 116, 0, 28, 117, 100, 0, 0, 118, 119, 17, 0, 120, 121, 122, + 0, 0, 0, 0, 0, 0, 0, 123, 2, 0, 0, 0, 0, 124, 78, 0, + 125, 126, 127, 0, 0, 0, 0, 108, 1, 2, 3, 17, 44, 0, 0, 128, + 0, 0, 0, 0, 0, 0, 0, 129, 130, 131, 0, 0, 0, 0, 0, 0, + 32, 132, 126, 0, 78, 133, 0, 0, 28, 134, 0, 0, 78, 135, 0, 0, + 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 137, 0, 0, 0, + 0, 0, 0, 138, 139, 140, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, + 32, 6, 6, 6, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 142, +}; + +static RE_UINT8 re_case_ignorable_stage_5[] = { + 0, 0, 0, 0, 128, 64, 0, 4, 0, 0, 0, 64, 1, 0, 0, 0, + 0, 161, 144, 1, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 48, 4, + 176, 0, 0, 0, 248, 3, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, + 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 16, 0, 31, 0, 255, 23, + 1, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 191, 255, 61, 0, 0, + 0, 128, 2, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 4, + 0, 0, 192, 255, 255, 63, 0, 0, 0, 0, 0, 14, 240, 255, 255, 127, + 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, 12, 0, 2, 0, + 2, 0, 0, 0, 0, 0, 0, 16, 30, 32, 0, 0, 12, 0, 0, 0, + 6, 0, 0, 0, 134, 57, 2, 0, 0, 0, 35, 0, 190, 33, 0, 0, + 0, 0, 0, 144, 30, 32, 64, 0, 4, 0, 0, 0, 1, 32, 0, 0, + 0, 0, 0, 192, 193, 61, 96, 0, 64, 48, 0, 0, 0, 4, 92, 0, + 0, 0, 242, 7, 192, 127, 0, 0, 0, 0, 242, 27, 64, 63, 0, 0, + 0, 0, 0, 3, 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, + 255, 255, 255, 31, 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, + 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, + 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 143, 32, 0, 120, 0, 0, + 8, 0, 0, 0, 0, 2, 0, 0, 135, 1, 4, 14, 0, 0, 128, 9, + 0, 0, 64, 127, 229, 31, 248, 159, 128, 0, 0, 0, 15, 0, 0, 0, + 0, 0, 208, 23, 0, 248, 15, 0, 3, 0, 0, 0, 60, 11, 0, 0, + 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 0, 63, 0, 0, 247, 255, + 253, 33, 16, 0, 0, 240, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, + 127, 0, 0, 240, 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, + 0, 248, 0, 3, 144, 124, 0, 0, 223, 255, 2, 128, 0, 0, 255, 31, + 255, 255, 1, 0, 0, 0, 0, 48, 0, 128, 3, 0, 0, 128, 0, 128, + 0, 128, 0, 0, 32, 0, 0, 0, 0, 60, 62, 8, 0, 0, 0, 126, + 0, 0, 0, 112, 0, 0, 32, 0, 0, 16, 0, 0, 0, 128, 247, 191, + 0, 0, 0, 128, 0, 0, 3, 0, 0, 7, 0, 0, 68, 8, 0, 0, + 96, 0, 0, 0, 16, 0, 0, 0, 255, 255, 3, 0, 192, 63, 0, 0, + 128, 255, 3, 0, 0, 0, 200, 19, 0, 126, 102, 0, 8, 16, 0, 0, + 0, 0, 157, 193, 2, 0, 0, 32, 0, 48, 88, 0, 32, 33, 0, 0, + 0, 0, 252, 255, 255, 255, 8, 0, 127, 0, 0, 0, 0, 0, 36, 0, + 8, 0, 0, 14, 0, 0, 0, 32, 110, 240, 0, 0, 0, 0, 0, 135, + 0, 0, 0, 255, 0, 0, 120, 38, 128, 239, 31, 0, 0, 0, 192, 127, + 0, 40, 191, 0, 0, 128, 255, 255, 128, 3, 248, 255, 231, 15, 0, 0, + 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 0, 0, +}; + +/* Case_Ignorable: 1254 bytes. */ + +RE_UINT32 re_get_case_ignorable(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_case_ignorable_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_case_ignorable_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_case_ignorable_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_case_ignorable_stage_4[pos + f] << 5; + pos += code; + value = (re_case_ignorable_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Lowercased. */ + +static RE_UINT8 re_changes_when_lowercased_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_2[] = { + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 15, + 6, 6, 6, 6, 16, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_4[] = { + 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, + 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, + 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 24, 0, + 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 27, 30, 31, 32, + 0, 33, 0, 19, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 19, 0, + 18, 36, 0, 37, 3, 3, 3, 38, 0, 0, 3, 39, 40, 0, 0, 0, + 0, 41, 3, 42, 43, 44, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 18, 45, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, + 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, + 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, 85, 85, 5, 108, + 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, + 0, 128, 0, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, + 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, + 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, + 0, 63, 0, 170, 0, 255, 0, 0, 0, 255, 0, 31, 0, 31, 0, 15, + 0, 31, 0, 31, 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 192, 255, + 255, 127, 0, 0, 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, + 85, 85, 85, 0, 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, + 85, 5, 0, 0, 255, 0, 0, 0, +}; + +/* Changes_When_Lowercased: 490 bytes. */ + +RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_lowercased_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_lowercased_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_lowercased_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_lowercased_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_lowercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Uppercased. */ + +static RE_UINT8 re_changes_when_uppercased_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_2[] = { + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, + 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, + 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, + 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_4[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, + 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, + 5, 5, 5, 5, 31, 5, 5, 5, 32, 33, 34, 35, 24, 36, 37, 38, + 0, 0, 39, 23, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 41, + 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 53, 54, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, + 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, + 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 96, 91, 85, 181, + 170, 170, 45, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, + 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, + 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, + 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, + 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, + 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, + 255, 255, 223, 80, 220, 16, 207, 0, 255, 0, 220, 16, 0, 64, 0, 0, + 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, + 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, + 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, + 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, +}; + +/* Changes_When_Uppercased: 534 bytes. */ + +RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_uppercased_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_uppercased_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_uppercased_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_uppercased_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_uppercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Titlecased. */ + +static RE_UINT8 re_changes_when_titlecased_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_2[] = { + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, + 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, + 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, + 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_4[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, + 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, + 5, 5, 5, 5, 31, 5, 5, 5, 32, 33, 34, 35, 33, 36, 37, 38, + 0, 0, 39, 23, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 41, + 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 53, 54, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, + 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, + 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 208, 86, 85, 181, + 170, 170, 43, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, + 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, + 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, + 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, + 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, + 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, + 255, 0, 223, 64, 220, 0, 207, 0, 255, 0, 220, 0, 0, 64, 0, 0, + 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, + 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, + 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, + 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, +}; + +/* Changes_When_Titlecased: 534 bytes. */ + +RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_titlecased_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_titlecased_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_titlecased_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_titlecased_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_titlecased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Casefolded. */ + +static RE_UINT8 re_changes_when_casefolded_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_2[] = { + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 15, 6, 6, 6, 16, + 6, 6, 6, 6, 17, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_4[] = { + 0, 0, 1, 0, 0, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, + 4, 12, 13, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, + 20, 21, 0, 4, 22, 4, 23, 4, 4, 24, 25, 0, 26, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 27, 0, + 4, 4, 4, 4, 28, 4, 4, 4, 29, 30, 31, 32, 20, 33, 34, 35, + 0, 36, 0, 21, 37, 0, 0, 0, 0, 0, 0, 0, 0, 38, 21, 0, + 20, 39, 0, 40, 4, 4, 4, 41, 0, 0, 4, 42, 43, 0, 0, 0, + 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, 20, 49, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, + 85, 85, 85, 85, 85, 85, 85, 170, 170, 86, 85, 85, 85, 85, 85, 171, + 214, 206, 219, 177, 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, + 85, 85, 5, 108, 122, 85, 0, 0, 32, 0, 0, 0, 0, 0, 69, 0, + 64, 215, 254, 255, 251, 15, 0, 0, 4, 128, 99, 85, 85, 85, 179, 230, + 255, 255, 255, 255, 255, 255, 0, 0, 1, 84, 85, 85, 171, 42, 85, 85, + 85, 0, 254, 255, 255, 255, 127, 0, 128, 0, 0, 0, 191, 32, 0, 0, + 85, 85, 21, 76, 0, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 170, + 0, 255, 0, 0, 255, 255, 156, 31, 156, 31, 0, 15, 0, 31, 156, 31, + 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, + 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, + 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, + 127, 0, 248, 0, 255, 0, 0, 0, +}; + +/* Changes_When_Casefolded: 514 bytes. */ + +RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_casefolded_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_casefolded_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_casefolded_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_casefolded_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_casefolded_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Casemapped. */ + +static RE_UINT8 re_changes_when_casemapped_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_2[] = { + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, + 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, + 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 18, + 6, 6, 6, 6, 19, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 5, 4, 4, 6, 7, 8, 4, + 4, 9, 10, 11, 12, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, 18, + 4, 4, 4, 4, 19, 4, 4, 4, 4, 20, 21, 22, 23, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0, + 0, 0, 0, 25, 0, 0, 0, 0, 4, 4, 4, 4, 26, 4, 4, 4, + 27, 4, 28, 29, 4, 30, 31, 32, 0, 33, 34, 4, 35, 0, 0, 0, + 0, 0, 0, 0, 0, 36, 4, 37, 4, 38, 39, 40, 4, 4, 4, 41, + 4, 24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 42, 43, 0, 0, 0, + 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 0, 0, 0, 0, 0, 4, 4, 49, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, + 255, 255, 255, 255, 255, 255, 255, 254, 255, 223, 255, 247, 255, 243, 255, 179, + 240, 255, 255, 255, 253, 255, 15, 252, 255, 255, 223, 10, 105, 139, 38, 32, + 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 207, 56, 64, 215, 255, 255, + 251, 255, 255, 255, 255, 255, 227, 255, 255, 255, 183, 239, 3, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, + 191, 32, 0, 0, 0, 0, 0, 34, 255, 255, 255, 79, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, + 255, 31, 220, 31, 64, 12, 4, 0, 0, 64, 0, 0, 24, 0, 0, 0, + 0, 0, 192, 255, 255, 3, 0, 0, 255, 127, 255, 255, 255, 255, 255, 127, + 255, 255, 109, 192, 15, 120, 12, 0, 255, 63, 0, 0, 255, 255, 255, 0, + 252, 255, 252, 255, 255, 255, 0, 254, 255, 56, 15, 0, 255, 7, 0, 0, + 127, 0, 248, 0, 255, 255, 0, 0, +}; + +/* Changes_When_Casemapped: 530 bytes. */ + +RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_casemapped_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_casemapped_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_casemapped_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_casemapped_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_casemapped_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* ID_Start. */ + +static RE_UINT8 re_id_start_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_id_start_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_id_start_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, + 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, + 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, + 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 70, 71, 72, 73, 31, 31, 31, 31, 31, 31, 74, 31, + 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, 1, 76, + 77, 31, 31, 31, 31, 31, 31, 31, 1, 1, 77, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_id_start_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, + 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, + 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, + 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, + 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, + 14, 63, 62, 0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, + 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, + 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, + 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, + 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, + 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, + 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, + 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, + 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, + 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, + 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, + 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, + 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, + 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, + 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, + 4, 107, 16, 4, 159, 4, 15, 160, 0, 0, 0, 161, 4, 4, 4, 94, + 0, 1, 1, 162, 4, 121, 163, 0, 164, 165, 166, 0, 4, 4, 4, 82, + 0, 0, 4, 83, 0, 0, 0, 0, 0, 0, 0, 0, 94, 4, 167, 0, + 121, 16, 18, 0, 107, 4, 168, 0, 4, 4, 4, 4, 107, 0, 0, 0, + 169, 170, 93, 0, 0, 0, 0, 0, 93, 154, 0, 0, 4, 171, 0, 0, + 172, 89, 0, 94, 0, 0, 0, 0, 4, 93, 93, 141, 0, 0, 0, 0, + 4, 4, 119, 0, 0, 0, 0, 0, 102, 91, 0, 0, 102, 23, 16, 119, + 102, 62, 0, 0, 102, 141, 173, 0, 0, 0, 0, 0, 4, 18, 0, 0, + 4, 4, 4, 129, 0, 0, 0, 0, 4, 4, 4, 138, 0, 0, 0, 0, + 4, 129, 0, 0, 0, 0, 0, 0, 4, 30, 0, 0, 0, 0, 0, 0, + 4, 4, 174, 0, 158, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 175, 4, 176, 177, 178, 4, 179, 180, 181, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 182, 183, 76, 175, 175, 120, 120, 184, 184, 143, 0, + 178, 185, 186, 187, 188, 189, 0, 0, 4, 4, 4, 4, 4, 4, 98, 0, + 4, 83, 4, 4, 4, 4, 4, 4, 107, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_id_start_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 60, + 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, + 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, + 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, + 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, + 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, + 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, + 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, + 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, + 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, + 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, + 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, + 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, + 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 13, 0, + 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 0, 240, 1, 0, 0, 0, + 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, + 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, + 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, + 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, + 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, + 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, + 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, + 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, + 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, + 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, + 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, + 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 248, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, + 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, + 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, + 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, + 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, + 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, + 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, + 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, + 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* ID_Start: 1753 bytes. */ + +RE_UINT32 re_get_id_start(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_id_start_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_id_start_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_id_start_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_id_start_stage_4[pos + f] << 5; + pos += code; + value = (re_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* ID_Continue. */ + +static RE_UINT8 re_id_continue_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, + 6, 6, +}; + +static RE_UINT8 re_id_continue_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_id_continue_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, + 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, + 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, + 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, + 31, 70, 71, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, + 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, + 31, 80, 31, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_id_continue_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, + 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, + 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, + 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, + 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, + 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, + 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, + 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, + 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, + 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, + 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, + 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, + 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, + 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, + 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, + 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, + 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, + 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, + 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, + 6, 78, 24, 6, 168, 6, 149, 169, 87, 170, 171, 172, 6, 6, 6, 75, + 1, 2, 3, 101, 6, 105, 173, 0, 174, 175, 176, 0, 6, 6, 6, 65, + 0, 0, 6, 88, 0, 0, 0, 177, 0, 0, 0, 0, 75, 6, 178, 0, + 105, 24, 147, 0, 78, 6, 179, 0, 6, 6, 6, 6, 78, 95, 0, 0, + 180, 181, 99, 0, 0, 0, 0, 0, 99, 163, 0, 0, 6, 182, 0, 0, + 183, 184, 0, 75, 0, 0, 0, 0, 6, 99, 99, 185, 0, 0, 0, 0, + 6, 6, 128, 0, 0, 0, 0, 0, 6, 6, 186, 50, 6, 65, 24, 187, + 6, 188, 0, 0, 6, 6, 150, 0, 0, 0, 0, 0, 6, 97, 95, 0, + 6, 6, 6, 138, 0, 0, 0, 0, 6, 6, 6, 189, 0, 0, 0, 0, + 6, 138, 0, 0, 0, 0, 0, 0, 6, 190, 0, 0, 0, 0, 0, 0, + 6, 6, 191, 105, 192, 0, 0, 0, 193, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 194, 195, 196, 0, 0, 0, 0, 197, 0, 0, 0, 0, 0, + 6, 6, 188, 6, 198, 199, 200, 6, 201, 202, 203, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 204, 205, 80, 188, 188, 129, 129, 206, 206, 207, 6, + 200, 208, 209, 210, 211, 212, 0, 0, 6, 6, 6, 6, 6, 6, 113, 0, + 6, 88, 6, 6, 6, 6, 6, 6, 78, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 87, +}; + +static RE_UINT8 re_id_continue_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, + 31, 80, 0, 0, 255, 255, 223, 60, 192, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, + 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, + 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, + 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, + 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, + 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, + 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, + 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, + 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, + 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, + 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, + 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, + 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, + 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, + 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, + 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, + 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, + 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, + 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, + 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 254, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, + 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, + 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, + 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, + 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, + 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, + 255, 255, 252, 255, 0, 0, 255, 15, 127, 0, 24, 0, 0, 224, 0, 0, + 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, + 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, + 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, + 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, + 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, + 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, + 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, + 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, + 238, 251, 255, 15, +}; + +/* ID_Continue: 1894 bytes. */ + +RE_UINT32 re_get_id_continue(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_id_continue_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_id_continue_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_id_continue_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_id_continue_stage_4[pos + f] << 5; + pos += code; + value = (re_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* XID_Start. */ + +static RE_UINT8 re_xid_start_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_xid_start_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_xid_start_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, + 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, + 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 31, + 1, 1, 68, 31, 31, 31, 31, 69, 70, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 71, 72, 73, 74, 31, 31, 31, 31, 31, 31, 75, 31, + 1, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, 77, + 78, 31, 31, 31, 31, 31, 31, 31, 1, 1, 78, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_xid_start_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, + 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, + 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, + 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, + 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, + 14, 63, 62, 0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, + 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, + 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, + 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, + 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, + 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, + 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, + 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, + 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, + 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, + 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, + 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, + 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, + 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, + 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, + 4, 4, 107, 32, 4, 4, 4, 4, 4, 107, 16, 4, 159, 4, 15, 160, + 0, 0, 0, 161, 4, 4, 4, 94, 0, 1, 1, 162, 107, 121, 163, 0, + 164, 165, 166, 0, 4, 4, 4, 82, 0, 0, 4, 83, 0, 0, 0, 0, + 0, 0, 0, 0, 94, 4, 167, 0, 121, 16, 18, 0, 107, 4, 168, 0, + 4, 4, 4, 4, 107, 0, 0, 0, 169, 170, 93, 0, 0, 0, 0, 0, + 93, 154, 0, 0, 4, 171, 0, 0, 172, 89, 0, 94, 0, 0, 0, 0, + 4, 93, 93, 141, 0, 0, 0, 0, 4, 4, 119, 0, 0, 0, 0, 0, + 102, 91, 0, 0, 102, 23, 16, 119, 102, 62, 0, 0, 102, 141, 173, 0, + 0, 0, 0, 0, 4, 18, 0, 0, 4, 4, 4, 129, 0, 0, 0, 0, + 4, 4, 4, 138, 0, 0, 0, 0, 4, 129, 0, 0, 0, 0, 0, 0, + 4, 30, 0, 0, 0, 0, 0, 0, 4, 4, 174, 0, 158, 0, 0, 0, + 47, 0, 0, 0, 0, 0, 0, 0, 4, 4, 175, 4, 176, 177, 178, 4, + 179, 180, 181, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 182, 183, 76, + 175, 175, 120, 120, 184, 184, 143, 0, 178, 185, 186, 187, 188, 189, 0, 0, + 4, 4, 4, 4, 4, 4, 98, 0, 4, 83, 4, 4, 4, 4, 4, 4, + 107, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_xid_start_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 56, + 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, + 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, + 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, + 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, + 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, + 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, + 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, + 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, + 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, + 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, + 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, + 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, + 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 5, 0, + 150, 37, 240, 254, 174, 236, 5, 32, 95, 0, 0, 240, 1, 0, 0, 0, + 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, + 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, + 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, + 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, + 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, + 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, + 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, + 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, + 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, + 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, + 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, + 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, + 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, + 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, + 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, + 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, + 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, + 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, + 0, 0, 255, 3, 0, 0, 138, 170, 192, 255, 255, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, + 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* XID_Start: 1761 bytes. */ + +RE_UINT32 re_get_xid_start(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_xid_start_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_xid_start_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_xid_start_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_xid_start_stage_4[pos + f] << 5; + pos += code; + value = (re_xid_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* XID_Continue. */ + +static RE_UINT8 re_xid_continue_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, + 6, 6, +}; + +static RE_UINT8 re_xid_continue_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_xid_continue_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, + 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, + 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 31, + 1, 1, 68, 31, 31, 31, 31, 69, 70, 31, 31, 31, 31, 31, 31, 31, + 31, 71, 72, 31, 73, 74, 75, 76, 31, 31, 31, 31, 31, 31, 77, 31, + 1, 1, 1, 1, 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 79, + 80, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, 31, 31, 31, 31, + 31, 81, 31, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_xid_continue_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, + 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, + 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, + 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, + 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, + 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, + 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, + 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, + 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, + 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, + 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, + 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, + 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, + 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, + 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, + 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, + 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, + 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, + 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, + 6, 6, 78, 168, 6, 6, 6, 6, 6, 78, 24, 6, 169, 6, 149, 1, + 87, 170, 171, 172, 6, 6, 6, 75, 1, 2, 3, 101, 6, 105, 173, 0, + 174, 175, 176, 0, 6, 6, 6, 65, 0, 0, 6, 88, 0, 0, 0, 177, + 0, 0, 0, 0, 75, 6, 178, 0, 105, 24, 147, 0, 78, 6, 179, 0, + 6, 6, 6, 6, 78, 95, 0, 0, 180, 181, 99, 0, 0, 0, 0, 0, + 99, 163, 0, 0, 6, 182, 0, 0, 183, 184, 0, 75, 0, 0, 0, 0, + 6, 99, 99, 185, 0, 0, 0, 0, 6, 6, 128, 0, 0, 0, 0, 0, + 6, 6, 186, 50, 6, 65, 24, 187, 6, 188, 0, 0, 6, 6, 150, 0, + 0, 0, 0, 0, 6, 97, 95, 0, 6, 6, 6, 138, 0, 0, 0, 0, + 6, 6, 6, 189, 0, 0, 0, 0, 6, 138, 0, 0, 0, 0, 0, 0, + 6, 190, 0, 0, 0, 0, 0, 0, 6, 6, 191, 105, 192, 0, 0, 0, + 193, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 195, 196, 0, 0, + 0, 0, 197, 0, 0, 0, 0, 0, 6, 6, 188, 6, 198, 199, 200, 6, + 201, 202, 203, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 204, 205, 80, + 188, 188, 129, 129, 206, 206, 207, 6, 200, 208, 209, 210, 211, 212, 0, 0, + 6, 6, 6, 6, 6, 6, 113, 0, 6, 88, 6, 6, 6, 6, 6, 6, + 78, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 87, +}; + +static RE_UINT8 re_xid_continue_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, + 31, 80, 0, 0, 255, 255, 223, 56, 192, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, + 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, + 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, + 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, + 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, + 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, + 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, + 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, + 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, + 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, + 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, + 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, + 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, + 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, + 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, + 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, + 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, + 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, + 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, + 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, + 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, + 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, + 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, + 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, + 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, + 240, 255, 255, 255, 255, 255, 252, 255, 127, 0, 24, 0, 0, 224, 0, 0, + 0, 0, 138, 170, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, + 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, + 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, + 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, + 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, + 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, + 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, + 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, + 238, 251, 255, 15, +}; + +/* XID_Continue: 1902 bytes. */ + +RE_UINT32 re_get_xid_continue(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_xid_continue_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_xid_continue_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_xid_continue_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_xid_continue_stage_4[pos + f] << 5; + pos += code; + value = (re_xid_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Default_Ignorable_Code_Point. */ + +static RE_UINT8 re_default_ignorable_code_point_stage_1[] = { + 0, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_2[] = { + 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 7, 1, 1, 1, 1, 1, + 8, 8, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_3[] = { + 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, + 7, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 10, 1, 11, 1, 1, 1, 1, 1, 1, + 12, 12, 12, 12, 12, 12, 12, 12, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_4[] = { + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, + 7, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10, 0, 0, 0, 0, + 0, 0, 0, 11, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 4, + 0, 0, 0, 0, 0, 5, 0, 12, 0, 0, 0, 13, 0, 0, 0, 0, + 14, 14, 14, 14, 14, 14, 14, 14, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 128, 0, 0, 0, 0, 0, 16, + 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 48, 0, 0, 120, 0, 0, + 0, 248, 0, 0, 0, 124, 0, 0, 255, 255, 0, 0, 16, 0, 0, 0, + 0, 0, 255, 1, 0, 0, 248, 7, 255, 255, 255, 255, +}; + +/* Default_Ignorable_Code_Point: 344 bytes. */ + +RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 14; + code = ch ^ (f << 14); + pos = (RE_UINT32)re_default_ignorable_code_point_stage_1[f] << 3; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_default_ignorable_code_point_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_default_ignorable_code_point_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_default_ignorable_code_point_stage_4[pos + f] << 5; + pos += code; + value = (re_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Grapheme_Extend. */ + +static RE_UINT8 re_grapheme_extend_stage_1[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, + 4, 4, +}; + +static RE_UINT8 re_grapheme_extend_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, + 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +static RE_UINT8 re_grapheme_extend_stage_3[] = { + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, + 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, + 0, 0, 0, 32, 0, 0, 33, 34, 0, 35, 0, 0, 0, 0, 0, 0, + 0, 0, 36, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 39, 0, + 0, 0, 0, 0, 0, 0, 0, 40, 0, 41, 42, 0, 0, 0, 0, 0, + 0, 43, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_grapheme_extend_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 0, + 7, 0, 8, 9, 0, 0, 10, 11, 12, 13, 14, 0, 0, 15, 0, 16, + 17, 18, 19, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 24, + 28, 29, 30, 31, 28, 29, 32, 24, 25, 33, 34, 24, 35, 36, 37, 0, + 0, 38, 39, 24, 0, 40, 41, 24, 0, 36, 27, 24, 0, 0, 42, 0, + 0, 43, 44, 0, 0, 45, 46, 0, 47, 48, 0, 49, 50, 51, 52, 0, + 0, 53, 54, 55, 56, 0, 0, 0, 0, 0, 57, 0, 0, 0, 0, 0, + 58, 58, 59, 59, 0, 60, 61, 0, 62, 0, 0, 0, 0, 63, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 65, 0, 66, 67, 0, 0, 0, 0, + 68, 69, 35, 16, 70, 71, 0, 72, 0, 73, 0, 0, 0, 0, 74, 75, + 0, 0, 0, 0, 0, 0, 1, 76, 77, 0, 0, 0, 0, 0, 13, 78, + 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 80, 0, 0, 0, 1, + 0, 81, 0, 0, 82, 0, 0, 0, 0, 0, 0, 83, 80, 0, 0, 84, + 85, 86, 0, 0, 0, 0, 87, 88, 0, 89, 90, 0, 21, 91, 0, 0, + 0, 92, 93, 0, 0, 94, 25, 95, 0, 0, 0, 0, 0, 0, 0, 96, + 36, 0, 0, 0, 0, 0, 0, 0, 2, 97, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, + 99, 100, 0, 0, 0, 0, 0, 0, 25, 101, 97, 0, 70, 102, 0, 0, + 21, 103, 0, 0, 70, 104, 0, 0, 0, 0, 0, 0, 0, 105, 0, 0, + 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, 107, 108, 109, 0, 0, + 0, 0, 110, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, +}; + +static RE_UINT8 re_grapheme_extend_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 0, 0, 248, 3, 0, 0, + 0, 0, 254, 255, 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 255, 7, + 0, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 159, 159, 61, 0, 0, + 0, 0, 2, 0, 0, 0, 255, 255, 255, 7, 0, 0, 192, 255, 1, 0, + 0, 248, 15, 0, 0, 0, 192, 251, 239, 62, 0, 0, 0, 0, 0, 14, + 240, 255, 255, 127, 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, + 12, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 80, 30, 32, 128, 0, + 6, 0, 0, 0, 0, 0, 0, 16, 134, 57, 2, 0, 0, 0, 35, 0, + 190, 33, 0, 0, 0, 0, 0, 208, 30, 32, 192, 0, 4, 0, 0, 0, + 0, 0, 0, 64, 1, 32, 128, 0, 0, 0, 0, 192, 193, 61, 96, 0, + 0, 0, 0, 144, 68, 48, 96, 0, 0, 132, 92, 128, 0, 0, 242, 7, + 128, 127, 0, 0, 0, 0, 242, 27, 0, 63, 0, 0, 0, 0, 0, 3, + 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, 255, 255, 255, 31, + 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, 1, 0, 30, 0, + 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, 0, 0, 12, 0, + 0, 0, 176, 63, 64, 254, 15, 32, 0, 56, 0, 0, 0, 2, 0, 0, + 135, 1, 4, 14, 0, 0, 128, 9, 0, 0, 64, 127, 229, 31, 248, 159, + 15, 0, 0, 0, 0, 0, 208, 23, 3, 0, 0, 0, 60, 11, 0, 0, + 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 247, 255, 253, 33, 16, 0, + 127, 0, 0, 240, 0, 48, 0, 0, 255, 255, 1, 0, 0, 128, 3, 0, + 0, 0, 0, 128, 0, 252, 0, 0, 0, 0, 0, 6, 0, 128, 247, 63, + 0, 0, 3, 0, 68, 8, 0, 0, 96, 0, 0, 0, 16, 0, 0, 0, + 255, 255, 3, 0, 192, 63, 0, 0, 128, 255, 3, 0, 0, 0, 200, 19, + 0, 126, 102, 0, 8, 16, 0, 0, 0, 0, 157, 193, 0, 48, 64, 0, + 32, 33, 0, 0, 127, 0, 0, 0, 0, 0, 0, 32, 110, 240, 0, 0, + 0, 0, 0, 135, 0, 0, 0, 255, 0, 0, 120, 6, 128, 239, 31, 0, + 0, 0, 192, 127, 0, 40, 191, 0, 0, 128, 7, 0, 160, 195, 7, 248, + 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, +}; + +/* Grapheme_Extend: 1062 bytes. */ + +RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_grapheme_extend_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_grapheme_extend_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_extend_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_grapheme_extend_stage_4[pos + f] << 5; + pos += code; + value = (re_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Grapheme_Base. */ + +static RE_UINT8 re_grapheme_base_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_grapheme_base_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_grapheme_base_stage_3[] = { + 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 16, 17, 1, 1, 18, 19, 20, 21, 22, 23, 24, 25, 1, 26, + 27, 28, 1, 29, 30, 1, 1, 31, 1, 1, 1, 32, 33, 34, 35, 36, + 37, 38, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, + 1, 1, 1, 1, 42, 1, 43, 44, 45, 46, 47, 48, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 49, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 1, 51, 52, 1, 53, 54, 55, 56, 57, 58, 59, 60, 50, 50, 50, + 61, 62, 63, 64, 65, 50, 66, 50, 67, 68, 50, 50, 50, 50, 69, 50, + 1, 1, 1, 70, 71, 50, 50, 50, 1, 1, 1, 1, 72, 50, 50, 50, + 1, 1, 73, 50, 50, 50, 50, 74, 75, 50, 50, 50, 50, 50, 50, 50, + 76, 77, 78, 79, 80, 81, 82, 83, 50, 50, 50, 50, 50, 50, 84, 50, + 85, 86, 87, 88, 89, 90, 91, 92, 1, 1, 1, 1, 1, 1, 93, 1, + 1, 1, 1, 1, 1, 1, 1, 94, 95, 50, 50, 50, 50, 50, 50, 50, + 1, 1, 95, 50, 50, 50, 50, 50, +}; + +static RE_UINT8 re_grapheme_base_stage_4[] = { + 0, 1, 1, 2, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 4, 5, 6, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, + 1, 8, 9, 10, 11, 12, 13, 14, 15, 1, 16, 17, 1, 1, 18, 19, + 20, 21, 22, 1, 1, 23, 1, 24, 25, 26, 27, 0, 0, 28, 0, 0, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 33, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 56, 60, 61, 62, 63, 64, 65, 66, 10, 67, 68, 0, 69, 70, 71, 0, + 72, 73, 74, 75, 76, 77, 78, 0, 1, 79, 80, 81, 82, 1, 83, 1, + 1, 1, 84, 1, 85, 86, 87, 1, 88, 1, 89, 90, 91, 1, 1, 92, + 1, 1, 1, 1, 90, 1, 1, 93, 94, 95, 96, 97, 1, 98, 99, 100, + 101, 1, 1, 102, 1, 103, 1, 104, 90, 105, 106, 107, 1, 108, 109, 1, + 110, 1, 111, 112, 100, 113, 0, 0, 114, 115, 116, 117, 118, 119, 1, 120, + 1, 121, 122, 1, 0, 0, 123, 124, 1, 1, 1, 1, 1, 1, 0, 0, + 125, 1, 126, 127, 1, 128, 129, 130, 131, 132, 1, 133, 134, 89, 0, 0, + 1, 1, 1, 1, 135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 136, + 1, 137, 16, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 138, 0, 0, 0, 0, 0, 1, 139, 2, 1, 1, 1, 1, 140, + 1, 83, 1, 141, 142, 143, 143, 0, 1, 144, 0, 0, 145, 1, 1, 136, + 1, 1, 1, 1, 1, 1, 104, 146, 1, 135, 10, 1, 147, 1, 1, 1, + 148, 149, 1, 1, 139, 89, 1, 150, 2, 1, 1, 1, 1, 1, 1, 2, + 1, 1, 1, 1, 1, 104, 1, 1, 1, 1, 1, 1, 1, 1, 151, 0, + 1, 1, 1, 1, 152, 1, 153, 1, 1, 154, 1, 155, 102, 1, 1, 156, + 1, 1, 1, 1, 157, 16, 0, 158, 159, 160, 1, 102, 1, 1, 161, 162, + 1, 163, 164, 90, 29, 165, 166, 0, 1, 167, 168, 144, 1, 169, 170, 171, + 172, 173, 0, 0, 0, 0, 1, 174, 1, 1, 1, 1, 1, 150, 175, 144, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 176, 1, 1, 91, 0, + 177, 178, 179, 1, 1, 1, 180, 1, 1, 1, 181, 1, 182, 1, 183, 184, + 185, 181, 186, 187, 1, 1, 1, 90, 10, 1, 1, 1, 127, 2, 188, 189, + 190, 191, 192, 0, 1, 1, 1, 89, 193, 194, 1, 1, 195, 0, 181, 90, + 0, 0, 0, 0, 90, 1, 93, 0, 2, 150, 16, 0, 196, 1, 197, 0, + 1, 1, 1, 1, 127, 198, 0, 0, 199, 200, 201, 0, 0, 0, 0, 0, + 202, 203, 0, 0, 1, 204, 0, 0, 205, 136, 206, 1, 0, 0, 0, 0, + 1, 207, 208, 209, 0, 0, 0, 0, 1, 1, 210, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 211, 102, 212, 21, 118, 213, 214, 215, + 29, 216, 217, 0, 118, 218, 215, 0, 0, 0, 0, 0, 1, 219, 198, 0, + 1, 1, 1, 220, 0, 0, 0, 0, 1, 1, 1, 221, 0, 0, 0, 0, + 1, 220, 0, 0, 0, 0, 0, 0, 1, 222, 0, 0, 0, 0, 0, 0, + 1, 1, 223, 2, 224, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 104, 1, 226, 1, 227, 228, 229, 127, 0, + 1, 1, 230, 0, 0, 0, 0, 0, 1, 1, 142, 96, 0, 0, 0, 0, + 1, 1, 128, 1, 231, 232, 233, 1, 234, 235, 236, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 237, 1, 1, 1, 1, 1, 1, 1, 1, 238, 1, + 233, 239, 240, 241, 242, 243, 0, 244, 1, 108, 1, 1, 136, 245, 246, 0, + 131, 139, 1, 108, 89, 0, 0, 247, 248, 89, 249, 0, 0, 0, 0, 0, + 1, 250, 1, 90, 136, 1, 251, 93, 1, 2, 211, 1, 1, 1, 1, 252, + 1, 127, 150, 183, 0, 0, 0, 253, 1, 1, 254, 0, 1, 1, 255, 0, + 1, 1, 1, 136, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 142, 0, + 1, 92, 1, 1, 1, 1, 1, 1, 127, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_grapheme_base_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 223, 255, 255, + 0, 0, 255, 124, 240, 215, 255, 255, 251, 255, 255, 255, 7, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 254, 254, 255, 255, 255, 255, 134, 0, 0, + 0, 0, 0, 64, 73, 0, 255, 255, 255, 7, 31, 0, 192, 255, 0, 200, + 255, 7, 0, 0, 255, 255, 254, 255, 255, 255, 63, 64, 96, 194, 255, 255, + 255, 63, 253, 255, 255, 255, 0, 0, 0, 224, 255, 255, 63, 0, 2, 0, + 255, 7, 240, 7, 255, 255, 63, 4, 16, 1, 255, 127, 255, 255, 255, 65, + 253, 31, 0, 0, 248, 255, 255, 255, 255, 255, 255, 235, 1, 222, 1, 255, + 243, 255, 255, 254, 236, 159, 249, 255, 255, 253, 197, 163, 129, 89, 0, 176, + 195, 255, 255, 15, 232, 135, 249, 255, 255, 253, 109, 195, 1, 0, 0, 94, + 192, 255, 28, 0, 232, 191, 251, 255, 255, 253, 237, 227, 1, 26, 1, 0, + 195, 255, 3, 0, 255, 253, 237, 35, 129, 25, 0, 176, 195, 255, 255, 0, + 232, 199, 61, 214, 24, 199, 255, 131, 198, 29, 1, 0, 192, 255, 255, 7, + 238, 223, 253, 255, 255, 253, 239, 35, 30, 0, 0, 3, 195, 255, 0, 255, + 236, 223, 253, 255, 255, 253, 239, 99, 155, 13, 0, 64, 195, 255, 6, 0, + 255, 255, 255, 167, 193, 93, 0, 0, 195, 255, 63, 254, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 0, 3, 127, 0, 0, 28, 0, 255, 255, 13, 128, + 127, 128, 255, 15, 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 255, 243, + 255, 255, 255, 252, 255, 255, 95, 253, 255, 254, 255, 255, 255, 31, 0, 128, + 32, 31, 0, 0, 0, 0, 0, 192, 191, 223, 255, 7, 255, 31, 2, 153, + 255, 255, 255, 60, 254, 255, 225, 255, 155, 223, 255, 223, 191, 32, 255, 255, + 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, + 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 255, 31, 255, 255, 255, 3, + 255, 255, 31, 0, 255, 255, 1, 0, 255, 223, 3, 0, 255, 255, 99, 0, + 255, 255, 3, 0, 255, 223, 1, 0, 255, 255, 79, 192, 191, 1, 240, 31, + 255, 3, 255, 3, 255, 7, 255, 3, 255, 255, 255, 0, 255, 5, 255, 255, + 255, 255, 63, 0, 120, 14, 251, 1, 241, 255, 255, 255, 255, 63, 31, 0, + 255, 15, 255, 255, 255, 3, 255, 199, 255, 255, 127, 198, 255, 255, 191, 0, + 26, 224, 7, 0, 255, 63, 0, 0, 240, 255, 255, 255, 255, 255, 47, 232, + 251, 15, 255, 255, 255, 7, 240, 31, 252, 255, 255, 255, 195, 244, 255, 255, + 191, 92, 12, 240, 255, 15, 48, 248, 255, 227, 255, 255, 255, 0, 8, 0, + 2, 222, 111, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 255, 63, + 255, 255, 223, 255, 223, 255, 207, 239, 255, 255, 220, 127, 255, 7, 255, 255, + 255, 128, 255, 255, 0, 0, 243, 255, 255, 127, 255, 31, 255, 3, 255, 255, + 255, 255, 15, 0, 127, 0, 0, 0, 255, 31, 255, 3, 255, 127, 255, 255, + 255, 127, 12, 254, 255, 128, 1, 0, 255, 255, 127, 0, 127, 127, 127, 127, + 255, 255, 255, 15, 255, 255, 255, 251, 0, 0, 255, 15, 255, 255, 127, 248, + 224, 255, 255, 255, 255, 63, 254, 255, 15, 0, 255, 255, 255, 31, 0, 0, + 255, 31, 255, 255, 127, 0, 255, 255, 255, 15, 0, 0, 255, 127, 8, 192, + 255, 255, 252, 0, 255, 127, 15, 0, 0, 0, 0, 255, 187, 247, 255, 255, + 159, 15, 255, 3, 15, 192, 255, 3, 0, 0, 252, 15, 63, 192, 255, 255, + 127, 0, 12, 128, 255, 255, 55, 236, 255, 191, 255, 195, 255, 129, 25, 0, + 247, 47, 255, 243, 255, 255, 98, 62, 5, 0, 0, 248, 255, 207, 63, 0, + 126, 126, 126, 0, 127, 127, 0, 0, 223, 30, 255, 3, 127, 248, 255, 255, + 255, 63, 255, 255, 127, 0, 248, 160, 255, 255, 127, 95, 219, 255, 255, 255, + 3, 0, 248, 255, 0, 0, 255, 255, 255, 255, 252, 255, 255, 0, 0, 0, + 0, 0, 255, 63, 0, 0, 255, 3, 255, 255, 247, 255, 127, 15, 223, 255, + 252, 252, 252, 28, 127, 127, 0, 48, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 135, 255, 255, 255, 255, 255, 143, 255, 255, 7, 255, 15, + 255, 255, 255, 191, 15, 255, 63, 0, 255, 3, 0, 0, 63, 253, 255, 255, + 255, 255, 191, 145, 255, 255, 191, 255, 255, 255, 255, 143, 255, 255, 255, 131, + 255, 255, 255, 192, 1, 0, 239, 254, 255, 0, 255, 1, 255, 255, 63, 254, + 255, 255, 63, 255, 255, 255, 7, 255, 255, 1, 0, 0, 253, 255, 255, 255, + 128, 63, 252, 255, 255, 255, 135, 217, 3, 0, 255, 255, 255, 1, 255, 3, + 127, 16, 192, 255, 15, 0, 0, 0, 255, 255, 63, 128, 255, 215, 64, 0, + 255, 127, 0, 0, 7, 0, 15, 0, 255, 255, 255, 1, 31, 0, 255, 255, + 0, 0, 248, 255, 3, 0, 0, 0, 127, 254, 255, 255, 95, 60, 0, 0, + 24, 240, 255, 255, 255, 195, 255, 255, 35, 0, 0, 0, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, + 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, + 0, 0, 3, 0, 255, 127, 254, 127, 254, 255, 254, 255, 192, 255, 255, 255, + 7, 0, 255, 255, 255, 1, 3, 0, 1, 0, 191, 255, 223, 7, 0, 0, + 255, 255, 255, 30, 0, 0, 0, 248, 225, 255, 0, 0, 63, 0, 0, 0, +}; + +/* Grapheme_Base: 2169 bytes. */ + +RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_grapheme_base_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_grapheme_base_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_base_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_grapheme_base_stage_4[pos + f] << 5; + pos += code; + value = (re_grapheme_base_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Grapheme_Link. */ + +static RE_UINT8 re_grapheme_link_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_grapheme_link_stage_2[] = { + 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_grapheme_link_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 4, + 5, 0, 0, 0, 0, 0, 0, 6, 0, 0, 7, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, 10, 11, 12, 13, 0, 0, 0, 0, + 0, 0, 14, 0, 0, 0, 0, 0, 15, 16, 0, 0, 0, 0, 17, 0, +}; + +static RE_UINT8 re_grapheme_link_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, + 6, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 8, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 9, 0, 10, 0, 0, 0, 11, 0, 0, 0, 0, + 12, 0, 0, 0, 0, 0, 4, 0, 0, 0, 13, 0, 0, 0, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 15, 0, 0, + 0, 16, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 14, 0, 0, +}; + +static RE_UINT8 re_grapheme_link_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, + 16, 0, 0, 0, 0, 0, 0, 6, 0, 0, 16, 0, 0, 0, 4, 0, + 1, 0, 0, 0, 0, 12, 0, 0, 0, 0, 12, 0, 0, 0, 0, 128, + 64, 0, 0, 0, 0, 0, 8, 0, 0, 0, 64, 0, 0, 0, 0, 2, + 0, 0, 24, 0, +}; + +/* Grapheme_Link: 374 bytes. */ + +RE_UINT32 re_get_grapheme_link(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_grapheme_link_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_grapheme_link_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_link_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_grapheme_link_stage_4[pos + f] << 5; + pos += code; + value = (re_grapheme_link_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* White_Space. */ + +static RE_UINT8 re_white_space_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_white_space_stage_2[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_white_space_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_white_space_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_white_space_stage_5[] = { + 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 255, 7, 0, 0, 0, 131, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, +}; + +/* White_Space: 169 bytes. */ + +RE_UINT32 re_get_white_space(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_white_space_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_white_space_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_white_space_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_white_space_stage_4[pos + f] << 6; + pos += code; + value = (re_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Bidi_Control. */ + +static RE_UINT8 re_bidi_control_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_bidi_control_stage_2[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_bidi_control_stage_3[] = { + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_bidi_control_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 2, 3, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_bidi_control_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, + 0, 192, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 192, 3, 0, 0, +}; + +/* Bidi_Control: 129 bytes. */ + +RE_UINT32 re_get_bidi_control(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_bidi_control_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_bidi_control_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_bidi_control_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_bidi_control_stage_4[pos + f] << 6; + pos += code; + value = (re_bidi_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Join_Control. */ + +static RE_UINT8 re_join_control_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_join_control_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_join_control_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_join_control_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_join_control_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, +}; + +/* Join_Control: 97 bytes. */ + +RE_UINT32 re_get_join_control(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_join_control_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_join_control_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_join_control_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_join_control_stage_4[pos + f] << 6; + pos += code; + value = (re_join_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Dash. */ + +static RE_UINT8 re_dash_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_dash_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_dash_stage_3[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 4, 1, 1, 1, + 5, 6, 1, 1, 1, 1, 1, 7, 8, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, +}; + +static RE_UINT8 re_dash_stage_4[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 4, 1, 1, 1, 1, 1, 1, 1, 5, 6, 7, 1, 1, 1, 1, 1, + 8, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, + 10, 1, 11, 1, 1, 1, 1, 1, 12, 13, 1, 1, 14, 1, 1, 1, +}; + +static RE_UINT8 re_dash_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 128, 4, 0, 0, 0, 12, + 0, 0, 0, 16, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 8, 0, 0, 0, + 0, 32, 0, 0, 0, 0, 0, 0, +}; + +/* Dash: 297 bytes. */ + +RE_UINT32 re_get_dash(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_dash_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_dash_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_dash_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_dash_stage_4[pos + f] << 6; + pos += code; + value = (re_dash_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Hyphen. */ + +static RE_UINT8 re_hyphen_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_hyphen_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_hyphen_stage_3[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, + 4, 1, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, +}; + +static RE_UINT8 re_hyphen_stage_4[] = { + 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 4, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 6, 1, 1, 1, 1, 1, 7, 1, 1, 8, 9, 1, 1, +}; + +static RE_UINT8 re_hyphen_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, + 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, +}; + +/* Hyphen: 241 bytes. */ + +RE_UINT32 re_get_hyphen(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_hyphen_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_hyphen_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_hyphen_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_hyphen_stage_4[pos + f] << 6; + pos += code; + value = (re_hyphen_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Quotation_Mark. */ + +static RE_UINT8 re_quotation_mark_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_quotation_mark_stage_2[] = { + 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_quotation_mark_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 4, +}; + +static RE_UINT8 re_quotation_mark_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 5, 1, 1, 6, 7, 1, 1, +}; + +static RE_UINT8 re_quotation_mark_stage_5[] = { + 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 255, 0, 0, 0, 6, + 0, 240, 0, 224, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, + 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, +}; + +/* Quotation_Mark: 193 bytes. */ + +RE_UINT32 re_get_quotation_mark(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_quotation_mark_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_quotation_mark_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_quotation_mark_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_quotation_mark_stage_4[pos + f] << 6; + pos += code; + value = (re_quotation_mark_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Terminal_Punctuation. */ + +static RE_UINT8 re_terminal_punctuation_stage_1[] = { + 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static RE_UINT8 re_terminal_punctuation_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 13, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, + 15, 9, 16, 9, 17, 9, 9, 9, 9, 18, 9, 9, 9, 9, 9, 9, +}; + +static RE_UINT8 re_terminal_punctuation_stage_3[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 4, 5, 6, 7, 8, + 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 12, 1, + 13, 1, 1, 1, 1, 1, 14, 1, 1, 1, 1, 1, 15, 16, 1, 17, + 18, 1, 19, 1, 1, 20, 21, 1, 22, 1, 1, 1, 1, 1, 1, 1, + 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 24, 1, 1, 1, 25, 1, 1, 1, 1, 1, 1, 1, + 1, 26, 1, 1, 27, 28, 1, 1, 29, 30, 31, 32, 33, 34, 1, 35, + 1, 1, 1, 1, 36, 1, 37, 1, 1, 1, 1, 1, 1, 1, 1, 38, + 39, 1, 40, 1, 1, 1, 41, 1, 42, 43, 44, 45, 1, 1, 1, 1, + 46, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_terminal_punctuation_stage_4[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, + 4, 0, 5, 0, 6, 0, 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, + 0, 0, 0, 9, 0, 10, 2, 0, 0, 0, 0, 11, 0, 0, 12, 0, + 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 15, 0, 0, 0, 16, + 0, 0, 0, 17, 0, 0, 18, 0, 19, 0, 0, 0, 0, 0, 11, 0, + 0, 20, 0, 0, 0, 0, 21, 0, 0, 22, 0, 23, 0, 24, 25, 0, + 0, 26, 0, 0, 27, 0, 0, 0, 0, 0, 0, 23, 28, 0, 0, 0, + 0, 0, 0, 29, 0, 0, 0, 30, 0, 0, 31, 0, 0, 32, 0, 0, + 0, 0, 25, 0, 0, 0, 33, 0, 0, 0, 34, 35, 0, 0, 0, 36, + 0, 0, 37, 0, 1, 0, 0, 38, 34, 0, 39, 0, 0, 0, 40, 0, + 34, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 0, 0, 23, 43, 0, + 0, 0, 44, 0, 0, 0, 45, 0, 0, 0, 0, 46, +}; + +static RE_UINT8 re_terminal_punctuation_stage_5[] = { + 0, 0, 0, 0, 2, 80, 0, 140, 0, 0, 0, 64, 128, 0, 0, 0, + 0, 2, 0, 0, 8, 0, 0, 0, 0, 16, 0, 136, 0, 0, 16, 0, + 255, 23, 0, 0, 0, 0, 0, 3, 0, 0, 255, 127, 48, 0, 0, 0, + 0, 0, 0, 12, 0, 225, 7, 0, 0, 12, 0, 0, 254, 1, 0, 0, + 0, 96, 0, 0, 0, 56, 0, 0, 0, 0, 112, 4, 60, 3, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 236, 0, 0, 0, 248, 0, 0, 0, 192, + 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 0, 6, 0, 0, 0, + 0, 224, 0, 0, 0, 0, 248, 0, 0, 0, 192, 0, 0, 192, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 224, 0, 0, 0, 128, 0, 0, 3, 0, + 0, 8, 0, 0, 0, 0, 247, 0, 18, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 128, 0, 0, 0, 0, 252, 128, 63, 0, 0, 3, 0, 0, 0, + 14, 0, 0, 0, 96, 0, 0, 0, 0, 0, 15, 0, +}; + +/* Terminal_Punctuation: 676 bytes. */ + +RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 14; + code = ch ^ (f << 14); + pos = (RE_UINT32)re_terminal_punctuation_stage_1[f] << 4; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_terminal_punctuation_stage_2[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_terminal_punctuation_stage_3[pos + f] << 2; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_terminal_punctuation_stage_4[pos + f] << 5; + pos += code; + value = (re_terminal_punctuation_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Math. */ + +static RE_UINT8 re_other_math_stage_1[] = { + 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, +}; + +static RE_UINT8 re_other_math_stage_2[] = { + 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, +}; + +static RE_UINT8 re_other_math_stage_3[] = { + 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 4, 1, 5, 1, 6, 7, 8, 1, 9, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 11, 1, 1, 1, 1, 12, 13, 14, 15, + 1, 1, 1, 1, 1, 1, 16, 1, +}; + +static RE_UINT8 re_other_math_stage_4[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, + 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 0, 0, 0, 19, 20, 21, + 0, 0, 0, 0, 0, 22, 23, 24, 25, 0, 26, 27, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 25, 28, 0, 0, 0, 0, 29, 0, 30, 31, + 0, 0, 0, 32, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, + 34, 34, 35, 34, 36, 37, 38, 34, 39, 40, 41, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 42, 43, 44, 35, 35, 45, 45, 46, 46, 47, 34, + 38, 48, 49, 50, 51, 52, 0, 0, +}; + +static RE_UINT8 re_other_math_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 39, 0, 0, 0, 51, 0, + 0, 0, 64, 0, 0, 0, 28, 0, 1, 0, 0, 0, 30, 0, 0, 96, + 0, 96, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, 132, 252, 47, 62, + 16, 179, 251, 241, 224, 3, 0, 0, 0, 0, 224, 243, 182, 62, 195, 240, + 255, 63, 235, 47, 48, 0, 0, 0, 0, 15, 0, 0, 0, 0, 176, 0, + 0, 0, 1, 0, 4, 0, 0, 0, 3, 192, 127, 240, 193, 140, 15, 0, + 148, 31, 0, 0, 96, 0, 0, 0, 5, 0, 0, 0, 15, 96, 0, 0, + 192, 255, 0, 0, 248, 255, 255, 1, 0, 0, 0, 15, 0, 0, 0, 48, + 10, 1, 0, 0, 0, 0, 0, 80, 255, 255, 255, 255, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 255, 255, 247, 255, 127, 255, 255, 255, 253, 255, 255, 247, 207, 255, 255, + 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, + 238, 251, 255, 15, +}; + +/* Other_Math: 502 bytes. */ + +RE_UINT32 re_get_other_math(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_other_math_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_other_math_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_other_math_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_other_math_stage_4[pos + f] << 5; + pos += code; + value = (re_other_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Hex_Digit. */ + +static RE_UINT8 re_hex_digit_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_hex_digit_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_hex_digit_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, +}; + +static RE_UINT8 re_hex_digit_stage_4[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, +}; + +static RE_UINT8 re_hex_digit_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, 0, 0, 0, 0, +}; + +/* Hex_Digit: 129 bytes. */ + +RE_UINT32 re_get_hex_digit(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_hex_digit_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_hex_digit_stage_2[pos + f] << 3; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_hex_digit_stage_3[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_hex_digit_stage_4[pos + f] << 7; + pos += code; + value = (re_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* ASCII_Hex_Digit. */ + +static RE_UINT8 re_ascii_hex_digit_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_4[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* ASCII_Hex_Digit: 97 bytes. */ + +RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ascii_hex_digit_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_ascii_hex_digit_stage_2[pos + f] << 3; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_ascii_hex_digit_stage_3[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_ascii_hex_digit_stage_4[pos + f] << 7; + pos += code; + value = (re_ascii_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Alphabetic. */ + +static RE_UINT8 re_other_alphabetic_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_other_alphabetic_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, + 6, 10, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_other_alphabetic_stage_3[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 0, 0, 14, 0, 0, 0, 15, 16, 17, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, + 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 0, 0, 30, 31, 0, 0, 0, 0, 32, 0, + 0, 0, 0, 0, 0, 0, 0, 33, +}; + +static RE_UINT8 re_other_alphabetic_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 5, 6, 0, 0, 7, 8, + 9, 10, 0, 0, 0, 11, 0, 0, 12, 13, 0, 0, 0, 0, 0, 14, + 15, 16, 17, 18, 19, 20, 21, 18, 19, 20, 22, 23, 19, 20, 24, 18, + 19, 20, 25, 18, 26, 20, 27, 0, 19, 20, 28, 18, 18, 20, 28, 18, + 18, 20, 29, 18, 18, 0, 30, 31, 0, 32, 33, 0, 0, 34, 33, 0, + 0, 0, 0, 35, 36, 37, 0, 0, 0, 38, 39, 40, 41, 0, 0, 0, + 0, 0, 42, 0, 0, 0, 0, 0, 31, 31, 31, 31, 0, 43, 44, 0, + 0, 0, 0, 0, 0, 45, 0, 0, 0, 46, 0, 0, 0, 10, 47, 0, + 48, 0, 49, 50, 0, 0, 0, 0, 51, 52, 15, 0, 53, 54, 0, 55, + 0, 56, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 43, 57, 58, + 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 59, 42, 0, 0, 0, + 0, 60, 0, 0, 61, 62, 15, 0, 0, 63, 64, 0, 15, 62, 0, 0, + 0, 65, 66, 0, 0, 67, 0, 68, 0, 0, 0, 0, 0, 0, 0, 69, + 70, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, + 53, 72, 73, 0, 26, 74, 0, 0, 53, 64, 0, 0, 53, 75, 0, 0, + 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 35, 77, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_alphabetic_stage_5[] = { + 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 255, 191, 182, 0, 0, 0, + 0, 0, 255, 7, 0, 248, 255, 254, 0, 0, 1, 0, 0, 0, 192, 31, + 158, 33, 0, 0, 0, 0, 2, 0, 0, 0, 255, 255, 192, 255, 1, 0, + 0, 0, 192, 248, 239, 30, 0, 0, 240, 3, 255, 127, 15, 0, 0, 0, + 0, 0, 0, 204, 255, 223, 224, 0, 12, 0, 0, 0, 14, 0, 0, 0, + 0, 0, 0, 192, 159, 25, 128, 0, 135, 25, 2, 0, 0, 0, 35, 0, + 191, 27, 0, 0, 159, 25, 192, 0, 4, 0, 0, 0, 199, 29, 128, 0, + 223, 29, 96, 0, 223, 29, 128, 0, 0, 128, 95, 255, 0, 0, 12, 0, + 0, 0, 242, 7, 0, 32, 0, 0, 0, 0, 242, 27, 0, 0, 254, 255, + 3, 224, 255, 254, 255, 255, 255, 31, 0, 248, 127, 121, 0, 0, 192, 195, + 133, 1, 30, 0, 124, 0, 0, 48, 0, 0, 0, 128, 0, 0, 192, 255, + 255, 1, 0, 0, 0, 2, 0, 0, 255, 15, 255, 1, 1, 3, 0, 0, + 0, 0, 128, 15, 0, 0, 224, 127, 254, 255, 31, 0, 31, 0, 0, 0, + 0, 0, 224, 255, 7, 0, 0, 0, 254, 51, 0, 0, 128, 255, 3, 0, + 240, 255, 63, 0, 255, 255, 255, 255, 255, 3, 0, 0, 0, 0, 240, 15, + 248, 0, 0, 0, 3, 0, 0, 0, 0, 0, 240, 255, 192, 7, 0, 0, + 128, 255, 7, 0, 0, 254, 127, 0, 8, 48, 0, 0, 0, 0, 157, 65, + 0, 248, 32, 0, 248, 7, 0, 0, 0, 0, 0, 64, 110, 240, 0, 0, + 0, 0, 0, 255, 63, 0, 0, 0, 0, 0, 255, 1, 0, 0, 248, 255, + 0, 248, 63, 0, 255, 255, 255, 127, +}; + +/* Other_Alphabetic: 786 bytes. */ + +RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_other_alphabetic_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_other_alphabetic_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_other_alphabetic_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_other_alphabetic_stage_4[pos + f] << 5; + pos += code; + value = (re_other_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Ideographic. */ + +static RE_UINT8 re_ideographic_stage_1[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ideographic_stage_2[] = { + 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 0, 0, 0, 8, +}; + +static RE_UINT8 re_ideographic_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 5, 6, 0, 0, + 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 8, 9, 0, 0, 0, + 0, 0, 0, 0, 2, 9, 0, 0, +}; + +static RE_UINT8 re_ideographic_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, + 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 2, 2, 2, 2, + 2, 5, 2, 6, 0, 0, 0, 0, 2, 2, 2, 7, 2, 2, 2, 2, + 2, 2, 2, 2, 8, 2, 2, 2, 9, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_ideographic_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 254, 3, 0, 7, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 63, 0, + 255, 31, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 63, 255, 255, + 255, 255, 255, 3, 0, 0, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 0, 0, 0, 0, +}; + +/* Ideographic: 297 bytes. */ + +RE_UINT32 re_get_ideographic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ideographic_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_ideographic_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_ideographic_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_ideographic_stage_4[pos + f] << 6; + pos += code; + value = (re_ideographic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Diacritic. */ + +static RE_UINT8 re_diacritic_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_diacritic_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 7, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, + 4, 4, 10, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_diacritic_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 1, 1, 1, 1, 1, 17, 1, 18, 19, 20, 21, 22, 1, 23, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, 1, 25, 1, + 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, + 29, 30, 31, 32, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, + 36, 37, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 40, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_diacritic_stage_4[] = { + 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 5, 5, 5, 5, 6, 7, 8, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 0, + 0, 0, 14, 0, 0, 0, 15, 16, 0, 4, 17, 0, 0, 18, 0, 19, + 20, 0, 0, 0, 0, 0, 0, 21, 0, 22, 23, 24, 0, 22, 25, 0, + 0, 22, 25, 0, 0, 22, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, + 0, 0, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, 0, 0, 26, 0, + 0, 0, 27, 0, 0, 0, 28, 0, 20, 29, 0, 0, 30, 0, 31, 0, + 0, 32, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 0, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, + 0, 37, 38, 39, 0, 40, 0, 0, 0, 41, 0, 42, 0, 0, 4, 43, + 0, 44, 5, 17, 0, 0, 45, 46, 0, 0, 0, 0, 0, 47, 48, 49, + 0, 0, 0, 0, 0, 0, 0, 50, 0, 51, 0, 0, 0, 0, 0, 0, + 0, 52, 0, 0, 53, 0, 0, 22, 0, 0, 0, 54, 0, 0, 0, 55, + 56, 57, 0, 0, 58, 0, 0, 20, 0, 0, 0, 0, 0, 0, 38, 59, + 0, 60, 61, 0, 0, 61, 2, 0, 0, 0, 0, 62, 0, 15, 63, 64, + 0, 0, 0, 0, 0, 0, 0, 65, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 66, 0, 0, 0, 0, 0, 0, 0, 1, 2, 67, 68, 0, 0, 69, + 0, 0, 0, 0, 0, 70, 0, 0, 0, 71, 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, + 0, 0, 0, 73, 74, 75, 0, 0, +}; + +static RE_UINT8 re_diacritic_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 129, 144, 1, + 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 224, 7, 0, 48, 4, + 48, 0, 0, 0, 248, 0, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, + 251, 255, 255, 191, 22, 0, 0, 0, 0, 248, 135, 1, 0, 0, 0, 128, + 97, 28, 0, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 0, + 0, 0, 0, 3, 240, 255, 255, 127, 0, 0, 0, 16, 0, 32, 30, 0, + 0, 0, 2, 0, 0, 32, 0, 0, 0, 4, 0, 0, 128, 95, 0, 0, + 0, 31, 0, 0, 0, 0, 160, 194, 220, 0, 0, 0, 64, 0, 0, 0, + 0, 0, 128, 6, 128, 191, 0, 12, 0, 254, 15, 32, 0, 0, 0, 14, + 0, 0, 224, 159, 0, 0, 16, 0, 16, 0, 0, 0, 0, 248, 15, 0, + 0, 12, 0, 0, 0, 0, 192, 0, 0, 0, 0, 63, 255, 33, 16, 0, + 0, 240, 255, 255, 240, 255, 0, 0, 0, 0, 0, 224, 0, 0, 0, 160, + 3, 224, 0, 224, 0, 224, 0, 96, 0, 128, 3, 0, 0, 128, 0, 0, + 0, 252, 0, 0, 0, 0, 0, 30, 0, 128, 0, 176, 0, 0, 3, 0, + 0, 0, 128, 255, 3, 0, 0, 0, 0, 1, 0, 0, 255, 255, 3, 0, + 0, 120, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 7, 0, 0, 0, + 0, 0, 64, 0, 0, 48, 0, 0, 127, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 192, 8, 0, 0, 0, 0, 0, 0, 6, 0, 0, 24, 0, + 0, 128, 255, 255, 128, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, +}; + +/* Diacritic: 849 bytes. */ + +RE_UINT32 re_get_diacritic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_diacritic_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_diacritic_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_diacritic_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_diacritic_stage_4[pos + f] << 5; + pos += code; + value = (re_diacritic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Extender. */ + +static RE_UINT8 re_extender_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_extender_stage_2[] = { + 0, 1, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_extender_stage_3[] = { + 0, 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 5, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 8, 1, 1, 1, + 9, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, + 1, 12, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, +}; + +static RE_UINT8 re_extender_stage_4[] = { + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 5, 0, + 6, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, + 0, 9, 0, 10, 0, 0, 0, 0, 11, 12, 0, 0, 13, 0, 0, 14, + 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 18, 0, 0, 19, 20, + 0, 0, 0, 18, 0, 0, 0, 0, +}; + +static RE_UINT8 re_extender_stage_5[] = { + 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 4, 64, 0, 0, 0, 0, 4, 0, 0, 8, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 8, 32, 0, 0, 0, + 0, 0, 62, 0, 0, 0, 0, 96, 0, 0, 0, 112, 0, 0, 32, 0, + 0, 16, 0, 0, 0, 128, 0, 0, 0, 0, 1, 0, 0, 0, 0, 32, + 0, 0, 24, 0, +}; + +/* Extender: 349 bytes. */ + +RE_UINT32 re_get_extender(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_extender_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_extender_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_extender_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_extender_stage_4[pos + f] << 5; + pos += code; + value = (re_extender_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Lowercase. */ + +static RE_UINT8 re_other_lowercase_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_lowercase_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_other_lowercase_stage_3[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, + 4, 2, 5, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 7, 2, 2, 2, 2, +}; + +static RE_UINT8 re_other_lowercase_stage_4[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, + 0, 8, 9, 0, 0, 10, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, + 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 14, +}; + +static RE_UINT8 re_other_lowercase_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, + 0, 0, 0, 0, 0, 0, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 240, 255, 255, + 255, 255, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 2, 128, 0, 0, 255, 31, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 3, +}; + +/* Other_Lowercase: 273 bytes. */ + +RE_UINT32 re_get_other_lowercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_lowercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_lowercase_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_lowercase_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_lowercase_stage_4[pos + f] << 6; + pos += code; + value = (re_other_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Uppercase. */ + +static RE_UINT8 re_other_uppercase_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_uppercase_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_uppercase_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_uppercase_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 0, +}; + +static RE_UINT8 re_other_uppercase_stage_5[] = { + 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 192, 255, +}; + +/* Other_Uppercase: 117 bytes. */ + +RE_UINT32 re_get_other_uppercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_uppercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_uppercase_stage_2[pos + f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_other_uppercase_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_other_uppercase_stage_4[pos + f] << 5; + pos += code; + value = (re_other_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Noncharacter_Code_Point. */ + +static RE_UINT8 re_noncharacter_code_point_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 0, 0, 0, 0, 0, 0, 0, 2, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 2, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 192, +}; + +/* Noncharacter_Code_Point: 121 bytes. */ + +RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_noncharacter_code_point_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_noncharacter_code_point_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_noncharacter_code_point_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_noncharacter_code_point_stage_4[pos + f] << 6; + pos += code; + value = (re_noncharacter_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Grapheme_Extend. */ + +static RE_UINT8 re_other_grapheme_extend_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_2[] = { + 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_3[] = { + 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 3, 1, 2, 0, 4, + 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 8, 0, 0, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, + 0, 0, 128, 0, 0, 0, 0, 0, 4, 0, 96, 0, 0, 0, 0, 0, + 0, 128, 0, 128, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, + 0, 0, 0, 0, 32, 192, 7, 0, +}; + +/* Other_Grapheme_Extend: 249 bytes. */ + +RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_grapheme_extend_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_grapheme_extend_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_grapheme_extend_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_grapheme_extend_stage_4[pos + f] << 6; + pos += code; + value = (re_other_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* IDS_Binary_Operator. */ + +static RE_UINT8 re_ids_binary_operator_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ids_binary_operator_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_ids_binary_operator_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_binary_operator_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_binary_operator_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 15, +}; + +/* IDS_Binary_Operator: 97 bytes. */ + +RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ids_binary_operator_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_ids_binary_operator_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_ids_binary_operator_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_ids_binary_operator_stage_4[pos + f] << 6; + pos += code; + value = (re_ids_binary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* IDS_Trinary_Operator. */ + +static RE_UINT8 re_ids_trinary_operator_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, +}; + +/* IDS_Trinary_Operator: 97 bytes. */ + +RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ids_trinary_operator_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_ids_trinary_operator_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_ids_trinary_operator_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_ids_trinary_operator_stage_4[pos + f] << 6; + pos += code; + value = (re_ids_trinary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Radical. */ + +static RE_UINT8 re_radical_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_radical_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_radical_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_radical_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 3, 2, 2, 2, 2, 2, 2, 4, 0, +}; + +static RE_UINT8 re_radical_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 251, 255, 255, 255, 255, 255, 255, 15, 0, + 255, 255, 63, 0, +}; + +/* Radical: 117 bytes. */ + +RE_UINT32 re_get_radical(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_radical_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_radical_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_radical_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_radical_stage_4[pos + f] << 5; + pos += code; + value = (re_radical_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Unified_Ideograph. */ + +static RE_UINT8 re_unified_ideograph_stage_1[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_unified_ideograph_stage_2[] = { + 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 0, 0, 0, 0, +}; + +static RE_UINT8 re_unified_ideograph_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 4, 0, 0, + 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 6, 7, 0, 0, 0, +}; + +static RE_UINT8 re_unified_ideograph_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 3, + 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 5, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 7, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_unified_ideograph_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 63, 0, 255, 31, 0, 0, 0, 0, 0, 0, + 0, 192, 26, 128, 154, 3, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 0, 0, 0, 0, +}; + +/* Unified_Ideograph: 257 bytes. */ + +RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_unified_ideograph_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_unified_ideograph_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_unified_ideograph_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_unified_ideograph_stage_4[pos + f] << 6; + pos += code; + value = (re_unified_ideograph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Default_Ignorable_Code_Point. */ + +static RE_UINT8 re_other_default_ignorable_code_point_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_3[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 7, 8, 8, 8, 8, 8, 8, 8, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 9, 0, 0, 0, 10, + 9, 9, 9, 9, 9, 9, 9, 9, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, + 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 1, + 253, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 255, 255, +}; + +/* Other_Default_Ignorable_Code_Point: 281 bytes. */ + +RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_4[pos + f] << 6; + pos += code; + value = (re_other_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Deprecated. */ + +static RE_UINT8 re_deprecated_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, +}; + +static RE_UINT8 re_deprecated_stage_2[] = { + 0, 1, 2, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_deprecated_stage_3[] = { + 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 0, 0, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_deprecated_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, + 0, 6, 0, 0, 0, 0, 0, 0, 7, 8, 8, 8, 0, 0, 0, 0, +}; + +static RE_UINT8 re_deprecated_stage_5[] = { + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 8, 0, 0, 0, 128, 2, + 24, 0, 0, 0, 0, 252, 0, 0, 0, 6, 0, 0, 2, 0, 0, 0, + 255, 255, 255, 255, +}; + +/* Deprecated: 230 bytes. */ + +RE_UINT32 re_get_deprecated(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_deprecated_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_deprecated_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_deprecated_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_deprecated_stage_4[pos + f] << 5; + pos += code; + value = (re_deprecated_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Soft_Dotted. */ + +static RE_UINT8 re_soft_dotted_stage_1[] = { + 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, +}; + +static RE_UINT8 re_soft_dotted_stage_2[] = { + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_soft_dotted_stage_3[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 6, 7, 5, 8, 9, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 11, 12, 13, 5, +}; + +static RE_UINT8 re_soft_dotted_stage_4[] = { + 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 10, 11, 0, 0, 0, 12, 0, 0, 0, 0, 13, 0, + 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, + 0, 0, 0, 16, 0, 0, 0, 0, 0, 17, 18, 0, 19, 20, 0, 21, + 0, 22, 23, 0, 24, 0, 17, 18, 0, 19, 20, 0, 21, 0, 0, 0, +}; + +static RE_UINT8 re_soft_dotted_stage_5[] = { + 0, 0, 0, 0, 0, 6, 0, 0, 0, 128, 0, 0, 0, 2, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 32, 0, 0, 4, 0, 0, 0, 8, 0, + 0, 0, 64, 1, 4, 0, 0, 0, 0, 0, 64, 0, 16, 1, 0, 0, + 0, 32, 0, 0, 0, 8, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, + 0, 0, 0, 16, 12, 0, 0, 0, 0, 0, 192, 0, 0, 12, 0, 0, + 0, 0, 0, 192, 0, 0, 12, 0, 192, 0, 0, 0, 0, 0, 0, 12, + 0, 192, 0, 0, +}; + +/* Soft_Dotted: 342 bytes. */ + +RE_UINT32 re_get_soft_dotted(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_soft_dotted_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_soft_dotted_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_soft_dotted_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_soft_dotted_stage_4[pos + f] << 5; + pos += code; + value = (re_soft_dotted_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Logical_Order_Exception. */ + +static RE_UINT8 re_logical_order_exception_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_logical_order_exception_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_logical_order_exception_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, +}; + +static RE_UINT8 re_logical_order_exception_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_logical_order_exception_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 96, 26, +}; + +/* Logical_Order_Exception: 121 bytes. */ + +RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_logical_order_exception_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_logical_order_exception_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_logical_order_exception_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_logical_order_exception_stage_4[pos + f] << 6; + pos += code; + value = (re_logical_order_exception_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_ID_Start. */ + +static RE_UINT8 re_other_id_start_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_id_start_stage_2[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_id_start_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_id_start_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_id_start_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 64, 0, 0, + 0, 0, 0, 24, 0, 0, 0, 0, +}; + +/* Other_ID_Start: 113 bytes. */ + +RE_UINT32 re_get_other_id_start(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_id_start_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_other_id_start_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_id_start_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_id_start_stage_4[pos + f] << 6; + pos += code; + value = (re_other_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_ID_Continue. */ + +static RE_UINT8 re_other_id_continue_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_id_continue_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_other_id_continue_stage_3[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_other_id_continue_stage_4[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 4, +}; + +static RE_UINT8 re_other_id_continue_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, + 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 3, 0, + 0, 0, 0, 4, 0, 0, 0, 0, +}; + +/* Other_ID_Continue: 145 bytes. */ + +RE_UINT32 re_get_other_id_continue(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_id_continue_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_other_id_continue_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_id_continue_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_id_continue_stage_4[pos + f] << 6; + pos += code; + value = (re_other_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* STerm. */ + +static RE_UINT8 re_sterm_stage_1[] = { + 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static RE_UINT8 re_sterm_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 3, 3, 9, 10, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 12, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 13, + 3, 3, 14, 3, 15, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_sterm_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, + 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 9, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, 12, 1, + 13, 1, 14, 1, 1, 15, 16, 1, 17, 1, 1, 1, 1, 1, 1, 1, + 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 19, 1, 1, 1, + 20, 1, 1, 1, 1, 1, 1, 1, 1, 21, 1, 1, 22, 23, 1, 1, + 24, 25, 26, 27, 28, 29, 1, 30, 1, 1, 1, 1, 31, 1, 32, 1, + 1, 1, 1, 1, 33, 1, 1, 1, 34, 35, 36, 37, 1, 1, 1, 1, +}; + +static RE_UINT8 re_sterm_stage_4[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 7, + 0, 0, 0, 8, 0, 0, 9, 0, 0, 0, 0, 10, 0, 0, 0, 11, + 0, 12, 0, 0, 13, 0, 0, 0, 0, 0, 8, 0, 0, 14, 0, 0, + 0, 0, 15, 0, 0, 16, 0, 17, 0, 18, 19, 0, 0, 11, 0, 0, + 20, 0, 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 22, + 0, 0, 0, 23, 0, 0, 21, 0, 0, 24, 0, 0, 0, 0, 25, 0, + 0, 0, 26, 0, 0, 0, 0, 27, 0, 0, 0, 28, 0, 0, 29, 0, + 1, 0, 0, 30, 0, 0, 23, 0, 0, 0, 31, 0, 0, 17, 32, 0, + 0, 0, 33, 0, 0, 0, 34, 0, +}; + +static RE_UINT8 re_sterm_stage_5[] = { + 0, 0, 0, 0, 2, 64, 0, 128, 0, 0, 0, 80, 0, 2, 0, 0, + 0, 0, 0, 128, 0, 0, 16, 0, 7, 0, 0, 0, 0, 0, 0, 2, + 48, 0, 0, 0, 0, 12, 0, 0, 132, 1, 0, 0, 0, 64, 0, 0, + 0, 0, 96, 0, 8, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 204, + 0, 0, 0, 24, 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, + 4, 0, 0, 0, 0, 192, 0, 0, 0, 0, 136, 0, 0, 0, 192, 0, + 0, 128, 0, 0, 0, 3, 0, 0, 0, 0, 0, 224, 0, 0, 3, 0, + 0, 8, 0, 0, 0, 0, 196, 0, 2, 0, 0, 0, 128, 1, 0, 0, + 3, 0, 0, 0, 14, 0, 0, 0, 96, 0, 0, 0, +}; + +/* STerm: 568 bytes. */ + +RE_UINT32 re_get_sterm(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 14; + code = ch ^ (f << 14); + pos = (RE_UINT32)re_sterm_stage_1[f] << 4; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_sterm_stage_2[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_sterm_stage_3[pos + f] << 2; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_sterm_stage_4[pos + f] << 5; + pos += code; + value = (re_sterm_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Variation_Selector. */ + +static RE_UINT8 re_variation_selector_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, +}; + +static RE_UINT8 re_variation_selector_stage_2[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_variation_selector_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_variation_selector_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, +}; + +static RE_UINT8 re_variation_selector_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, + 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 0, 0, +}; + +/* Variation_Selector: 169 bytes. */ + +RE_UINT32 re_get_variation_selector(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_variation_selector_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_variation_selector_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_variation_selector_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_variation_selector_stage_4[pos + f] << 6; + pos += code; + value = (re_variation_selector_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Pattern_White_Space. */ + +static RE_UINT8 re_pattern_white_space_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_2[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_5[] = { + 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 3, 0, 0, +}; + +/* Pattern_White_Space: 129 bytes. */ + +RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_pattern_white_space_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_pattern_white_space_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_pattern_white_space_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_pattern_white_space_stage_4[pos + f] << 6; + pos += code; + value = (re_pattern_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Pattern_Syntax. */ + +static RE_UINT8 re_pattern_syntax_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_pattern_syntax_stage_2[] = { + 0, 1, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_syntax_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 4, 4, 5, 4, 4, 6, 4, 4, 4, 4, 1, 1, 7, 1, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 10, 1, +}; + +static RE_UINT8 re_pattern_syntax_stage_4[] = { + 0, 1, 2, 2, 0, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, + 8, 8, 8, 9, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, + 11, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, + 0, 0, 14, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_pattern_syntax_stage_5[] = { + 0, 0, 0, 0, 254, 255, 0, 252, 1, 0, 0, 120, 254, 90, 67, 136, + 0, 0, 128, 0, 0, 0, 255, 255, 255, 0, 255, 127, 254, 255, 239, 127, + 255, 255, 255, 255, 255, 255, 63, 0, 0, 0, 240, 255, 14, 255, 255, 255, + 1, 0, 1, 0, 0, 0, 0, 192, 96, 0, 0, 0, +}; + +/* Pattern_Syntax: 277 bytes. */ + +RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_pattern_syntax_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_pattern_syntax_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_pattern_syntax_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_pattern_syntax_stage_4[pos + f] << 5; + pos += code; + value = (re_pattern_syntax_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Hangul_Syllable_Type. */ + +static RE_UINT8 re_hangul_syllable_type_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, 4, + 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, + 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, + 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, + 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, + 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 11, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, + 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, + 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, + 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, + 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, + 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, + 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, + 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, + 6, 5, 6, 6, 8, 0, 2, 2, 9, 10, 3, 3, 3, 3, 3, 11, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 0, 0, 0, 4, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, + 5, 5, 5, 5, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, + 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, +}; + +/* Hangul_Syllable_Type: 497 bytes. */ + +RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_hangul_syllable_type_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_hangul_syllable_type_stage_2[pos + f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_hangul_syllable_type_stage_3[pos + f] << 4; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_hangul_syllable_type_stage_4[pos + f] << 3; + value = re_hangul_syllable_type_stage_5[pos + code]; + + return value; +} + +/* Bidi_Class. */ + +static RE_UINT8 re_bidi_class_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 7, + 8, 9, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 11, 12, 13, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 15, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, +}; + +static RE_UINT8 re_bidi_class_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 2, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 2, 2, 2, 2, 30, 31, 32, 2, 2, 2, 2, 33, 34, 35, + 36, 37, 38, 39, 40, 2, 41, 42, 43, 44, 2, 45, 2, 2, 2, 46, + 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 52, 52, 52, 57, 58, 52, + 2, 2, 52, 52, 52, 52, 59, 2, 2, 60, 61, 62, 63, 64, 52, 65, + 66, 67, 2, 68, 69, 70, 71, 72, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 73, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 74, 2, 2, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 2, 85, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 86, 87, 87, 87, 88, 89, 90, 91, 92, 93, + 2, 2, 94, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 96, 96, 97, 96, 98, 96, 99, 96, 96, 96, 96, 96, 100, 96, 96, 96, + 101, 102, 103, 104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 106, + 2, 2, 107, 108, 109, 2, 110, 2, 2, 2, 2, 2, 2, 111, 112, 113, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 87, 114, 96, 96, + 115, 116, 117, 2, 2, 2, 118, 119, 120, 121, 122, 123, 124, 125, 126, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, + 128, 128, 129, 130, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, +}; + +static RE_UINT8 re_bidi_class_stage_3[] = { + 0, 1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 10, 11, 12, 11, 12, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 14, 14, 15, 16, + 17, 17, 17, 17, 17, 17, 17, 18, 19, 11, 11, 11, 11, 11, 11, 20, + 21, 11, 11, 11, 11, 11, 11, 11, 22, 23, 17, 24, 25, 26, 26, 26, + 27, 28, 29, 29, 30, 17, 31, 32, 29, 29, 29, 29, 29, 33, 34, 35, + 29, 36, 29, 17, 28, 29, 29, 29, 29, 29, 37, 32, 26, 26, 38, 39, + 26, 40, 41, 26, 26, 42, 26, 26, 26, 26, 29, 29, 29, 29, 43, 44, + 45, 11, 11, 46, 47, 48, 49, 11, 50, 11, 11, 51, 52, 11, 49, 53, + 54, 11, 11, 51, 55, 50, 11, 56, 54, 11, 11, 51, 57, 11, 49, 58, + 50, 11, 11, 59, 52, 60, 49, 11, 61, 11, 11, 11, 62, 11, 11, 63, + 11, 11, 11, 64, 65, 66, 49, 67, 11, 11, 11, 51, 68, 11, 49, 11, + 11, 11, 11, 11, 52, 11, 49, 11, 11, 11, 11, 11, 69, 70, 11, 11, + 11, 11, 11, 71, 72, 11, 11, 11, 11, 11, 11, 73, 74, 11, 11, 11, + 11, 75, 11, 76, 11, 11, 11, 77, 78, 79, 17, 80, 60, 11, 11, 11, + 11, 11, 81, 82, 11, 83, 84, 85, 86, 87, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 81, 11, 11, 11, 88, 11, 11, 11, 11, 11, 11, + 4, 11, 11, 11, 11, 11, 11, 11, 89, 90, 11, 11, 11, 11, 11, 11, + 11, 91, 11, 91, 11, 49, 11, 49, 11, 11, 11, 92, 93, 94, 11, 88, + 95, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 11, 11, 11, 11, 11, + 11, 11, 97, 98, 99, 11, 11, 11, 11, 11, 11, 11, 11, 100, 16, 16, + 11, 101, 11, 11, 11, 102, 103, 104, 105, 11, 11, 106, 61, 11, 107, 105, + 108, 11, 109, 11, 11, 11, 110, 108, 11, 11, 111, 112, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 113, 114, 115, 11, 11, 11, 11, 17, 17, 116, 111, + 11, 11, 11, 117, 118, 119, 119, 120, 121, 16, 122, 123, 124, 125, 126, 127, + 128, 11, 129, 129, 129, 17, 17, 84, 130, 131, 132, 133, 134, 16, 11, 11, + 135, 16, 16, 16, 16, 16, 16, 16, 16, 136, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 137, 11, 11, 11, 5, + 16, 138, 16, 16, 16, 16, 16, 139, 16, 16, 140, 11, 141, 11, 16, 16, + 142, 143, 11, 11, 11, 11, 144, 16, 16, 16, 145, 16, 16, 16, 16, 16, + 146, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 147, 88, 11, 11, + 11, 11, 11, 11, 11, 11, 148, 149, 11, 11, 11, 11, 11, 11, 11, 150, + 11, 11, 11, 11, 11, 11, 17, 17, 16, 16, 16, 151, 11, 11, 11, 11, + 16, 152, 16, 16, 16, 16, 16, 139, 16, 16, 16, 16, 16, 137, 11, 151, + 153, 16, 154, 155, 11, 11, 11, 11, 11, 156, 4, 11, 11, 11, 11, 157, + 11, 11, 11, 11, 16, 16, 139, 11, 11, 120, 11, 11, 11, 16, 11, 158, + 11, 11, 11, 146, 159, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 160, + 11, 11, 11, 11, 11, 100, 11, 161, 11, 11, 11, 11, 16, 16, 16, 16, + 11, 16, 16, 16, 140, 11, 11, 11, 119, 11, 11, 11, 11, 11, 150, 162, + 11, 150, 11, 11, 11, 11, 11, 108, 16, 16, 163, 11, 11, 11, 11, 11, + 164, 11, 11, 11, 11, 11, 11, 11, 165, 11, 166, 167, 11, 11, 11, 168, + 11, 11, 11, 11, 115, 11, 17, 108, 11, 11, 169, 11, 170, 108, 11, 11, + 45, 11, 11, 171, 11, 11, 11, 11, 11, 11, 172, 173, 174, 11, 11, 11, + 11, 11, 11, 175, 50, 11, 68, 60, 11, 11, 11, 11, 11, 11, 176, 11, + 11, 177, 178, 26, 26, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 179, 29, 29, 29, 29, 29, 29, 29, 29, 29, 8, 8, 180, + 17, 88, 116, 16, 16, 181, 182, 29, 29, 29, 29, 29, 29, 29, 29, 183, + 184, 3, 4, 5, 4, 5, 137, 11, 11, 11, 11, 11, 11, 11, 185, 186, + 187, 11, 11, 11, 16, 16, 16, 16, 141, 151, 11, 11, 11, 11, 11, 87, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 188, 26, 26, 26, 26, 26, 26, + 189, 26, 26, 190, 26, 26, 26, 26, 26, 26, 26, 191, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 192, 193, 50, 11, 11, 194, 116, 14, 137, 11, + 108, 11, 11, 195, 11, 11, 11, 11, 45, 11, 196, 197, 11, 11, 11, 11, + 108, 11, 11, 198, 11, 11, 11, 11, 11, 11, 199, 200, 11, 11, 11, 11, + 150, 45, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 201, 202, + 203, 11, 204, 11, 11, 11, 11, 11, 16, 16, 16, 16, 205, 11, 11, 11, + 16, 16, 16, 16, 16, 140, 11, 11, 11, 11, 11, 11, 11, 157, 11, 11, + 11, 206, 11, 11, 161, 11, 11, 11, 135, 11, 11, 11, 207, 208, 208, 208, + 29, 29, 29, 29, 29, 29, 29, 209, 16, 16, 151, 16, 16, 16, 16, 16, + 16, 139, 210, 211, 146, 146, 11, 11, 212, 11, 11, 11, 11, 11, 133, 11, + 16, 16, 4, 213, 16, 16, 16, 147, 16, 139, 16, 16, 214, 11, 16, 4, + 16, 16, 16, 210, 215, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 216, + 16, 16, 16, 217, 139, 16, 218, 11, 11, 11, 11, 11, 11, 11, 11, 5, + 16, 16, 16, 16, 219, 11, 11, 11, 16, 16, 16, 16, 137, 11, 11, 11, + 16, 16, 16, 16, 16, 16, 16, 139, 11, 11, 11, 11, 11, 11, 11, 220, + 8, 8, 8, 8, 8, 8, 8, 8, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 8, +}; + +static RE_UINT8 re_bidi_class_stage_4[] = { + 0, 0, 1, 2, 0, 0, 0, 3, 4, 5, 6, 7, 8, 8, 9, 10, + 11, 12, 12, 12, 12, 12, 13, 10, 12, 12, 13, 14, 0, 15, 0, 0, + 0, 0, 0, 0, 16, 5, 17, 18, 19, 20, 21, 10, 12, 12, 12, 12, + 12, 13, 12, 12, 12, 12, 22, 12, 23, 10, 10, 10, 12, 24, 10, 17, + 10, 10, 10, 10, 25, 25, 25, 25, 12, 26, 12, 27, 12, 17, 12, 12, + 12, 27, 12, 12, 28, 25, 29, 12, 12, 12, 27, 30, 31, 25, 25, 25, + 25, 25, 25, 32, 33, 32, 34, 34, 34, 34, 34, 34, 35, 36, 37, 38, + 25, 25, 39, 40, 40, 40, 40, 40, 40, 40, 41, 25, 35, 35, 42, 43, + 44, 40, 40, 40, 40, 45, 25, 46, 25, 47, 48, 49, 8, 8, 50, 40, + 51, 40, 40, 40, 40, 45, 25, 25, 34, 34, 52, 25, 25, 53, 54, 34, + 34, 55, 32, 25, 25, 31, 31, 56, 34, 34, 31, 34, 40, 25, 25, 25, + 25, 25, 25, 39, 57, 12, 12, 12, 12, 12, 58, 59, 60, 25, 59, 61, + 60, 25, 12, 12, 62, 12, 12, 12, 61, 12, 12, 12, 12, 12, 12, 59, + 60, 59, 12, 61, 63, 12, 30, 12, 64, 12, 12, 12, 64, 28, 65, 29, + 29, 61, 12, 12, 60, 66, 59, 61, 67, 12, 12, 12, 12, 12, 12, 65, + 12, 58, 12, 12, 58, 12, 12, 12, 59, 12, 12, 61, 13, 10, 68, 12, + 12, 12, 12, 62, 59, 62, 69, 29, 12, 64, 12, 12, 12, 12, 10, 70, + 12, 12, 12, 29, 12, 12, 58, 12, 62, 71, 12, 12, 61, 25, 57, 30, + 12, 28, 25, 57, 61, 25, 66, 59, 12, 12, 25, 29, 12, 12, 29, 12, + 12, 72, 73, 26, 60, 25, 25, 57, 25, 69, 12, 60, 25, 25, 60, 25, + 25, 25, 25, 59, 12, 12, 12, 60, 69, 25, 64, 64, 12, 12, 29, 62, + 59, 12, 12, 12, 60, 59, 12, 12, 58, 64, 12, 61, 12, 12, 12, 61, + 10, 10, 26, 12, 74, 12, 12, 12, 12, 12, 13, 11, 62, 59, 12, 12, + 12, 66, 25, 29, 12, 58, 60, 25, 25, 12, 30, 61, 10, 10, 75, 76, + 12, 12, 61, 12, 57, 28, 59, 12, 58, 12, 60, 12, 11, 26, 12, 12, + 12, 12, 12, 23, 12, 28, 65, 12, 12, 58, 25, 57, 71, 60, 25, 59, + 28, 25, 25, 65, 25, 12, 12, 12, 12, 69, 57, 59, 12, 12, 28, 25, + 29, 12, 12, 12, 62, 29, 66, 12, 12, 58, 29, 72, 12, 12, 12, 25, + 25, 62, 12, 12, 57, 25, 25, 25, 69, 25, 59, 61, 12, 59, 12, 12, + 25, 57, 12, 12, 12, 12, 12, 77, 26, 12, 12, 24, 12, 12, 12, 24, + 12, 12, 12, 22, 78, 78, 79, 80, 10, 10, 81, 82, 83, 84, 10, 10, + 10, 85, 10, 10, 10, 10, 10, 86, 0, 87, 88, 0, 89, 8, 90, 70, + 8, 8, 90, 70, 83, 83, 83, 83, 17, 70, 26, 12, 12, 20, 11, 23, + 10, 77, 91, 92, 12, 12, 23, 12, 10, 11, 23, 26, 12, 12, 91, 12, + 93, 10, 10, 10, 10, 26, 12, 12, 10, 20, 10, 10, 10, 12, 12, 12, + 10, 70, 12, 12, 10, 10, 70, 12, 10, 10, 8, 8, 8, 8, 8, 12, + 12, 12, 23, 10, 10, 10, 10, 24, 24, 10, 10, 10, 10, 10, 10, 11, + 12, 24, 70, 28, 29, 12, 24, 10, 12, 12, 12, 28, 10, 10, 10, 12, + 10, 10, 17, 10, 94, 11, 10, 10, 11, 12, 62, 29, 11, 23, 12, 24, + 12, 12, 95, 11, 12, 12, 13, 12, 12, 12, 12, 70, 12, 12, 12, 10, + 12, 13, 70, 12, 12, 12, 12, 13, 96, 25, 25, 97, 26, 12, 12, 12, + 12, 12, 11, 12, 58, 58, 28, 12, 12, 64, 10, 12, 12, 12, 98, 12, + 12, 10, 12, 12, 12, 62, 25, 29, 12, 28, 25, 25, 28, 62, 29, 59, + 12, 12, 60, 57, 64, 64, 12, 12, 28, 12, 12, 59, 69, 65, 59, 62, + 12, 61, 59, 61, 12, 12, 12, 99, 34, 34, 100, 34, 40, 40, 40, 101, + 40, 40, 40, 102, 103, 104, 10, 105, 106, 70, 107, 12, 40, 40, 40, 108, + 109, 5, 6, 7, 5, 110, 10, 70, 0, 0, 111, 112, 91, 12, 12, 12, + 34, 34, 34, 113, 31, 33, 34, 25, 34, 34, 114, 52, 34, 34, 115, 10, + 35, 35, 35, 35, 35, 35, 35, 116, 12, 12, 25, 25, 28, 57, 64, 12, + 12, 28, 25, 60, 25, 59, 12, 12, 12, 62, 25, 57, 12, 12, 28, 61, + 25, 66, 12, 12, 12, 28, 29, 12, 117, 0, 118, 25, 57, 60, 25, 12, + 12, 12, 62, 29, 119, 120, 12, 12, 12, 91, 12, 12, 13, 12, 12, 121, + 8, 8, 8, 8, 122, 40, 40, 40, 10, 10, 10, 70, 24, 10, 10, 70, + 8, 8, 123, 12, 10, 17, 10, 10, 10, 20, 70, 12, 20, 10, 10, 10, + 10, 10, 24, 11, 10, 10, 10, 26, 10, 10, 12, 12, 11, 24, 10, 10, + 12, 12, 12, 124, +}; + +static RE_UINT8 re_bidi_class_stage_5[] = { + 11, 11, 11, 11, 11, 8, 7, 8, 9, 7, 11, 11, 7, 7, 7, 8, + 9, 10, 10, 4, 4, 4, 10, 10, 10, 10, 10, 3, 6, 3, 6, 6, + 2, 2, 2, 2, 2, 2, 6, 10, 10, 10, 10, 10, 10, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 11, 11, 7, 11, 11, + 6, 10, 4, 4, 10, 10, 0, 10, 10, 11, 10, 10, 4, 4, 2, 2, + 10, 0, 10, 10, 10, 2, 0, 10, 0, 10, 10, 0, 0, 0, 10, 10, + 0, 10, 10, 10, 12, 12, 12, 12, 10, 10, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 12, 12, 12, 0, 0, 0, 0, 0, 4, 1, 12, 12, 12, + 12, 12, 1, 12, 1, 12, 12, 1, 1, 1, 1, 1, 5, 5, 5, 5, + 5, 13, 10, 10, 13, 4, 4, 13, 6, 13, 10, 10, 12, 12, 12, 13, + 13, 13, 13, 13, 13, 13, 13, 12, 5, 5, 4, 5, 5, 13, 13, 13, + 12, 13, 13, 13, 13, 13, 12, 12, 12, 5, 10, 12, 12, 13, 13, 12, + 12, 10, 12, 12, 12, 12, 13, 13, 2, 2, 13, 13, 13, 12, 13, 13, + 1, 1, 1, 12, 1, 1, 10, 10, 10, 10, 1, 1, 1, 1, 12, 12, + 12, 12, 1, 1, 12, 12, 12, 0, 0, 0, 12, 0, 12, 0, 0, 0, + 0, 12, 12, 12, 0, 12, 0, 0, 0, 0, 12, 12, 0, 0, 4, 4, + 0, 12, 12, 0, 12, 0, 0, 12, 12, 12, 0, 12, 0, 4, 0, 0, + 10, 4, 10, 0, 12, 0, 12, 12, 10, 10, 10, 0, 12, 0, 12, 0, + 0, 12, 0, 12, 0, 12, 10, 10, 9, 0, 0, 0, 10, 10, 10, 12, + 12, 12, 11, 0, 0, 10, 0, 10, 9, 9, 9, 9, 9, 9, 9, 11, + 11, 11, 0, 1, 9, 7, 16, 17, 18, 14, 15, 6, 4, 4, 4, 4, + 4, 10, 10, 10, 6, 10, 10, 10, 10, 10, 10, 9, 11, 11, 19, 20, + 21, 22, 11, 11, 2, 0, 0, 0, 2, 2, 3, 3, 0, 10, 0, 0, + 0, 0, 4, 0, 10, 10, 3, 4, 9, 10, 10, 10, 0, 12, 12, 10, + 12, 12, 12, 10, 12, 12, 10, 10, 4, 4, 0, 0, 0, 1, 12, 1, + 1, 3, 1, 1, 13, 13, 10, 10, 13, 10, 13, 13, 6, 10, 6, 0, + 10, 6, 10, 10, 10, 10, 10, 4, 10, 10, 3, 3, 10, 4, 4, 10, + 13, 13, 13, 11, 0, 10, 10, 4, 10, 4, 4, 0, 11, 10, 10, 10, + 10, 10, 11, 11, 1, 1, 1, 10, 12, 12, 12, 1, 1, 10, 10, 10, + 5, 5, 5, 1, 0, 0, 0, 11, 11, 11, 11, 12, 10, 10, 12, 12, + 12, 10, 0, 0, 0, 0, 2, 2, 10, 10, 13, 13, 2, 2, 2, 0, + 0, 0, 11, 11, +}; + +/* Bidi_Class: 3216 bytes. */ + +RE_UINT32 re_get_bidi_class(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_bidi_class_stage_1[f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_bidi_class_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_bidi_class_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_bidi_class_stage_4[pos + f] << 2; + value = re_bidi_class_stage_5[pos + code]; + + return value; +} + +/* Canonical_Combining_Class. */ + +static RE_UINT8 re_canonical_combining_class_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 6, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_canonical_combining_class_stage_2[] = { + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, + 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 33, 0, + 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, + 36, 37, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_canonical_combining_class_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, + 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17, + 18, 19, 20, 0, 0, 0, 0, 21, 0, 22, 23, 0, 0, 22, 24, 0, + 0, 22, 24, 0, 0, 22, 24, 0, 0, 22, 24, 0, 0, 0, 24, 0, + 0, 0, 25, 0, 0, 22, 24, 0, 0, 0, 24, 0, 0, 0, 26, 0, + 0, 27, 28, 0, 0, 29, 30, 0, 31, 32, 0, 33, 34, 0, 35, 0, + 0, 36, 0, 0, 37, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, + 39, 39, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 41, 0, 0, + 0, 42, 0, 0, 0, 0, 0, 0, 43, 0, 0, 44, 0, 0, 0, 0, + 0, 45, 46, 47, 0, 48, 0, 49, 0, 50, 0, 0, 0, 0, 51, 52, + 0, 0, 0, 0, 0, 0, 53, 54, 0, 0, 0, 0, 0, 0, 55, 56, + 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 58, 0, 0, 0, 59, + 0, 60, 0, 0, 61, 0, 0, 0, 0, 0, 0, 62, 63, 0, 0, 64, + 65, 0, 0, 0, 0, 0, 46, 66, 0, 67, 68, 0, 0, 69, 70, 0, + 0, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, 0, 0, 24, + 74, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 76, 77, 78, 0, 0, 0, 0, 0, 0, + 0, 0, 65, 0, 0, 79, 0, 0, 80, 81, 0, 0, 0, 0, 70, 0, + 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 83, 84, 85, 0, 0, + 0, 0, 86, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_canonical_combining_class_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, + 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17, + 18, 1, 1, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, 25, 26, 27, 28, + 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 31, 0, + 0, 0, 32, 33, 34, 35, 1, 36, 0, 0, 0, 0, 37, 0, 0, 0, + 0, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, 0, 0, 0, 0, + 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45, + 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 0, + 0, 0, 0, 0, 0, 38, 47, 1, 1, 48, 48, 49, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 50, 0, 0, 21, 43, 51, 52, 21, 35, 53, + 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 55, 56, 57, 0, 0, + 0, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 55, 0, 58, 0, 0, + 0, 0, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, + 0, 0, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, + 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, + 0, 0, 0, 0, 0, 65, 66, 0, 0, 0, 0, 0, 67, 68, 69, 70, + 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 74, 75, 0, 0, 0, 0, 76, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 77, 0, 0, + 0, 0, 0, 0, 59, 0, 0, 78, 0, 0, 79, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 19, 81, 0, + 77, 0, 0, 0, 0, 48, 1, 82, 0, 0, 0, 0, 0, 54, 0, 0, + 0, 77, 0, 0, 0, 0, 0, 0, 0, 0, 19, 10, 1, 0, 0, 0, + 0, 0, 83, 0, 0, 0, 0, 0, 0, 84, 0, 0, 83, 0, 0, 0, + 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 53, 9, 12, 4, + 85, 8, 86, 76, 0, 57, 0, 0, 21, 1, 21, 87, 88, 1, 1, 1, + 1, 53, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 90, 1, 91, 57, + 78, 92, 93, 4, 57, 0, 0, 0, 0, 0, 0, 19, 49, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 94, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 95, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, + 0, 0, 0, 19, 0, 1, 1, 49, 0, 0, 0, 0, 0, 0, 0, 19, + 0, 0, 0, 0, 49, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 49, 0, 0, 0, 0, 0, 98, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, + 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 100, 57, 38, + 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 101, 1, 53, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 102, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 103, 94, 0, 0, 0, 0, 0, 0, 104, 0, + 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 77, 0, 0, + 0, 0, 0, 0, 0, 105, 0, 0, 0, 106, 107, 108, 109, 0, 98, 4, + 110, 48, 23, 0, 0, 0, 0, 0, 0, 0, 38, 49, 0, 0, 0, 0, + 38, 57, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_canonical_combining_class_stage_5[] = { + 0, 0, 0, 0, 50, 50, 50, 50, 50, 51, 45, 45, 45, 45, 51, 43, + 45, 45, 45, 45, 45, 41, 41, 45, 45, 45, 45, 41, 41, 45, 45, 45, + 1, 1, 1, 1, 1, 45, 45, 45, 45, 50, 50, 50, 50, 54, 50, 45, + 45, 45, 50, 50, 50, 45, 45, 0, 50, 50, 50, 45, 45, 45, 45, 50, + 51, 45, 45, 50, 52, 53, 53, 52, 53, 53, 52, 50, 0, 0, 0, 50, + 0, 45, 50, 50, 50, 50, 45, 50, 50, 50, 46, 45, 50, 50, 45, 45, + 50, 46, 49, 50, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 15, + 16, 17, 0, 18, 0, 19, 20, 0, 50, 45, 0, 13, 25, 26, 27, 0, + 0, 0, 0, 22, 23, 24, 25, 26, 27, 28, 29, 50, 50, 45, 45, 50, + 45, 50, 50, 45, 30, 0, 0, 0, 0, 0, 50, 50, 50, 0, 0, 50, + 50, 0, 45, 50, 50, 45, 0, 0, 0, 31, 0, 0, 50, 45, 50, 50, + 45, 45, 50, 45, 45, 50, 45, 50, 45, 50, 50, 0, 50, 50, 0, 50, + 0, 50, 50, 50, 50, 50, 0, 0, 0, 45, 45, 45, 50, 45, 45, 45, + 22, 23, 24, 50, 50, 50, 50, 0, 2, 0, 0, 0, 0, 4, 0, 0, + 0, 50, 45, 50, 50, 0, 0, 0, 0, 32, 33, 0, 0, 0, 4, 0, + 34, 34, 4, 0, 35, 35, 35, 35, 36, 36, 0, 0, 37, 37, 37, 37, + 45, 45, 0, 0, 0, 45, 0, 45, 0, 43, 0, 0, 0, 38, 39, 0, + 40, 0, 0, 0, 0, 0, 39, 39, 39, 39, 0, 0, 39, 0, 50, 50, + 4, 0, 50, 50, 0, 0, 45, 0, 0, 0, 0, 2, 0, 4, 4, 0, + 0, 45, 0, 0, 4, 0, 0, 0, 0, 50, 0, 0, 0, 49, 0, 0, + 0, 46, 50, 45, 45, 0, 0, 0, 50, 0, 0, 45, 0, 0, 4, 4, + 0, 0, 2, 0, 50, 0, 1, 1, 1, 0, 0, 0, 50, 53, 42, 45, + 41, 50, 50, 50, 52, 45, 50, 45, 50, 50, 1, 1, 1, 1, 1, 50, + 0, 1, 1, 50, 45, 50, 1, 1, 0, 0, 0, 4, 0, 0, 44, 49, + 51, 46, 47, 47, 0, 3, 3, 0, 0, 0, 0, 45, 50, 0, 50, 50, + 45, 0, 0, 50, 0, 0, 21, 0, 0, 45, 0, 50, 50, 1, 45, 0, + 0, 4, 2, 0, 0, 0, 4, 2, 0, 43, 43, 1, 1, 1, 0, 0, + 0, 48, 43, 43, 43, 43, 43, 0, 45, 45, 45, 0, +}; + +/* Canonical_Combining_Class: 1828 bytes. */ + +RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_canonical_combining_class_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_canonical_combining_class_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_canonical_combining_class_stage_3[pos + f] << 3; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_canonical_combining_class_stage_4[pos + f] << 2; + value = re_canonical_combining_class_stage_5[pos + code]; + + return value; +} + +/* Decomposition_Type. */ + +static RE_UINT8 re_decomposition_type_stage_1[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 2, 2, 2, 7, 8, + 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_decomposition_type_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 16, 7, 17, 18, 19, + 20, 21, 22, 23, 24, 7, 7, 7, 7, 7, 25, 7, 26, 27, 28, 29, + 30, 31, 32, 33, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 34, 7, 7, 7, 7, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 36, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 35, 37, 38, 39, 40, 41, 42, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 43, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 45, 7, 7, 46, 47, 48, 49, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 50, 7, + 7, 51, 52, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 35, 35, 53, 7, 7, 7, 7, 7, +}; + +static RE_UINT8 re_decomposition_type_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 3, 5, + 6, 7, 8, 9, 10, 11, 8, 12, 0, 0, 13, 14, 15, 16, 17, 18, + 6, 19, 20, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 23, 24, 0, + 0, 0, 0, 0, 25, 0, 0, 26, 27, 14, 28, 14, 29, 30, 0, 31, + 32, 33, 0, 33, 0, 32, 0, 34, 0, 0, 0, 0, 35, 36, 37, 38, + 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 40, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 43, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 33, 44, 0, 45, 0, 0, 0, 0, 0, 0, 46, 47, 0, 0, + 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, 53, 0, 0, 0, + 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0, + 0, 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, + 0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 57, 0, 58, 0, 0, + 59, 0, 0, 0, 60, 61, 33, 62, 63, 60, 61, 33, 0, 0, 0, 0, + 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, + 66, 67, 0, 68, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 70, 71, 72, 73, 74, 75, 0, 76, 73, 73, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 77, 6, 6, 6, 6, 6, 78, + 6, 79, 6, 6, 79, 80, 6, 81, 6, 6, 6, 82, 83, 84, 6, 85, + 86, 87, 88, 89, 90, 91, 0, 92, 93, 94, 95, 0, 0, 0, 0, 0, + 96, 97, 98, 99, 100, 101, 102, 102, 103, 104, 105, 0, 106, 0, 0, 0, + 107, 0, 108, 109, 110, 0, 111, 112, 112, 0, 113, 0, 0, 0, 114, 0, + 0, 0, 115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 116, 117, 102, 102, 102, 118, 116, 116, 119, 0, + 120, 0, 0, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 122, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 125, 0, 0, 0, 0, 0, 57, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 126, 0, 0, + 127, 0, 0, 128, 129, 130, 131, 132, 0, 133, 129, 130, 131, 132, 0, 134, + 0, 0, 0, 135, 102, 102, 102, 102, 136, 137, 0, 0, 0, 0, 0, 0, + 102, 136, 102, 102, 138, 139, 116, 140, 116, 116, 116, 116, 141, 116, 116, 140, + 142, 142, 142, 142, 142, 143, 102, 144, 142, 142, 142, 142, 142, 142, 102, 145, + 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, 0, 0, 147, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 0, 0, 0, 0, 0, + 81, 148, 149, 6, 6, 6, 81, 6, 6, 6, 6, 6, 6, 78, 0, 0, + 150, 151, 152, 153, 154, 155, 156, 156, 157, 156, 158, 159, 0, 160, 161, 162, + 163, 163, 163, 163, 163, 163, 164, 165, 165, 166, 167, 167, 167, 168, 169, 170, + 163, 171, 172, 173, 0, 174, 175, 176, 177, 178, 165, 179, 180, 0, 0, 181, + 0, 182, 0, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, + 195, 196, 196, 196, 196, 196, 197, 198, 198, 198, 198, 199, 200, 201, 202, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 203, 204, 0, 0, 0, 0, 0, + 0, 0, 205, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 205, 206, 0, 0, 0, 0, 207, 14, 0, 0, 0, + 208, 208, 208, 208, 208, 209, 208, 208, 208, 210, 211, 212, 213, 208, 208, 208, + 214, 215, 208, 216, 217, 218, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 219, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 220, 208, 208, 208, + 213, 208, 221, 222, 223, 224, 225, 226, 227, 228, 229, 228, 0, 0, 0, 0, + 230, 102, 231, 142, 142, 0, 232, 0, 0, 233, 0, 0, 0, 0, 0, 0, + 234, 142, 142, 235, 236, 237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_decomposition_type_stage_4[] = { + 0, 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 8, 8, + 10, 11, 10, 12, 10, 11, 10, 9, 8, 8, 8, 8, 13, 8, 8, 8, + 8, 12, 8, 8, 14, 8, 10, 15, 16, 8, 17, 8, 12, 8, 8, 8, + 8, 8, 8, 15, 12, 0, 0, 18, 19, 0, 0, 0, 0, 20, 20, 21, + 8, 8, 8, 22, 8, 13, 8, 8, 23, 12, 8, 8, 8, 8, 8, 13, + 0, 13, 8, 8, 8, 0, 0, 0, 24, 24, 25, 0, 0, 0, 20, 5, + 24, 25, 0, 0, 9, 19, 0, 0, 0, 19, 26, 27, 0, 21, 11, 22, + 0, 0, 13, 8, 0, 0, 13, 11, 28, 29, 0, 0, 30, 5, 31, 0, + 9, 18, 0, 11, 0, 0, 32, 0, 0, 13, 0, 0, 33, 0, 0, 0, + 8, 13, 13, 8, 13, 8, 13, 8, 8, 12, 12, 0, 0, 3, 0, 0, + 13, 11, 0, 0, 0, 34, 35, 0, 36, 0, 0, 0, 18, 0, 0, 0, + 32, 19, 0, 0, 0, 0, 8, 8, 0, 0, 18, 19, 0, 0, 0, 9, + 18, 27, 0, 0, 0, 0, 10, 27, 0, 0, 37, 19, 0, 0, 0, 12, + 0, 19, 0, 0, 0, 0, 13, 19, 0, 0, 19, 0, 19, 18, 22, 0, + 0, 0, 27, 11, 3, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, + 18, 0, 0, 32, 27, 18, 0, 19, 18, 38, 17, 0, 32, 0, 0, 0, + 0, 27, 0, 0, 0, 0, 0, 25, 0, 27, 36, 36, 27, 0, 0, 0, + 0, 0, 18, 32, 9, 0, 0, 0, 0, 0, 0, 39, 24, 24, 39, 24, + 24, 24, 24, 40, 24, 24, 24, 24, 41, 42, 43, 0, 0, 0, 25, 0, + 0, 0, 44, 24, 8, 8, 45, 0, 8, 8, 12, 0, 8, 12, 8, 12, + 8, 8, 46, 46, 8, 8, 8, 12, 8, 22, 8, 47, 21, 22, 8, 8, + 8, 13, 8, 10, 13, 22, 8, 48, 49, 50, 30, 0, 51, 3, 0, 0, + 0, 30, 0, 52, 3, 53, 0, 54, 0, 3, 5, 0, 0, 3, 0, 3, + 55, 24, 24, 24, 42, 42, 42, 43, 42, 42, 42, 56, 0, 0, 35, 0, + 57, 34, 58, 59, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 59, + 69, 61, 62, 0, 70, 70, 70, 70, 20, 20, 20, 20, 0, 0, 71, 0, + 0, 0, 13, 0, 0, 0, 0, 27, 0, 0, 0, 10, 0, 19, 32, 19, + 0, 36, 0, 72, 35, 0, 0, 0, 32, 37, 32, 0, 36, 0, 0, 10, + 12, 12, 12, 0, 0, 0, 0, 8, 8, 0, 13, 12, 0, 0, 33, 0, + 73, 73, 73, 73, 73, 20, 20, 20, 20, 74, 73, 73, 73, 73, 75, 0, + 0, 0, 0, 35, 0, 30, 0, 0, 0, 0, 0, 19, 0, 0, 0, 76, + 0, 0, 0, 44, 0, 0, 0, 3, 20, 5, 0, 0, 77, 0, 0, 0, + 0, 26, 30, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 46, 32, 0, + 9, 22, 33, 12, 0, 19, 3, 78, 0, 37, 11, 79, 34, 20, 20, 20, + 20, 20, 20, 30, 4, 24, 24, 24, 20, 73, 0, 0, 80, 73, 73, 73, + 73, 73, 73, 75, 20, 20, 20, 81, 81, 81, 81, 81, 81, 81, 20, 20, + 82, 81, 81, 81, 20, 20, 20, 83, 25, 0, 0, 0, 0, 0, 55, 0, + 36, 10, 8, 11, 36, 33, 13, 8, 20, 30, 0, 0, 3, 20, 0, 46, + 59, 59, 84, 8, 8, 11, 8, 36, 9, 22, 8, 15, 85, 86, 86, 86, + 86, 86, 86, 86, 86, 85, 85, 85, 87, 85, 86, 86, 88, 0, 0, 0, + 89, 90, 91, 92, 85, 87, 86, 85, 85, 85, 93, 87, 94, 94, 94, 94, + 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 98, 99, 99, 99, 99, 99, 100, 94, 94, 101, 95, 95, 95, + 95, 95, 95, 102, 97, 99, 99, 103, 104, 97, 105, 106, 107, 105, 108, 105, + 104, 96, 95, 105, 96, 109, 110, 97, 111, 106, 112, 105, 95, 106, 113, 95, + 96, 106, 0, 0, 94, 94, 94, 114, 115, 115, 116, 0, 115, 115, 115, 115, + 115, 117, 118, 20, 119, 120, 120, 120, 120, 119, 120, 0, 121, 122, 123, 123, + 124, 91, 125, 126, 90, 125, 127, 127, 127, 127, 126, 91, 125, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 126, 125, 126, 91, 128, 129, 130, 130, 130, + 130, 130, 130, 130, 131, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133, + 134, 132, 134, 132, 134, 132, 134, 135, 130, 136, 132, 133, 0, 0, 27, 19, + 0, 0, 18, 0, 0, 0, 0, 13, 8, 19, 0, 0, 0, 0, 18, 8, + 59, 59, 59, 59, 59, 137, 59, 59, 59, 59, 59, 137, 138, 139, 61, 137, + 59, 59, 66, 61, 59, 61, 59, 59, 59, 66, 140, 61, 59, 137, 59, 137, + 59, 59, 66, 140, 59, 141, 142, 59, 137, 59, 59, 59, 59, 62, 59, 59, + 59, 59, 59, 142, 139, 143, 61, 59, 140, 59, 144, 0, 138, 145, 144, 61, + 139, 143, 144, 144, 139, 143, 140, 59, 140, 59, 61, 141, 59, 59, 66, 59, + 59, 59, 59, 0, 61, 61, 66, 59, 20, 20, 30, 0, 20, 20, 146, 75, + 0, 0, 4, 0, 147, 0, 0, 0, 148, 0, 0, 0, 81, 81, 148, 0, + 20, 20, 35, 0, 149, 0, 0, 0, +}; + +static RE_UINT8 re_decomposition_type_stage_5[] = { + 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 10, 0, 0, 0, 0, 2, + 0, 0, 10, 10, 2, 2, 0, 0, 2, 10, 10, 0, 17, 17, 17, 0, + 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 1, 1, 2, + 2, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, + 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, + 2, 2, 2, 1, 1, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, + 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 2, 10, 10, 10, 0, + 10, 10, 0, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, + 0, 0, 0, 10, 1, 1, 2, 1, 0, 1, 0, 1, 1, 2, 1, 2, + 1, 1, 2, 0, 1, 1, 2, 2, 2, 2, 2, 4, 0, 4, 0, 0, + 0, 0, 0, 4, 2, 0, 2, 2, 2, 0, 2, 0, 10, 10, 0, 0, + 11, 0, 0, 0, 2, 2, 3, 2, 0, 2, 3, 3, 3, 3, 3, 3, + 0, 3, 2, 0, 0, 3, 3, 3, 3, 3, 0, 0, 10, 2, 10, 0, + 3, 0, 1, 0, 3, 0, 1, 1, 3, 3, 0, 3, 3, 2, 2, 2, + 2, 3, 0, 2, 3, 0, 0, 0, 17, 17, 17, 17, 0, 17, 0, 0, + 2, 2, 0, 2, 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 0, + 11, 10, 0, 0, 13, 0, 0, 0, 2, 0, 1, 12, 0, 0, 1, 12, + 16, 9, 9, 9, 16, 16, 16, 16, 2, 16, 16, 16, 2, 2, 2, 16, + 3, 3, 1, 1, 8, 7, 8, 7, 5, 6, 8, 7, 8, 7, 5, 6, + 8, 7, 0, 0, 0, 0, 0, 8, 7, 5, 6, 8, 7, 8, 7, 8, + 7, 8, 8, 7, 5, 8, 7, 5, 8, 8, 8, 8, 7, 7, 7, 7, + 7, 7, 7, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, + 6, 8, 8, 8, 8, 7, 7, 7, 7, 5, 5, 5, 7, 8, 0, 0, + 5, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 5, 5, 7, 5, + 5, 7, 7, 5, 7, 7, 5, 7, 5, 5, 5, 7, 0, 0, 5, 5, + 5, 7, 7, 7, 5, 7, 5, 7, 8, 0, 0, 0, 12, 12, 12, 12, + 12, 12, 0, 0, 12, 0, 0, 12, 12, 2, 2, 2, 15, 15, 15, 0, + 15, 15, 15, 15, 8, 6, 8, 0, 8, 0, 8, 6, 8, 6, 8, 6, + 8, 8, 7, 8, 7, 8, 7, 5, 6, 8, 7, 8, 6, 8, 7, 5, + 7, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 0, 0, 0, + 13, 13, 13, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 3, 3, 0, + 3, 3, 3, 0, 3, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, + 0, 3, 0, 3, 0, 0, 0, 3, 2, 2, 2, 9, 16, 0, 0, 0, + 16, 16, 16, 0, 9, 9, 0, 0, +}; + +/* Decomposition_Type: 2872 bytes. */ + +RE_UINT32 re_get_decomposition_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_decomposition_type_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_decomposition_type_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_decomposition_type_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_decomposition_type_stage_4[pos + f] << 2; + value = re_decomposition_type_stage_5[pos + code]; + + return value; +} + +/* East_Asian_Width. */ + +static RE_UINT8 re_east_asian_width_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 7, 8, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 10, 10, 10, 12, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 14, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 15, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 15, +}; + +static RE_UINT8 re_east_asian_width_stage_2[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 7, 8, 9, 10, 11, 12, 13, 14, 5, 15, 5, 16, 5, 5, 17, 18, + 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 24, 5, 5, 5, 5, 25, 5, 5, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 26, 5, 5, 5, 5, 5, 5, 5, 5, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 22, 22, 5, 5, 5, 28, 29, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 30, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 31, 32, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, + 5, 34, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 35, +}; + +static RE_UINT8 re_east_asian_width_stage_3[] = { + 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 11, 0, 0, 0, 0, 0, 15, 16, 0, 0, + 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 17, 18, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 0, 0, 20, 21, 20, 21, 0, 0, 0, + 9, 19, 19, 19, 19, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 23, 24, 25, 0, 0, 0, 26, 27, 0, 28, 0, 0, 0, 0, 0, + 29, 30, 31, 0, 0, 32, 33, 34, 35, 34, 0, 36, 0, 37, 38, 0, + 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 0, 0, 0, 0, + 0, 44, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 51, 19, + 19, 19, 19, 19, 33, 19, 19, 52, 19, 53, 21, 54, 55, 56, 57, 0, + 58, 59, 0, 0, 60, 0, 61, 0, 0, 62, 0, 62, 63, 19, 64, 19, + 0, 0, 0, 65, 0, 38, 0, 66, 0, 0, 0, 0, 0, 0, 67, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 22, 70, 22, 22, 22, 22, 22, 71, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 72, 0, 73, + 74, 22, 22, 75, 76, 22, 22, 22, 22, 77, 22, 22, 22, 22, 22, 22, + 78, 22, 79, 76, 22, 22, 22, 22, 75, 22, 22, 80, 22, 22, 71, 22, + 22, 75, 22, 22, 81, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 75, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 82, 22, 22, 22, 83, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 22, 82, 0, 0, 0, 0, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 71, 0, 0, 0, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 84, 0, 22, 22, 85, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 87, 88, 88, 88, 88, 88, 89, 90, 90, 90, 90, 91, 92, 93, 94, 65, + 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 96, 19, 97, 19, 19, 19, 34, 19, 19, 96, 0, 0, 0, 0, 0, 0, + 98, 22, 22, 80, 99, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 79, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 97, +}; + +static RE_UINT8 re_east_asian_width_stage_4[] = { + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 7, 0, 10, 0, 0, 11, 12, 11, 13, 14, 10, 9, 14, + 8, 12, 9, 5, 15, 0, 0, 0, 16, 0, 12, 0, 0, 13, 12, 0, + 17, 0, 11, 12, 9, 11, 7, 15, 13, 0, 0, 0, 0, 0, 0, 10, + 5, 5, 5, 11, 0, 18, 17, 15, 11, 0, 7, 16, 7, 7, 7, 7, + 17, 7, 7, 7, 19, 7, 14, 0, 20, 20, 20, 20, 18, 9, 14, 14, + 9, 7, 0, 0, 8, 15, 12, 10, 0, 11, 0, 12, 17, 11, 0, 0, + 0, 0, 21, 11, 12, 15, 15, 0, 12, 10, 0, 0, 22, 10, 12, 0, + 12, 11, 12, 9, 7, 7, 7, 0, 7, 7, 14, 0, 0, 0, 15, 0, + 0, 0, 14, 0, 10, 11, 0, 0, 0, 12, 0, 0, 8, 12, 18, 12, + 15, 15, 10, 17, 18, 16, 7, 5, 0, 7, 0, 14, 0, 0, 11, 11, + 10, 0, 0, 0, 14, 7, 13, 13, 13, 13, 0, 0, 0, 15, 15, 0, + 0, 15, 0, 0, 0, 0, 0, 12, 0, 0, 23, 0, 7, 7, 19, 7, + 7, 0, 0, 0, 13, 14, 0, 0, 13, 13, 0, 14, 14, 13, 18, 13, + 14, 0, 0, 0, 13, 14, 0, 12, 0, 22, 15, 13, 0, 14, 0, 5, + 5, 0, 0, 0, 19, 19, 9, 19, 0, 0, 0, 13, 0, 7, 7, 19, + 19, 0, 7, 7, 0, 0, 0, 15, 0, 13, 7, 7, 0, 24, 1, 25, + 0, 26, 0, 0, 0, 17, 14, 0, 20, 20, 27, 20, 20, 0, 0, 0, + 20, 28, 0, 0, 20, 20, 20, 0, 29, 20, 20, 20, 20, 20, 20, 30, + 31, 20, 20, 20, 20, 30, 31, 20, 0, 31, 20, 20, 20, 20, 20, 28, + 20, 20, 30, 0, 20, 20, 7, 7, 20, 20, 20, 32, 20, 30, 0, 0, + 20, 20, 28, 0, 30, 20, 20, 20, 20, 30, 20, 0, 33, 34, 34, 34, + 34, 34, 34, 34, 35, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, + 38, 36, 38, 36, 38, 36, 38, 39, 34, 40, 36, 37, 28, 0, 0, 0, + 7, 7, 9, 0, 7, 7, 7, 14, 30, 0, 0, 0, 20, 20, 32, 0, +}; + +static RE_UINT8 re_east_asian_width_stage_5[] = { + 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 0, 0, 1, 5, 5, + 1, 5, 5, 1, 1, 0, 1, 0, 5, 1, 1, 5, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, + 3, 3, 3, 3, 0, 2, 0, 0, 0, 1, 1, 0, 0, 3, 3, 0, + 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, 5, 0, 3, 3, 0, 3, + 3, 3, 0, 0, 4, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, + 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, + 4, 4, 4, 0, +}; + +/* East_Asian_Width: 1668 bytes. */ + +RE_UINT32 re_get_east_asian_width(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_east_asian_width_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_east_asian_width_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_east_asian_width_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_east_asian_width_stage_4[pos + f] << 2; + value = re_east_asian_width_stage_5[pos + code]; + + return value; +} + +/* Joining_Group. */ + +static RE_UINT8 re_joining_group_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_joining_group_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_joining_group_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_joining_group_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 21, 0, 22, + 0, 0, 23, 24, 25, 26, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, + 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_joining_group_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 3, 3, 43, 3, 45, 3, + 4, 41, 4, 4, 13, 13, 13, 6, 6, 31, 31, 35, 35, 33, 33, 39, + 39, 1, 1, 11, 11, 55, 55, 55, 0, 9, 29, 19, 22, 24, 26, 16, + 43, 45, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 29, + 0, 3, 3, 3, 0, 3, 43, 43, 45, 4, 4, 4, 4, 4, 4, 4, + 4, 13, 13, 13, 13, 13, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 31, 31, 31, 31, 31, 31, 31, 31, 31, 35, 35, 35, 33, 33, 39, + 1, 9, 9, 9, 9, 9, 9, 29, 29, 11, 38, 11, 19, 19, 19, 11, + 11, 11, 11, 11, 11, 22, 22, 22, 22, 26, 26, 26, 26, 56, 21, 13, + 41, 17, 17, 14, 43, 43, 43, 43, 43, 43, 43, 43, 55, 47, 55, 43, + 45, 45, 46, 46, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 6, 31, + 0, 0, 35, 33, 1, 0, 0, 21, 2, 0, 5, 12, 12, 7, 7, 15, + 44, 50, 18, 42, 42, 48, 49, 20, 23, 25, 27, 36, 10, 8, 28, 32, + 34, 30, 7, 37, 40, 5, 12, 7, 0, 0, 0, 0, 0, 51, 52, 53, + 4, 4, 4, 4, 4, 4, 4, 13, 13, 6, 6, 31, 35, 1, 1, 1, + 9, 9, 11, 11, 11, 24, 24, 26, 26, 26, 22, 31, 31, 35, 13, 13, + 35, 31, 13, 3, 3, 55, 55, 45, 43, 43, 54, 54, 13, 35, 35, 19, + 4, 0, 13, 39, 9, 29, 22, 24, 45, 45, 31, 43, 57, 0, 0, 0, +}; + +/* Joining_Group: 481 bytes. */ + +RE_UINT32 re_get_joining_group(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_joining_group_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_joining_group_stage_2[pos + f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_joining_group_stage_3[pos + f] << 4; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_joining_group_stage_4[pos + f] << 3; + value = re_joining_group_stage_5[pos + code]; + + return value; +} + +/* Joining_Type. */ + +static RE_UINT8 re_joining_type_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 2, 7, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_joining_type_stage_2[] = { + 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 1, 16, 1, 1, 1, 17, 18, 19, 20, 21, 22, 23, 1, 1, + 24, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 25, 26, 1, 1, + 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 28, 1, 29, 30, 31, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, + 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 1, + 38, 39, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 42, 43, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 44, 45, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_joining_type_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 5, 6, 0, 0, 0, + 0, 7, 8, 9, 10, 2, 11, 12, 13, 14, 15, 15, 16, 17, 18, 19, + 20, 21, 22, 2, 23, 24, 25, 26, 0, 0, 27, 28, 29, 15, 30, 31, + 0, 32, 33, 0, 34, 35, 0, 0, 0, 0, 36, 0, 0, 0, 37, 38, + 39, 0, 0, 40, 41, 42, 43, 0, 44, 0, 0, 45, 46, 0, 43, 0, + 47, 0, 0, 45, 48, 44, 0, 49, 47, 0, 0, 45, 50, 0, 43, 0, + 44, 0, 0, 51, 46, 52, 43, 0, 53, 0, 0, 0, 54, 0, 0, 0, + 0, 0, 0, 55, 56, 57, 43, 0, 0, 0, 0, 51, 58, 0, 43, 0, + 0, 0, 0, 0, 46, 0, 43, 0, 0, 0, 0, 0, 59, 60, 0, 0, + 0, 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 64, 0, 0, 0, + 0, 65, 0, 66, 0, 0, 0, 67, 68, 69, 2, 70, 52, 0, 0, 0, + 0, 0, 71, 72, 0, 73, 28, 74, 75, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 76, 0, 76, 0, 43, 0, 43, 0, 0, 0, 77, 78, 79, 0, 0, + 80, 0, 15, 15, 15, 15, 15, 81, 82, 15, 83, 0, 0, 0, 0, 0, + 0, 0, 84, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 86, 0, 0, 0, 87, 88, 89, 0, 0, 0, 0, 0, 0, 0, 0, + 90, 0, 0, 91, 53, 0, 92, 90, 93, 0, 94, 0, 0, 0, 95, 93, + 0, 0, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, 99, 100, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 101, 96, + 102, 0, 103, 0, 0, 0, 104, 0, 0, 0, 0, 0, 0, 2, 2, 28, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 93, + 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 2, 2, + 0, 0, 105, 0, 0, 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 20, 107, 0, 20, 0, 0, 0, 0, 0, 93, + 108, 0, 57, 0, 15, 15, 15, 109, 0, 0, 0, 0, 100, 0, 2, 93, + 0, 0, 110, 0, 111, 93, 0, 0, 39, 0, 0, 112, 0, 0, 0, 0, + 0, 0, 113, 114, 115, 0, 0, 0, 0, 0, 0, 116, 44, 0, 117, 52, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, + 0, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 121, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 44, 0, 0, 123, 101, 0, 0, 0, 93, 0, 0, 124, 0, 0, 0, 0, + 39, 0, 125, 126, 0, 0, 0, 0, 93, 0, 0, 127, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 129, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 20, 39, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 130, 131, 132, 0, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 44, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, +}; + +static RE_UINT8 re_joining_type_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 2, 4, 0, + 5, 2, 2, 2, 2, 2, 2, 6, 7, 6, 0, 0, 2, 2, 8, 9, + 10, 11, 12, 13, 14, 15, 15, 15, 16, 15, 17, 2, 0, 0, 0, 18, + 19, 20, 15, 15, 15, 15, 21, 21, 21, 21, 22, 15, 15, 15, 15, 15, + 23, 21, 21, 24, 25, 26, 2, 27, 2, 27, 28, 29, 0, 0, 18, 30, + 0, 0, 0, 3, 31, 32, 22, 33, 15, 15, 34, 23, 2, 2, 8, 35, + 15, 15, 32, 15, 15, 15, 13, 36, 24, 36, 22, 15, 0, 37, 2, 2, + 9, 0, 0, 0, 0, 0, 18, 15, 15, 15, 38, 2, 2, 0, 39, 0, + 0, 37, 6, 2, 2, 5, 5, 4, 36, 33, 12, 13, 15, 40, 5, 0, + 41, 15, 25, 42, 0, 2, 2, 2, 2, 2, 2, 8, 8, 0, 0, 0, + 0, 0, 43, 9, 5, 2, 9, 1, 5, 2, 0, 0, 37, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 9, 5, 9, 0, 1, 7, 0, 0, 0, + 7, 3, 27, 4, 4, 1, 0, 0, 5, 6, 9, 1, 0, 0, 0, 27, + 0, 43, 0, 0, 43, 0, 0, 0, 9, 0, 0, 1, 0, 0, 0, 37, + 9, 37, 28, 4, 0, 7, 0, 0, 0, 43, 0, 4, 0, 0, 43, 0, + 37, 44, 0, 0, 1, 2, 8, 0, 0, 3, 2, 8, 1, 2, 6, 9, + 0, 0, 2, 4, 0, 0, 4, 0, 0, 45, 1, 0, 5, 2, 2, 8, + 2, 28, 0, 5, 2, 2, 5, 2, 2, 2, 2, 9, 0, 0, 0, 5, + 28, 2, 7, 7, 0, 0, 4, 37, 5, 9, 0, 0, 43, 7, 0, 1, + 37, 9, 0, 0, 0, 6, 2, 4, 0, 43, 5, 2, 2, 0, 0, 1, + 0, 46, 47, 4, 15, 15, 0, 0, 0, 46, 15, 15, 15, 15, 48, 0, + 8, 3, 9, 0, 43, 0, 5, 0, 0, 3, 27, 0, 0, 43, 2, 8, + 44, 5, 2, 9, 3, 2, 2, 27, 2, 0, 0, 0, 0, 28, 8, 9, + 0, 0, 3, 2, 4, 0, 0, 0, 37, 4, 6, 0, 0, 43, 4, 45, + 0, 0, 0, 2, 2, 37, 0, 0, 8, 2, 2, 2, 28, 2, 9, 1, + 0, 9, 0, 0, 2, 8, 0, 0, 0, 0, 3, 49, 0, 0, 37, 8, + 2, 9, 37, 2, 0, 0, 37, 4, 0, 0, 7, 0, 8, 2, 2, 4, + 43, 43, 3, 0, 50, 0, 0, 0, 0, 37, 2, 4, 0, 3, 2, 2, + 3, 37, 4, 9, 0, 0, 5, 8, 7, 7, 0, 0, 3, 0, 0, 9, + 28, 27, 9, 37, 0, 0, 0, 4, 0, 1, 9, 1, 0, 0, 0, 43, + 0, 0, 5, 0, 5, 7, 0, 2, 0, 0, 8, 3, 0, 0, 2, 2, + 3, 8, 7, 1, 0, 3, 2, 5, 2, 9, 0, 0, 0, 37, 2, 8, + 0, 0, 3, 1, 2, 6, 0, 0, 0, 3, 4, 0, 3, 2, 2, 2, + 8, 5, 2, 0, +}; + +static RE_UINT8 re_joining_type_stage_5[] = { + 0, 0, 0, 0, 0, 5, 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, + 5, 5, 0, 0, 0, 5, 5, 5, 5, 5, 0, 5, 0, 5, 5, 0, + 5, 5, 5, 0, 5, 0, 0, 0, 2, 0, 3, 3, 3, 3, 2, 3, + 2, 3, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 3, 2, 2, 5, 0, 0, 2, 2, 5, 3, 3, 3, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, + 2, 3, 2, 3, 2, 2, 3, 3, 0, 3, 5, 5, 5, 0, 0, 5, + 5, 0, 5, 5, 5, 5, 3, 3, 2, 0, 0, 2, 3, 5, 2, 2, + 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 0, 3, 2, 2, + 3, 2, 2, 2, 0, 0, 5, 5, 2, 2, 2, 5, 0, 0, 1, 0, + 3, 2, 0, 0, 2, 0, 2, 2, 3, 0, 0, 0, 0, 0, 5, 0, + 5, 0, 5, 0, 0, 5, 0, 5, 0, 0, 0, 2, 0, 0, 1, 5, + 2, 5, 2, 0, 0, 1, 5, 5, 2, 2, 4, 0, +}; + +/* Joining_Type: 1896 bytes. */ + +RE_UINT32 re_get_joining_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_joining_type_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_joining_type_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_joining_type_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_joining_type_stage_4[pos + f] << 2; + value = re_joining_type_stage_5[pos + code]; + + return value; +} + +/* Line_Break. */ + +static RE_UINT8 re_line_break_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 10, 10, 16, 10, 10, 10, 10, 17, 10, 18, 19, 20, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 22, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, +}; + +static RE_UINT8 re_line_break_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 2, 2, 2, 2, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 2, 51, 2, 2, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 87, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 88, 79, 79, 79, 79, 79, 79, 79, 79, 89, 2, 2, 90, 91, 2, 92, + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 101, + 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, + 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, + 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, + 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, + 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 108, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 79, 79, 79, 79, 110, 111, 2, 2, 112, 113, 114, 115, 116, 117, + 118, 119, 120, 121, 72, 122, 123, 124, 2, 125, 72, 72, 72, 72, 72, 72, + 126, 72, 127, 128, 129, 72, 130, 72, 131, 72, 72, 72, 132, 72, 72, 72, + 133, 134, 135, 136, 72, 72, 72, 72, 72, 72, 72, 72, 72, 137, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 2, 2, 138, 72, 139, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 140, 141, 142, 2, 143, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 144, 72, 72, 72, 72, 72, 72, 72, 72, 72, 145, 146, + 147, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 148, 149, 150, 151, 72, 152, 72, 153, 154, 155, 2, 2, 156, 2, 157, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 158, 159, 72, 72, + 160, 161, 162, 163, 164, 72, 165, 166, 167, 168, 169, 170, 171, 172, 173, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 174, + 175, 72, 176, 177, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, +}; + +static RE_UINT16 re_line_break_stage_3[] = { + 0, 1, 2, 3, 4, 5, 4, 6, 7, 1, 8, 9, 4, 10, 4, 10, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 12, 4, 4, + 1, 1, 1, 1, 13, 14, 15, 16, 17, 4, 18, 4, 4, 4, 4, 4, + 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 21, 4, 22, 21, 4, + 23, 24, 1, 25, 26, 27, 28, 29, 30, 31, 4, 4, 32, 1, 33, 34, + 4, 4, 4, 4, 4, 35, 36, 37, 38, 39, 4, 1, 40, 4, 4, 4, + 4, 4, 41, 42, 37, 4, 32, 43, 4, 44, 45, 46, 4, 47, 48, 48, + 48, 48, 49, 48, 48, 48, 50, 51, 52, 4, 4, 53, 1, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 58, 59, 66, 67, 68, 69, 70, + 71, 18, 59, 72, 73, 74, 63, 75, 57, 58, 59, 72, 76, 77, 63, 20, + 78, 79, 80, 81, 82, 83, 69, 84, 85, 86, 59, 87, 88, 89, 63, 90, + 91, 86, 59, 92, 88, 93, 63, 94, 91, 86, 4, 95, 96, 97, 63, 98, + 99, 100, 4, 101, 102, 103, 48, 104, 105, 106, 106, 107, 108, 109, 48, 48, + 110, 111, 112, 113, 114, 115, 48, 48, 116, 117, 37, 118, 56, 4, 119, 120, + 121, 122, 1, 123, 124, 125, 48, 48, 106, 106, 106, 106, 126, 106, 106, 106, + 106, 127, 4, 4, 128, 4, 4, 4, 129, 129, 129, 129, 129, 129, 130, 130, + 130, 130, 131, 132, 132, 132, 132, 132, 4, 4, 4, 4, 133, 134, 4, 4, + 133, 4, 4, 135, 136, 137, 4, 4, 4, 136, 4, 4, 4, 138, 139, 119, + 4, 140, 4, 4, 4, 4, 4, 141, 142, 4, 4, 4, 4, 4, 4, 4, + 142, 143, 4, 4, 4, 4, 144, 74, 145, 146, 4, 147, 4, 148, 145, 149, + 106, 106, 106, 106, 106, 150, 151, 140, 152, 151, 4, 4, 4, 4, 4, 20, + 4, 4, 153, 4, 4, 4, 4, 154, 4, 119, 155, 155, 156, 106, 157, 158, + 106, 106, 159, 106, 160, 161, 4, 4, 4, 162, 106, 106, 106, 163, 106, 164, + 151, 151, 157, 48, 48, 48, 48, 48, 165, 4, 4, 166, 167, 168, 169, 170, + 171, 4, 172, 37, 4, 4, 41, 173, 4, 4, 166, 174, 175, 37, 4, 176, + 48, 48, 48, 48, 20, 177, 178, 179, 4, 4, 4, 4, 1, 1, 180, 181, + 4, 182, 4, 4, 182, 183, 4, 184, 4, 4, 4, 185, 185, 186, 4, 187, + 188, 189, 190, 191, 192, 193, 194, 195, 196, 119, 197, 198, 199, 1, 1, 200, + 201, 202, 203, 4, 4, 204, 205, 206, 207, 206, 4, 4, 4, 208, 4, 4, + 209, 210, 211, 212, 213, 214, 215, 4, 216, 217, 218, 219, 4, 4, 4, 4, + 4, 220, 221, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 222, + 4, 4, 223, 48, 224, 48, 225, 225, 225, 225, 225, 225, 225, 225, 225, 226, + 225, 225, 225, 225, 205, 225, 225, 227, 225, 228, 229, 230, 231, 232, 233, 4, + 234, 235, 4, 236, 237, 4, 238, 239, 4, 240, 4, 241, 242, 243, 244, 245, + 246, 4, 4, 4, 4, 247, 248, 249, 225, 250, 4, 4, 251, 4, 252, 4, + 253, 254, 4, 4, 4, 255, 4, 256, 4, 4, 4, 4, 119, 257, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 4, 4, 46, 4, 4, 46, 4, 4, + 4, 4, 4, 4, 4, 4, 258, 259, 4, 4, 128, 4, 4, 4, 260, 261, + 4, 223, 262, 262, 262, 262, 1, 1, 263, 264, 265, 266, 48, 48, 48, 48, + 267, 268, 267, 267, 267, 267, 267, 222, 267, 267, 267, 267, 267, 267, 267, 267, + 267, 267, 267, 267, 267, 269, 48, 270, 271, 272, 273, 274, 275, 267, 276, 267, + 277, 278, 279, 267, 276, 267, 277, 280, 281, 267, 282, 283, 267, 267, 267, 267, + 284, 267, 267, 285, 267, 267, 222, 286, 267, 284, 267, 267, 287, 267, 267, 267, + 267, 267, 267, 267, 267, 267, 267, 284, 267, 267, 267, 267, 4, 4, 4, 4, + 267, 288, 267, 267, 267, 267, 267, 267, 289, 267, 267, 267, 290, 4, 4, 176, + 291, 4, 292, 48, 4, 4, 258, 293, 4, 294, 4, 4, 4, 4, 4, 295, + 46, 296, 224, 48, 48, 48, 48, 90, 297, 4, 298, 299, 4, 4, 4, 300, + 301, 4, 4, 166, 302, 151, 1, 303, 37, 4, 304, 4, 305, 306, 129, 307, + 52, 4, 4, 308, 309, 310, 48, 48, 4, 4, 311, 180, 312, 313, 106, 159, + 106, 106, 106, 106, 314, 315, 32, 316, 317, 318, 262, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 4, 4, 319, 151, 320, 321, 322, 323, 322, 324, 322, 320, + 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, + 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, + 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, + 323, 322, 325, 130, 326, 132, 132, 327, 328, 328, 328, 328, 328, 328, 328, 328, + 223, 329, 330, 331, 332, 4, 4, 4, 4, 4, 4, 4, 333, 334, 4, 4, + 4, 4, 4, 335, 48, 4, 4, 4, 4, 336, 4, 4, 20, 48, 48, 337, + 1, 338, 180, 339, 340, 341, 342, 185, 4, 4, 4, 4, 4, 4, 4, 343, + 344, 345, 267, 346, 267, 347, 348, 349, 4, 350, 4, 46, 351, 352, 353, 354, + 355, 4, 137, 356, 184, 184, 48, 48, 4, 4, 4, 4, 4, 4, 4, 224, + 357, 4, 4, 358, 4, 4, 4, 4, 224, 359, 48, 48, 48, 4, 4, 360, + 4, 119, 4, 4, 4, 74, 48, 48, 4, 46, 296, 4, 224, 48, 48, 48, + 4, 361, 4, 4, 362, 363, 48, 48, 4, 184, 151, 48, 48, 48, 48, 48, + 364, 4, 4, 365, 4, 366, 48, 48, 4, 367, 4, 368, 48, 48, 48, 48, + 4, 4, 4, 369, 48, 48, 48, 48, 370, 371, 4, 372, 20, 373, 4, 4, + 4, 4, 4, 374, 4, 375, 4, 376, 4, 4, 4, 4, 377, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 4, 46, 171, 4, 4, 378, 379, 336, 380, 48, + 171, 4, 4, 381, 382, 4, 377, 151, 171, 4, 305, 383, 384, 48, 48, 48, + 171, 4, 4, 308, 385, 151, 48, 48, 4, 4, 32, 386, 151, 48, 48, 48, + 4, 4, 4, 4, 4, 4, 46, 48, 4, 4, 4, 4, 4, 4, 387, 384, + 4, 4, 4, 4, 4, 388, 4, 4, 389, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 390, 4, 4, 46, 48, 48, 48, 48, 48, + 4, 4, 4, 377, 48, 48, 48, 48, 4, 4, 4, 4, 141, 391, 1, 51, + 392, 171, 48, 48, 48, 48, 48, 48, 393, 48, 48, 48, 48, 48, 48, 48, + 4, 4, 4, 4, 4, 4, 4, 154, 4, 4, 22, 4, 4, 4, 394, 1, + 395, 4, 396, 4, 4, 184, 48, 48, 4, 4, 4, 4, 397, 48, 48, 48, + 4, 4, 4, 4, 4, 223, 4, 333, 4, 4, 4, 4, 4, 185, 4, 4, + 4, 145, 398, 399, 400, 4, 4, 4, 401, 402, 4, 403, 404, 86, 4, 4, + 4, 4, 375, 4, 4, 4, 4, 4, 4, 4, 4, 4, 405, 406, 406, 406, + 400, 4, 407, 408, 409, 410, 411, 412, 413, 359, 414, 359, 48, 48, 48, 333, + 267, 267, 270, 267, 267, 267, 267, 267, 267, 222, 284, 415, 283, 283, 48, 48, + 416, 225, 417, 225, 225, 225, 418, 225, 225, 416, 48, 48, 48, 48, 419, 420, + 421, 267, 267, 285, 422, 393, 48, 48, 267, 267, 423, 424, 267, 267, 267, 289, + 267, 222, 267, 425, 426, 48, 267, 423, 267, 267, 267, 284, 427, 267, 267, 267, + 267, 267, 428, 429, 267, 267, 267, 430, 431, 432, 433, 434, 296, 267, 435, 48, + 48, 48, 48, 48, 48, 48, 48, 436, 267, 267, 267, 267, 437, 48, 48, 48, + 267, 267, 267, 267, 269, 48, 48, 48, 4, 4, 4, 4, 4, 4, 4, 296, + 267, 267, 267, 267, 267, 267, 267, 282, 438, 48, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, +}; + +static RE_UINT8 re_line_break_stage_4[] = { + 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 13, 14, 15, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 17, 14, + 14, 14, 14, 14, 14, 16, 18, 19, 0, 0, 20, 0, 0, 0, 0, 0, + 21, 22, 23, 24, 25, 26, 27, 14, 22, 28, 29, 28, 28, 26, 28, 30, + 14, 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 24, 31, 28, 31, 14, + 25, 14, 14, 14, 28, 28, 24, 32, 0, 0, 0, 0, 0, 0, 0, 33, + 0, 0, 0, 0, 0, 0, 34, 34, 34, 35, 0, 0, 0, 0, 0, 0, + 14, 14, 14, 14, 36, 14, 14, 37, 36, 36, 14, 14, 14, 38, 38, 14, + 14, 39, 14, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 14, 14, 14, + 14, 14, 14, 14, 36, 36, 36, 36, 39, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 38, 39, 14, 14, 14, 14, 14, 14, 14, 40, 41, 36, 42, + 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, + 19, 45, 0, 46, 36, 36, 36, 36, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 48, 36, 36, 47, 49, 38, 36, 36, 36, 36, 36, + 14, 14, 38, 14, 50, 51, 13, 14, 0, 0, 0, 0, 0, 52, 53, 54, + 14, 14, 14, 14, 14, 19, 0, 0, 12, 12, 12, 12, 12, 55, 56, 14, + 45, 14, 14, 14, 14, 14, 14, 14, 14, 14, 57, 0, 0, 0, 45, 19, + 0, 0, 45, 19, 45, 0, 0, 14, 12, 12, 12, 12, 12, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 39, 19, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 0, 0, 0, 53, 39, 14, 14, 14, 14, 0, 0, 0, 0, 0, + 45, 36, 36, 36, 36, 36, 36, 36, 0, 0, 14, 14, 58, 38, 36, 36, + 14, 14, 14, 0, 0, 19, 0, 0, 0, 0, 19, 0, 19, 0, 0, 36, + 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, 19, 0, 36, 38, + 36, 36, 36, 36, 36, 36, 36, 36, 38, 14, 14, 14, 14, 14, 38, 36, + 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, + 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 45, 0, + 19, 0, 0, 0, 14, 14, 14, 14, 14, 0, 59, 12, 12, 12, 12, 12, + 14, 14, 14, 14, 39, 14, 14, 14, 43, 0, 39, 14, 14, 14, 38, 39, + 38, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, + 38, 38, 36, 14, 14, 36, 45, 0, 0, 0, 53, 43, 53, 43, 0, 38, + 36, 36, 36, 43, 36, 36, 14, 39, 14, 0, 36, 12, 12, 12, 12, 12, + 14, 51, 14, 14, 50, 9, 36, 36, 43, 0, 39, 14, 14, 38, 36, 39, + 38, 14, 39, 38, 14, 36, 53, 0, 0, 53, 36, 43, 53, 43, 0, 36, + 43, 36, 36, 36, 39, 14, 38, 38, 36, 36, 36, 12, 12, 12, 12, 12, + 0, 14, 19, 36, 36, 36, 36, 36, 43, 0, 39, 14, 14, 14, 14, 39, + 38, 14, 39, 14, 14, 36, 45, 0, 0, 0, 0, 43, 0, 43, 0, 36, + 38, 36, 36, 36, 36, 36, 36, 36, 9, 36, 36, 36, 36, 36, 36, 36, + 0, 0, 53, 43, 53, 43, 0, 36, 36, 36, 36, 0, 36, 36, 14, 39, + 36, 45, 39, 14, 14, 38, 36, 14, 38, 14, 14, 36, 39, 38, 38, 14, + 36, 39, 38, 36, 14, 38, 36, 14, 14, 14, 14, 14, 14, 36, 36, 0, + 0, 53, 36, 0, 53, 0, 0, 36, 38, 36, 36, 43, 36, 36, 36, 36, + 14, 14, 14, 14, 9, 38, 36, 36, 43, 0, 39, 14, 14, 14, 38, 14, + 38, 14, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 14, 36, 39, 0, + 0, 0, 53, 0, 53, 0, 0, 36, 36, 36, 43, 53, 14, 36, 36, 36, + 36, 36, 36, 36, 14, 14, 14, 14, 36, 0, 39, 14, 14, 14, 38, 14, + 14, 14, 39, 14, 14, 36, 45, 0, 36, 36, 43, 53, 36, 36, 36, 38, + 39, 38, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 38, 39, 0, + 0, 0, 53, 0, 53, 0, 0, 38, 36, 36, 36, 43, 36, 36, 36, 36, + 14, 14, 14, 36, 60, 14, 14, 14, 36, 0, 39, 14, 14, 14, 14, 14, + 14, 14, 14, 38, 36, 14, 14, 14, 14, 39, 14, 14, 14, 14, 39, 36, + 14, 14, 14, 38, 36, 53, 36, 43, 0, 0, 53, 53, 0, 0, 0, 0, + 36, 0, 38, 36, 36, 36, 36, 36, 61, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 36, 42, + 62, 62, 62, 62, 62, 62, 62, 64, 12, 12, 12, 12, 12, 59, 36, 36, + 61, 63, 63, 61, 63, 63, 61, 36, 36, 36, 62, 62, 61, 62, 62, 62, + 61, 62, 61, 61, 36, 62, 61, 62, 62, 62, 62, 62, 62, 61, 62, 36, + 62, 62, 63, 63, 62, 62, 62, 36, 12, 12, 12, 12, 12, 36, 62, 62, + 32, 65, 29, 65, 66, 67, 68, 54, 54, 69, 57, 14, 0, 14, 14, 14, + 14, 14, 44, 19, 19, 70, 70, 0, 14, 14, 14, 14, 14, 14, 38, 36, + 43, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 14, 14, 19, 0, + 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 59, + 14, 14, 14, 45, 14, 14, 38, 14, 65, 71, 14, 14, 72, 73, 36, 36, + 12, 12, 12, 12, 12, 59, 14, 14, 12, 12, 12, 12, 12, 62, 62, 62, + 14, 14, 14, 39, 36, 36, 39, 36, 74, 74, 74, 74, 74, 74, 74, 74, + 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, + 76, 76, 76, 76, 76, 76, 76, 76, 14, 14, 14, 14, 38, 14, 14, 36, + 14, 14, 14, 38, 38, 14, 14, 36, 38, 14, 14, 36, 14, 14, 14, 38, + 38, 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 38, 43, 0, 27, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 36, 36, 36, 14, 14, 38, 36, 36, 36, 36, 36, + 77, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 78, 36, + 14, 14, 14, 14, 14, 27, 59, 14, 14, 14, 14, 14, 14, 14, 38, 14, + 14, 0, 53, 36, 36, 36, 36, 36, 14, 0, 1, 41, 36, 36, 36, 36, + 14, 0, 36, 36, 36, 36, 36, 36, 38, 0, 36, 36, 36, 36, 36, 36, + 62, 62, 59, 79, 77, 80, 62, 36, 12, 12, 12, 12, 12, 36, 36, 36, + 14, 54, 59, 29, 54, 19, 0, 73, 14, 14, 14, 14, 19, 38, 36, 36, + 14, 14, 14, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, 36, 36, + 38, 36, 54, 12, 12, 12, 12, 12, 62, 62, 62, 62, 62, 62, 62, 36, + 62, 62, 63, 36, 36, 36, 36, 36, 62, 62, 62, 62, 62, 62, 36, 36, + 62, 62, 62, 62, 62, 36, 36, 36, 12, 12, 12, 12, 12, 63, 36, 62, + 14, 14, 14, 19, 0, 0, 36, 14, 62, 62, 62, 62, 62, 62, 62, 63, + 62, 62, 62, 62, 62, 62, 63, 43, 0, 0, 45, 14, 14, 14, 14, 14, + 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 45, 14, 14, 14, 36, 36, + 12, 12, 12, 12, 12, 59, 27, 59, 77, 14, 14, 14, 14, 19, 0, 0, + 0, 0, 14, 14, 14, 14, 38, 36, 0, 45, 14, 14, 14, 14, 14, 14, + 19, 0, 0, 0, 0, 0, 0, 14, 0, 0, 36, 36, 36, 36, 14, 14, + 0, 0, 0, 0, 36, 81, 59, 59, 12, 12, 12, 12, 12, 36, 39, 14, + 14, 14, 14, 14, 14, 14, 14, 59, 0, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 45, 14, 19, 14, 14, 0, 45, 38, 36, 36, 36, 36, + 0, 0, 0, 53, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 0, + 14, 14, 14, 36, 14, 14, 14, 36, 14, 14, 14, 14, 39, 39, 39, 39, + 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 38, 14, 14, 14, 14, 14, + 14, 14, 36, 14, 14, 14, 39, 14, 36, 14, 38, 14, 14, 14, 32, 38, + 59, 59, 59, 82, 59, 83, 0, 0, 82, 59, 84, 25, 85, 86, 85, 86, + 28, 14, 87, 88, 89, 0, 0, 33, 51, 51, 51, 51, 7, 90, 91, 14, + 14, 14, 92, 93, 91, 14, 14, 14, 14, 14, 14, 77, 59, 59, 27, 59, + 94, 14, 38, 0, 0, 0, 0, 0, 14, 36, 25, 14, 14, 14, 16, 95, + 24, 28, 25, 14, 14, 14, 16, 78, 23, 23, 23, 6, 23, 23, 23, 23, + 23, 23, 23, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 53, 36, 36, 36, 36, 36, 36, 36, 14, 50, 24, 14, 50, 14, 14, 14, + 14, 24, 14, 96, 14, 14, 14, 14, 24, 25, 14, 14, 14, 24, 14, 14, + 14, 14, 28, 14, 14, 24, 14, 25, 28, 28, 28, 28, 28, 28, 14, 14, + 28, 28, 28, 28, 28, 14, 14, 14, 14, 14, 14, 14, 24, 36, 36, 36, + 14, 25, 25, 14, 14, 14, 14, 14, 25, 28, 14, 24, 25, 24, 14, 24, + 24, 23, 24, 14, 14, 25, 24, 28, 25, 24, 24, 24, 28, 28, 25, 25, + 14, 14, 28, 28, 14, 14, 28, 14, 14, 14, 14, 14, 25, 14, 25, 14, + 14, 25, 14, 14, 14, 14, 14, 14, 28, 14, 28, 28, 14, 28, 14, 28, + 14, 28, 14, 28, 14, 14, 14, 14, 14, 14, 24, 14, 24, 14, 14, 14, + 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 24, + 14, 25, 14, 14, 14, 97, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, + 97, 97, 36, 36, 36, 36, 36, 36, 14, 14, 14, 38, 36, 36, 36, 36, + 14, 14, 14, 14, 14, 38, 36, 36, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 25, 28, 28, 25, 14, 14, 14, 14, 14, + 14, 28, 28, 14, 14, 14, 14, 14, 28, 24, 28, 28, 28, 14, 14, 14, + 14, 28, 14, 28, 14, 14, 28, 14, 28, 14, 14, 28, 25, 24, 14, 28, + 28, 14, 14, 14, 14, 14, 14, 14, 14, 28, 28, 14, 14, 14, 14, 24, + 97, 97, 24, 25, 24, 14, 14, 28, 14, 14, 97, 28, 99, 97, 97, 97, + 14, 14, 14, 14, 100, 97, 14, 14, 25, 25, 14, 14, 14, 14, 14, 14, + 28, 24, 28, 24, 101, 25, 28, 24, 14, 14, 14, 14, 14, 14, 14, 100, + 14, 14, 14, 14, 14, 14, 14, 28, 14, 14, 14, 14, 14, 14, 100, 97, + 97, 97, 97, 97, 101, 28, 102, 100, 97, 102, 101, 28, 97, 28, 101, 102, + 97, 24, 14, 14, 28, 101, 28, 28, 102, 97, 97, 102, 97, 101, 102, 97, + 103, 97, 99, 14, 97, 97, 97, 14, 14, 14, 14, 24, 14, 7, 85, 5, + 14, 54, 14, 14, 70, 70, 70, 70, 70, 70, 70, 28, 28, 28, 28, 28, + 28, 28, 14, 14, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, 14, 14, + 14, 14, 14, 70, 70, 70, 70, 70, 14, 16, 104, 104, 104, 104, 104, 104, + 104, 104, 104, 104, 98, 14, 14, 14, 14, 14, 14, 14, 70, 70, 14, 14, + 14, 14, 14, 14, 14, 14, 70, 14, 14, 14, 24, 28, 28, 36, 36, 36, + 14, 14, 14, 14, 14, 14, 14, 19, 0, 14, 36, 36, 105, 59, 77, 106, + 14, 14, 14, 14, 36, 36, 36, 39, 41, 36, 36, 36, 36, 36, 36, 43, + 14, 14, 14, 38, 14, 14, 14, 38, 85, 85, 85, 85, 85, 85, 85, 59, + 59, 59, 59, 27, 107, 14, 85, 14, 85, 70, 70, 70, 70, 59, 59, 57, + 59, 27, 77, 14, 14, 108, 36, 36, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 103, 97, 97, 97, 97, 97, 36, 36, 36, 36, 36, + 97, 97, 97, 97, 97, 97, 36, 36, 18, 109, 110, 97, 70, 70, 70, 70, + 70, 97, 70, 70, 70, 70, 111, 112, 97, 97, 97, 97, 97, 0, 0, 0, + 97, 97, 113, 97, 97, 110, 114, 97, 115, 116, 116, 116, 116, 97, 97, 97, + 97, 116, 97, 97, 97, 97, 97, 97, 97, 116, 116, 116, 97, 97, 97, 117, + 97, 97, 116, 118, 43, 119, 91, 114, 120, 116, 116, 116, 116, 97, 97, 97, + 97, 97, 116, 117, 97, 110, 121, 114, 36, 36, 103, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 36, 103, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 122, 97, 97, 97, 97, 97, 122, 36, 36, + 123, 123, 123, 123, 123, 123, 123, 123, 97, 97, 97, 97, 28, 28, 28, 28, + 97, 97, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 122, 36, + 97, 97, 97, 122, 36, 36, 36, 36, 14, 14, 14, 14, 14, 14, 27, 106, + 12, 12, 12, 12, 12, 14, 36, 36, 0, 45, 0, 0, 0, 0, 0, 14, + 14, 14, 14, 14, 36, 36, 36, 43, 0, 27, 59, 59, 36, 36, 36, 36, + 14, 14, 36, 36, 36, 36, 36, 36, 14, 45, 14, 45, 14, 19, 14, 14, + 14, 19, 0, 0, 14, 14, 36, 36, 14, 14, 14, 14, 124, 36, 36, 36, + 14, 14, 65, 54, 36, 36, 36, 36, 0, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 53, 36, 36, 36, 36, 59, 0, 14, 14, 14, 14, 14, 36, 36, + 14, 14, 14, 0, 0, 0, 0, 59, 14, 14, 14, 19, 0, 0, 0, 0, + 0, 0, 36, 36, 36, 36, 36, 39, 74, 74, 74, 74, 74, 74, 125, 36, + 14, 19, 0, 0, 0, 0, 0, 0, 45, 14, 14, 27, 59, 14, 14, 39, + 12, 12, 12, 12, 12, 36, 36, 14, 14, 14, 14, 14, 19, 0, 0, 0, + 14, 19, 14, 14, 14, 14, 0, 36, 12, 12, 12, 12, 12, 36, 27, 59, + 62, 63, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 62, 62, + 59, 14, 19, 53, 36, 36, 36, 36, 39, 14, 14, 38, 39, 14, 14, 38, + 39, 14, 14, 38, 36, 36, 36, 36, 14, 19, 0, 0, 0, 1, 0, 36, + 126, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, 127, 127, + 127, 127, 126, 127, 127, 127, 127, 127, 127, 127, 36, 36, 36, 36, 36, 36, + 75, 75, 75, 128, 36, 129, 76, 76, 76, 76, 76, 76, 76, 76, 36, 36, + 130, 130, 130, 130, 130, 130, 130, 130, 36, 39, 14, 14, 36, 36, 131, 132, + 47, 47, 47, 47, 49, 47, 47, 47, 47, 47, 47, 48, 47, 47, 48, 48, + 47, 131, 48, 47, 47, 47, 47, 47, 14, 36, 36, 36, 36, 36, 36, 36, + 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 70, + 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 124, 36, + 133, 134, 58, 135, 136, 36, 36, 36, 97, 97, 137, 104, 104, 104, 104, 104, + 104, 104, 109, 137, 109, 97, 97, 97, 109, 78, 91, 54, 137, 104, 104, 109, + 97, 97, 97, 122, 138, 139, 36, 36, 14, 14, 14, 14, 14, 14, 38, 140, + 105, 97, 6, 97, 70, 97, 109, 109, 97, 97, 97, 97, 97, 91, 97, 141, + 97, 97, 97, 97, 97, 137, 142, 97, 97, 97, 97, 97, 97, 137, 142, 137, + 112, 70, 93, 143, 123, 123, 123, 123, 144, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 91, 36, 14, 14, 14, 36, 14, 14, 14, + 36, 14, 14, 14, 36, 14, 38, 36, 22, 97, 138, 145, 14, 14, 14, 38, + 36, 36, 36, 36, 43, 0, 146, 36, 14, 14, 14, 14, 14, 14, 39, 14, + 14, 14, 14, 14, 14, 38, 14, 39, 59, 41, 36, 39, 14, 14, 14, 14, + 14, 14, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 36, + 14, 14, 14, 14, 14, 14, 19, 36, 14, 14, 14, 14, 14, 14, 14, 81, + 14, 14, 36, 36, 14, 14, 14, 14, 77, 14, 14, 36, 36, 36, 36, 36, + 14, 14, 14, 36, 38, 14, 14, 14, 14, 14, 14, 39, 38, 36, 38, 39, + 14, 14, 14, 81, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 81, + 14, 14, 14, 14, 14, 36, 36, 39, 14, 14, 14, 14, 36, 36, 36, 14, + 19, 0, 43, 53, 36, 36, 0, 0, 14, 14, 39, 14, 39, 14, 14, 14, + 14, 14, 36, 36, 0, 53, 36, 43, 59, 59, 59, 59, 38, 36, 36, 36, + 14, 14, 14, 36, 81, 59, 59, 59, 14, 14, 14, 36, 14, 14, 14, 14, + 14, 38, 36, 36, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, 36, 36, + 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 1, 77, 14, 14, 36, + 14, 14, 14, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 45, 14, 59, + 59, 36, 36, 36, 36, 36, 36, 36, 0, 0, 53, 12, 12, 12, 12, 12, + 59, 59, 36, 36, 36, 36, 36, 36, 45, 14, 27, 77, 41, 36, 36, 36, + 0, 0, 0, 0, 36, 36, 36, 36, 14, 38, 36, 36, 36, 36, 36, 36, + 14, 14, 14, 14, 147, 70, 112, 14, 14, 98, 14, 70, 70, 14, 14, 14, + 14, 14, 14, 14, 16, 112, 14, 14, 19, 0, 0, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 43, 97, 36, 36, 36, 36, 36, 36, 36, + 14, 14, 19, 0, 0, 14, 19, 0, 0, 45, 19, 0, 0, 0, 14, 14, + 14, 14, 14, 14, 14, 0, 0, 14, 14, 0, 45, 36, 36, 36, 36, 36, + 36, 38, 39, 38, 39, 14, 38, 14, 14, 14, 14, 14, 14, 39, 39, 14, + 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 38, 39, 14, + 14, 14, 38, 14, 14, 14, 38, 14, 14, 14, 14, 14, 14, 39, 14, 38, + 14, 14, 38, 38, 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 39, 38, 38, 39, 39, 14, 14, 14, + 14, 38, 14, 14, 39, 39, 36, 36, 36, 38, 36, 39, 39, 39, 39, 14, + 39, 38, 38, 39, 39, 39, 39, 39, 39, 38, 38, 39, 14, 38, 14, 14, + 14, 38, 14, 14, 39, 14, 38, 38, 14, 14, 14, 14, 14, 39, 14, 14, + 39, 14, 39, 14, 14, 39, 14, 14, 103, 97, 97, 97, 97, 97, 97, 122, + 28, 28, 28, 28, 28, 148, 36, 36, 28, 28, 28, 28, 28, 28, 28, 38, + 28, 28, 28, 28, 28, 14, 36, 36, 36, 36, 36, 149, 149, 149, 149, 149, + 149, 149, 149, 149, 149, 149, 149, 149, 97, 122, 36, 36, 36, 36, 36, 36, + 97, 97, 97, 97, 122, 36, 36, 36, 122, 36, 36, 36, 36, 36, 36, 36, + 97, 97, 97, 103, 97, 97, 97, 97, 97, 97, 99, 100, 97, 97, 100, 97, + 97, 97, 122, 97, 97, 122, 36, 36, 122, 97, 97, 97, 97, 97, 97, 97, + 100, 100, 100, 97, 97, 97, 97, 99, 99, 100, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 103, 97, 122, 36, 14, 14, 14, 100, 97, 97, 97, 97, + 97, 97, 97, 99, 14, 14, 14, 14, 14, 14, 100, 97, 97, 97, 97, 97, + 97, 14, 14, 14, 14, 14, 14, 36, 97, 97, 97, 97, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 103, 97, 97, 122, 36, 103, 97, 97, 97, 97, 97, + 43, 36, 36, 36, 36, 36, 36, 36, +}; + +static RE_UINT8 re_line_break_stage_5[] = { + 16, 16, 16, 18, 22, 20, 20, 21, 19, 6, 3, 12, 9, 10, 12, 3, + 1, 36, 12, 9, 8, 15, 8, 7, 11, 11, 8, 8, 12, 12, 12, 6, + 12, 1, 9, 36, 18, 2, 12, 16, 16, 29, 4, 1, 10, 9, 9, 9, + 12, 25, 25, 12, 25, 3, 12, 18, 25, 25, 17, 12, 25, 1, 17, 25, + 12, 17, 16, 4, 4, 4, 4, 16, 0, 0, 8, 0, 12, 0, 0, 12, + 0, 8, 18, 0, 0, 9, 0, 16, 18, 16, 16, 12, 6, 16, 37, 37, + 37, 0, 37, 12, 12, 10, 10, 10, 16, 6, 16, 0, 6, 6, 10, 11, + 11, 12, 6, 12, 8, 6, 18, 18, 0, 10, 0, 24, 24, 24, 24, 0, + 24, 12, 17, 17, 4, 17, 17, 18, 4, 6, 4, 12, 1, 2, 18, 17, + 12, 4, 4, 0, 31, 31, 32, 32, 33, 33, 18, 12, 2, 0, 5, 24, + 18, 9, 0, 18, 18, 4, 18, 28, 26, 25, 3, 3, 1, 3, 14, 14, + 14, 18, 20, 20, 3, 25, 5, 5, 8, 1, 2, 5, 30, 12, 2, 25, + 9, 12, 13, 13, 2, 12, 13, 12, 12, 13, 13, 25, 25, 13, 0, 13, + 2, 1, 0, 6, 6, 18, 1, 18, 26, 26, 2, 13, 13, 5, 5, 1, + 2, 2, 13, 16, 5, 13, 0, 38, 13, 38, 38, 13, 38, 0, 16, 5, + 5, 38, 38, 5, 13, 0, 38, 38, 10, 12, 31, 0, 34, 35, 35, 35, + 32, 0, 0, 33, 27, 27, 0, 37, 16, 37, 8, 2, 2, 8, 6, 1, + 2, 14, 13, 1, 13, 9, 10, 13, 0, 30, 13, 6, 13, 2, 12, 38, + 38, 12, 9, 0, 23, 25, 1, 1, 25, 0, 39, 39, +}; + +/* Line_Break: 7668 bytes. */ + +RE_UINT32 re_get_line_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_line_break_stage_1[f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_line_break_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_line_break_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_line_break_stage_4[pos + f] << 1; + value = re_line_break_stage_5[pos + code]; + + return value; +} + +/* Numeric_Type. */ + +static RE_UINT8 re_numeric_type_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, + 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, + 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +}; + +static RE_UINT8 re_numeric_type_stage_2[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, + 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, + 20, 21, 1, 1, 22, 1, 1, 23, 1, 1, 1, 1, 24, 1, 1, 1, + 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, + 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, + 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, + 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, + 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, + 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, + 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, + 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, + 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_numeric_type_stage_3[] = { + 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, + 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, + 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, + 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, + 0, 0, 0, 16, 17, 0, 0, 0, 0, 0, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 21, 22, 0, 0, 23, 0, 0, 0, 24, 25, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 26, 27, 28, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 29, 0, 0, 0, 0, 30, 31, 0, 30, 32, 0, 0, + 33, 0, 0, 0, 34, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, + 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 26, 0, 38, 39, 40, 41, + 36, 0, 0, 42, 0, 0, 0, 0, 43, 0, 44, 45, 0, 0, 0, 0, + 0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 50, 0, 0, 0, 51, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, + 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, + 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, + 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, + 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, + 0, 42, 0, 0, 0, 0, 0, 0, 0, 58, 59, 60, 0, 0, 0, 56, + 0, 3, 0, 0, 0, 0, 0, 61, 0, 62, 0, 0, 0, 0, 1, 0, + 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 63, 0, 55, 64, 26, + 65, 66, 19, 67, 35, 0, 0, 0, 0, 68, 69, 0, 0, 0, 70, 0, + 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, + 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 74, 0, 0, 0, 0, + 0, 0, 71, 71, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, + 0, 0, 76, 77, 0, 0, 0, 1, 0, 78, 0, 0, 0, 0, 1, 0, + 19, 19, 19, 79, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 81, 82, 83, 0, 0, 0, 0, 0, 0, 0, + 58, 0, 0, 43, 0, 0, 0, 84, 0, 58, 0, 0, 0, 0, 0, 0, + 0, 35, 0, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, + 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, + 0, 0, 0, 0, 60, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 36, 0, 0, 0, 0, +}; + +static RE_UINT8 re_numeric_type_stage_4[] = { + 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 1, 2, 0, 0, + 5, 1, 0, 0, 5, 1, 6, 7, 5, 1, 8, 0, 5, 1, 9, 0, + 5, 1, 0, 10, 5, 1, 11, 0, 1, 12, 13, 0, 0, 14, 15, 16, + 0, 17, 18, 0, 1, 2, 19, 7, 0, 0, 1, 20, 1, 2, 1, 2, + 0, 0, 21, 22, 23, 22, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, + 24, 7, 0, 0, 23, 25, 26, 27, 19, 23, 25, 13, 0, 28, 29, 30, + 0, 0, 31, 32, 23, 33, 34, 0, 0, 0, 0, 35, 36, 0, 0, 0, + 37, 7, 0, 9, 0, 0, 38, 0, 19, 7, 0, 0, 0, 19, 37, 19, + 0, 0, 37, 19, 35, 0, 0, 0, 39, 0, 0, 0, 0, 40, 0, 0, + 0, 35, 0, 0, 41, 42, 0, 0, 0, 43, 44, 0, 0, 0, 0, 36, + 18, 0, 0, 36, 0, 18, 0, 0, 0, 0, 18, 0, 43, 0, 0, 0, + 45, 0, 0, 0, 0, 46, 0, 0, 47, 43, 0, 0, 48, 0, 0, 0, + 0, 0, 0, 39, 0, 0, 42, 42, 0, 0, 0, 40, 0, 0, 0, 17, + 0, 49, 18, 0, 0, 0, 0, 45, 0, 43, 0, 0, 0, 0, 40, 0, + 0, 0, 45, 0, 0, 45, 39, 0, 42, 0, 0, 0, 45, 43, 0, 0, + 0, 0, 0, 18, 17, 19, 0, 0, 0, 0, 11, 0, 0, 39, 39, 18, + 0, 0, 50, 0, 36, 19, 19, 19, 19, 19, 13, 0, 19, 19, 19, 18, + 13, 0, 0, 0, 42, 40, 0, 0, 0, 0, 51, 0, 0, 0, 0, 19, + 0, 0, 17, 13, 52, 0, 0, 0, 0, 0, 0, 53, 23, 25, 19, 10, + 0, 0, 54, 55, 56, 1, 0, 0, 0, 0, 5, 1, 9, 0, 0, 0, + 19, 19, 7, 0, 0, 5, 1, 1, 1, 1, 1, 1, 23, 57, 0, 0, + 40, 0, 0, 0, 39, 43, 0, 43, 0, 40, 0, 35, 0, 0, 0, 42, +}; + +static RE_UINT8 re_numeric_type_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, + 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 3, 3, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, + 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 3, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, + 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, + 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 2, 2, 2, 0, 0, 0, 0, 0, +}; + +/* Numeric_Type: 2088 bytes. */ + +RE_UINT32 re_get_numeric_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_numeric_type_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_numeric_type_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_numeric_type_stage_3[pos + f] << 2; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_numeric_type_stage_4[pos + f] << 3; + value = re_numeric_type_stage_5[pos + code]; + + return value; +} + +/* Numeric_Value. */ + +static RE_UINT8 re_numeric_value_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, + 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, + 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +}; + +static RE_UINT8 re_numeric_value_stage_2[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, + 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, + 20, 21, 1, 1, 22, 1, 1, 23, 1, 1, 1, 1, 24, 1, 1, 1, + 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, + 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, + 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, + 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, + 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, + 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, + 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, + 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, + 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_numeric_value_stage_3[] = { + 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, + 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, + 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 14, 0, 0, 0, 0, 0, 13, 0, 0, 0, + 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, + 0, 0, 0, 15, 3, 0, 0, 0, 0, 0, 16, 17, 18, 0, 0, 0, + 0, 0, 0, 19, 20, 0, 0, 21, 0, 0, 0, 22, 23, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 24, 25, 26, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 27, 0, 0, 0, 0, 28, 29, 0, 28, 30, 0, 0, + 31, 0, 0, 0, 32, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, + 0, 0, 34, 0, 0, 0, 0, 0, 35, 0, 36, 0, 37, 38, 39, 40, + 41, 0, 0, 42, 0, 0, 0, 0, 43, 0, 44, 45, 0, 0, 0, 0, + 0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 50, 0, 0, 0, 51, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, + 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, + 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, + 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, + 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 0, 0, + 0, 62, 0, 0, 0, 0, 0, 0, 0, 63, 64, 65, 0, 0, 0, 66, + 0, 3, 0, 0, 0, 0, 0, 67, 0, 68, 0, 0, 0, 0, 1, 0, + 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 69, 0, 70, 71, 72, + 73, 74, 75, 76, 77, 0, 0, 0, 0, 78, 79, 0, 0, 0, 80, 0, + 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 0, 0, 0, 0, + 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, + 0, 0, 87, 88, 0, 0, 0, 1, 0, 89, 0, 0, 0, 0, 1, 0, + 90, 91, 92, 93, 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, + 98, 0, 0, 99, 0, 0, 0, 100, 0, 101, 0, 0, 0, 0, 0, 0, + 0, 102, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, + 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, + 0, 0, 0, 0, 106, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, +}; + +static RE_UINT8 re_numeric_value_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, + 0, 0, 0, 0, 4, 0, 5, 6, 1, 2, 3, 0, 0, 0, 0, 0, + 0, 7, 8, 9, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 0, + 0, 7, 8, 9, 12, 13, 0, 0, 0, 7, 8, 9, 14, 0, 0, 0, + 0, 7, 8, 9, 0, 0, 1, 15, 0, 7, 8, 9, 16, 17, 0, 0, + 1, 2, 18, 19, 20, 0, 0, 0, 0, 0, 21, 2, 22, 23, 24, 25, + 0, 0, 0, 26, 27, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 0, + 0, 0, 0, 0, 1, 2, 28, 0, 0, 0, 0, 0, 29, 2, 3, 0, + 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, 36, 37, 34, 35, 36, 37, + 38, 39, 40, 0, 0, 0, 0, 0, 34, 35, 36, 41, 42, 34, 35, 36, + 41, 42, 34, 35, 36, 41, 42, 0, 0, 0, 43, 44, 45, 46, 2, 47, + 0, 0, 0, 0, 0, 48, 49, 50, 34, 35, 51, 49, 50, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 52, 0, 53, 0, 0, 0, 0, 0, 0, + 21, 2, 3, 0, 0, 0, 54, 0, 0, 0, 0, 0, 48, 55, 0, 0, + 34, 35, 56, 0, 0, 0, 0, 0, 0, 0, 57, 58, 59, 60, 61, 62, + 0, 0, 0, 0, 63, 64, 65, 66, 0, 67, 0, 0, 0, 0, 0, 0, + 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, + 0, 0, 0, 70, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 75, 0, 76, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 77, 78, 0, 0, 0, 0, 0, 0, 79, + 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, + 0, 0, 0, 0, 81, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, + 0, 83, 0, 0, 0, 0, 0, 0, 0, 0, 84, 85, 0, 0, 0, 0, + 86, 87, 0, 88, 0, 0, 0, 0, 89, 80, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 90, 0, 0, 0, 0, 0, 5, 0, 5, 0, + 0, 0, 0, 0, 0, 0, 91, 0, 0, 0, 0, 0, 0, 0, 0, 92, + 0, 0, 0, 15, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, + 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, 0, + 0, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 97, 0, 98, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 99, 68, 0, 0, 0, + 0, 0, 0, 0, 75, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, + 0, 101, 0, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, + 0, 0, 0, 0, 0, 103, 0, 0, 0, 48, 49, 104, 0, 0, 0, 0, + 0, 0, 0, 0, 105, 106, 0, 0, 0, 0, 107, 0, 108, 0, 75, 0, + 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 110, 0, 111, 8, 9, 57, 58, 112, 113, + 114, 115, 116, 117, 118, 0, 0, 0, 119, 120, 121, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 122, 131, 132, 0, 0, 0, 103, 0, 0, 0, 0, 0, + 133, 0, 0, 0, 0, 0, 0, 0, 134, 0, 135, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 136, 137, 0, 0, 0, 0, 0, 0, 0, 0, 138, 139, + 0, 0, 0, 0, 0, 140, 141, 0, 34, 142, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 143, 0, 0, 0, 0, 0, 0, 34, 142, + 34, 35, 144, 145, 146, 147, 148, 149, 0, 0, 0, 0, 48, 49, 50, 150, + 151, 152, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, + 8, 9, 49, 153, 35, 154, 2, 155, 156, 157, 9, 158, 159, 158, 160, 161, + 162, 163, 164, 165, 166, 167, 168, 169, 170, 0, 0, 0, 0, 0, 0, 0, + 34, 35, 144, 145, 171, 0, 0, 0, 0, 0, 0, 7, 8, 9, 1, 2, + 172, 8, 9, 1, 2, 172, 8, 9, 173, 49, 174, 0, 0, 0, 0, 0, + 70, 0, 0, 0, 0, 0, 0, 0, 0, 175, 0, 0, 0, 0, 0, 0, + 98, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 91, 0, 0, 0, 0, 0, 176, 0, 0, 88, 0, 0, 0, 88, + 0, 0, 101, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 73, 0, + 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 107, 0, + 0, 0, 0, 177, 0, 0, 0, 0, 0, 0, 0, 0, 178, 0, 0, 0, +}; + +static RE_UINT8 re_numeric_value_stage_5[] = { + 0, 0, 0, 0, 2, 23, 25, 27, 29, 31, 33, 35, 37, 39, 0, 0, + 0, 0, 25, 27, 0, 23, 0, 0, 11, 15, 19, 0, 0, 0, 2, 23, + 25, 27, 29, 31, 33, 35, 37, 39, 3, 6, 9, 11, 19, 46, 0, 0, + 0, 0, 11, 15, 19, 3, 6, 9, 40, 85, 94, 0, 23, 25, 27, 0, + 40, 85, 94, 11, 15, 19, 0, 0, 37, 39, 15, 24, 26, 28, 30, 32, + 34, 36, 38, 1, 0, 23, 25, 27, 37, 39, 40, 50, 60, 70, 80, 81, + 82, 83, 84, 85, 103, 0, 0, 0, 0, 0, 47, 48, 49, 0, 0, 0, + 37, 39, 23, 0, 2, 0, 0, 0, 7, 5, 4, 12, 18, 10, 14, 16, + 20, 8, 21, 6, 13, 17, 22, 23, 23, 25, 27, 29, 31, 33, 35, 37, + 39, 40, 41, 42, 80, 85, 89, 94, 94, 98, 103, 0, 0, 33, 80, 107, + 112, 2, 0, 0, 43, 44, 45, 46, 47, 48, 49, 50, 0, 0, 2, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 23, 25, 27, 37, 39, 40, 2, + 0, 0, 23, 25, 27, 29, 31, 33, 35, 37, 39, 40, 39, 40, 23, 25, + 0, 15, 0, 0, 0, 0, 0, 2, 40, 50, 60, 0, 27, 29, 0, 0, + 39, 40, 0, 0, 40, 50, 60, 70, 80, 81, 82, 83, 0, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 0, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 31, 0, 0, + 0, 0, 0, 25, 0, 0, 31, 0, 0, 35, 0, 0, 23, 0, 0, 35, + 0, 0, 0, 103, 0, 27, 0, 0, 0, 39, 0, 0, 25, 0, 0, 0, + 31, 0, 29, 0, 0, 0, 0, 115, 40, 0, 0, 0, 0, 0, 0, 94, + 27, 0, 0, 0, 85, 0, 0, 0, 115, 0, 0, 0, 0, 0, 116, 0, + 0, 25, 0, 37, 0, 33, 0, 0, 0, 40, 0, 94, 50, 60, 0, 0, + 70, 0, 0, 0, 0, 27, 27, 27, 0, 0, 0, 29, 0, 0, 23, 0, + 0, 0, 39, 50, 0, 0, 40, 0, 37, 0, 0, 0, 0, 0, 35, 0, + 0, 0, 39, 0, 0, 0, 85, 0, 0, 0, 29, 0, 0, 0, 25, 0, + 0, 94, 0, 0, 0, 0, 33, 0, 33, 0, 0, 0, 0, 0, 2, 0, + 35, 37, 39, 2, 11, 15, 19, 3, 6, 9, 0, 0, 0, 0, 0, 27, + 0, 0, 0, 40, 0, 33, 0, 33, 0, 40, 0, 0, 0, 0, 0, 23, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 11, 15, 23, 31, + 80, 89, 98, 107, 31, 40, 80, 85, 89, 94, 98, 31, 40, 80, 85, 89, + 94, 103, 107, 40, 23, 23, 23, 25, 25, 25, 25, 31, 40, 40, 40, 40, + 40, 60, 80, 80, 80, 80, 85, 87, 89, 89, 89, 89, 80, 15, 15, 18, + 19, 0, 0, 0, 23, 31, 40, 80, 0, 84, 0, 0, 0, 0, 93, 0, + 0, 23, 25, 40, 50, 85, 0, 0, 23, 25, 27, 40, 50, 85, 94, 103, + 0, 0, 23, 40, 50, 85, 25, 27, 40, 50, 85, 94, 0, 23, 80, 0, + 39, 40, 50, 60, 70, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 15, 11, 12, 18, 0, 50, 60, 70, 80, 81, 82, 83, 84, + 85, 94, 2, 23, 35, 37, 39, 29, 39, 23, 25, 27, 37, 39, 23, 25, + 27, 29, 31, 25, 27, 27, 29, 31, 23, 25, 27, 27, 29, 31, 113, 114, + 29, 31, 27, 27, 29, 29, 29, 29, 33, 35, 35, 35, 37, 37, 39, 39, + 39, 39, 25, 27, 29, 31, 33, 23, 25, 27, 29, 29, 31, 31, 25, 27, + 23, 25, 12, 18, 21, 12, 18, 6, 11, 8, 11, 0, 83, 84, 0, 0, + 37, 39, 2, 23, 2, 2, 23, 25, 35, 37, 39, 0, 29, 0, 0, 0, + 0, 0, 0, 60, 0, 29, 0, 0, 39, 0, 0, 0, +}; + +/* Numeric_Value: 2876 bytes. */ + +RE_UINT32 re_get_numeric_value(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_numeric_value_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_numeric_value_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_numeric_value_stage_3[pos + f] << 3; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_numeric_value_stage_4[pos + f] << 2; + value = re_numeric_value_stage_5[pos + code]; + + return value; +} + +/* Bidi_Mirrored. */ + +static RE_UINT8 re_bidi_mirrored_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_bidi_mirrored_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_bidi_mirrored_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, + 4, 5, 1, 6, 7, 8, 1, 9, 10, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, + 1, 1, 1, 12, 1, 1, 1, 1, +}; + +static RE_UINT8 re_bidi_mirrored_stage_4[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, + 6, 7, 8, 3, 3, 9, 3, 3, 10, 11, 12, 13, 14, 3, 3, 3, + 3, 3, 3, 3, 3, 15, 3, 16, 3, 3, 3, 3, 3, 3, 17, 18, + 19, 20, 21, 22, 3, 3, 3, 3, 23, 3, 3, 3, 3, 3, 3, 3, + 24, 3, 3, 3, 3, 3, 3, 3, 3, 25, 3, 3, 26, 27, 3, 3, + 3, 3, 3, 28, 29, 30, 31, 32, +}; + +static RE_UINT8 re_bidi_mirrored_stage_5[] = { + 0, 0, 0, 0, 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 40, + 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 24, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 6, 96, 0, 0, 0, 0, 0, 0, 96, + 0, 96, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 30, 63, 98, 188, 87, 248, 15, 250, 255, 31, 60, 128, 245, 207, 255, 255, + 255, 159, 7, 1, 204, 255, 255, 193, 0, 62, 195, 255, 255, 63, 255, 255, + 0, 15, 0, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, 255, 63, 0, + 121, 59, 120, 112, 252, 255, 0, 0, 248, 255, 255, 249, 255, 255, 0, 1, + 63, 194, 55, 31, 58, 3, 240, 51, 0, 252, 255, 223, 83, 122, 48, 112, + 0, 0, 128, 1, 48, 188, 25, 254, 255, 255, 255, 255, 207, 191, 255, 255, + 255, 255, 127, 80, 124, 112, 136, 47, 60, 54, 0, 48, 255, 3, 0, 0, + 0, 255, 243, 15, 0, 0, 0, 0, 0, 0, 0, 126, 48, 0, 0, 0, + 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 168, 13, 0, 0, 0, + 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 0, 0, 0, 0, +}; + +/* Bidi_Mirrored: 489 bytes. */ + +RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_bidi_mirrored_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_bidi_mirrored_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_bidi_mirrored_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_bidi_mirrored_stage_4[pos + f] << 6; + pos += code; + value = (re_bidi_mirrored_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Indic_Matra_Category. */ + +static RE_UINT8 re_indic_matra_category_stage_1[] = { + 0, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_indic_matra_category_stage_2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 0, 0, 0, 0, 0, 0, 9, 0, 10, 11, 12, 13, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, + 19, 20, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_indic_matra_category_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 5, 6, 7, 4, 0, + 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, 9, 0, 4, 0, + 0, 0, 0, 10, 11, 12, 4, 0, 0, 0, 0, 13, 14, 7, 0, 0, + 0, 0, 0, 15, 16, 17, 4, 0, 0, 0, 0, 10, 18, 19, 4, 0, + 0, 0, 0, 13, 20, 7, 4, 0, 0, 0, 0, 0, 21, 22, 0, 23, + 0, 0, 0, 24, 25, 0, 0, 0, 0, 0, 0, 26, 27, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 0, 32, 33, 34, 35, 36, 0, 0, 0, 0, 0, 0, + 0, 37, 0, 37, 0, 38, 0, 38, 0, 0, 0, 39, 40, 41, 0, 0, + 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 43, 44, 0, 0, 0, + 0, 45, 0, 0, 0, 0, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 23, + 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 52, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, 55, 0, 0, 0, + 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0, + 0, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 63, 64, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, + 66, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 68, 69, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, + 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 44, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74, 69, 0, 0, 0, 0, +}; + +static RE_UINT8 re_indic_matra_category_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, + 3, 4, 5, 6, 1, 7, 3, 8, 0, 0, 9, 4, 0, 0, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, + 3, 4, 10, 11, 12, 13, 14, 0, 0, 0, 0, 15, 0, 0, 0, 0, + 3, 10, 0, 9, 16, 9, 17, 0, 3, 4, 5, 9, 18, 15, 3, 0, + 0, 0, 0, 0, 0, 0, 0, 19, 3, 4, 10, 11, 20, 13, 21, 0, + 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 17, 10, 0, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 1, 7, 25, 6, 26, 6, 6, 0, 0, 0, 9, 10, 0, 0, 0, 0, + 27, 7, 25, 18, 28, 29, 6, 0, 0, 0, 15, 25, 0, 0, 0, 0, + 7, 3, 10, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 16, 0, 15, + 7, 6, 10, 10, 2, 30, 23, 31, 0, 7, 0, 0, 0, 0, 0, 0, + 19, 7, 6, 6, 4, 10, 0, 0, 32, 32, 33, 9, 0, 0, 0, 16, + 19, 7, 6, 6, 4, 9, 0, 0, 32, 32, 34, 0, 0, 0, 0, 0, + 35, 36, 4, 37, 37, 6, 6, 0, 36, 0, 10, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 15, 19, 17, 38, 6, 6, 0, 39, 16, 0, 0, + 0, 0, 0, 7, 4, 0, 0, 0, 0, 25, 0, 15, 25, 0, 0, 0, + 9, 6, 16, 0, 0, 0, 0, 0, 0, 15, 40, 16, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 0, 0, 17, 10, 0, 0, 0, 0, 0, + 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 6, 17, 4, 41, + 42, 22, 23, 0, 25, 0, 0, 0, 9, 43, 0, 0, 0, 0, 0, 0, + 6, 44, 45, 46, 16, 0, 0, 0, 7, 7, 2, 22, 7, 8, 7, 7, + 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 38, 2, 0, 0, + 47, 1, 19, 6, 17, 5, 44, 22, 22, 40, 16, 0, 0, 0, 0, 0, + 0, 0, 15, 6, 4, 48, 49, 22, 23, 18, 25, 0, 0, 0, 0, 0, + 0, 0, 17, 8, 6, 25, 0, 0, 0, 0, 0, 2, 50, 7, 10, 0, + 0, 0, 0, 16, 0, 0, 0, 0, 0, 15, 3, 1, 0, 0, 0, 0, + 0, 0, 15, 7, 7, 7, 7, 7, 7, 7, 10, 0, 0, 0, 0, 0, + 0, 0, 0, 35, 4, 17, 4, 10, 0, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 6, 4, 22, 16, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 6, 17, 52, 40, 10, 0, 0, 0, 0, 0, 0, + 1, 6, 53, 54, 55, 56, 33, 16, 0, 0, 0, 0, 0, 11, 5, 8, + 0, 0, 0, 43, 0, 0, 0, 0, 0, 15, 19, 7, 44, 25, 35, 0, + 57, 4, 9, 58, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 39, + 0, 0, 0, 0, 6, 6, 4, 4, 4, 6, 6, 16, 0, 0, 0, 0, + 2, 3, 5, 1, 3, 0, 0, 0, 0, 0, 0, 9, 6, 4, 40, 37, + 17, 59, 16, 0, 0, 0, 0, 0, 0, 15, 8, 4, 4, 4, 6, 18, + 0, 0, 0, 0, 0, 0, 9, 8, +}; + +static RE_UINT8 re_indic_matra_category_stage_5[] = { + 0, 0, 5, 1, 1, 2, 1, 6, 6, 6, 6, 5, 5, 5, 1, 1, + 2, 1, 0, 5, 6, 0, 0, 2, 2, 0, 0, 4, 4, 6, 0, 1, + 5, 0, 5, 6, 5, 8, 1, 5, 9, 0, 10, 6, 2, 2, 4, 4, + 4, 5, 1, 0, 7, 0, 8, 1, 8, 0, 8, 8, 9, 2, 4, 1, + 3, 3, 3, 1, 3, 0, 0, 6, 5, 7, 7, 7, 6, 2, 0, 14, + 2, 5, 9, 10, 4, 2, 14, 0, 6, 1, 1, 8, 8, 5, 14, 1, + 6, 11, 7, 12, 2, 9, 11, 0, 5, 2, 6, 3, 3, 5, 5, 3, + 1, 3, 0, 13, 13, 0, 6, 14, +}; + +/* Indic_Matra_Category: 1336 bytes. */ + +RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_indic_matra_category_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_indic_matra_category_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_indic_matra_category_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_indic_matra_category_stage_4[pos + f] << 1; + value = re_indic_matra_category_stage_5[pos + code]; + + return value; +} + +/* Indic_Syllabic_Category. */ + +static RE_UINT8 re_indic_syllabic_category_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 1, 1, 1, 1, 1, 1, 10, 1, 11, 12, 13, 14, 1, 1, 1, + 1, 1, 1, 1, 1, 15, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 16, 17, 18, 19, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 1, 1, 1, + 21, 22, 1, 1, 1, 1, 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 16, + 17, 11, 12, 18, 19, 20, 0, 21, 22, 23, 12, 24, 25, 0, 8, 0, + 10, 11, 12, 24, 26, 27, 8, 28, 29, 30, 31, 32, 33, 34, 0, 0, + 35, 36, 12, 37, 38, 39, 8, 0, 40, 36, 12, 41, 38, 42, 8, 0, + 40, 36, 4, 43, 44, 34, 8, 45, 46, 47, 4, 48, 49, 50, 0, 51, + 52, 4, 53, 54, 55, 0, 0, 0, 56, 57, 58, 59, 60, 61, 0, 0, + 0, 0, 0, 0, 62, 4, 63, 64, 65, 66, 67, 68, 0, 0, 0, 0, + 4, 4, 69, 70, 0, 71, 72, 73, 74, 75, 0, 0, 0, 0, 0, 0, + 76, 77, 78, 77, 78, 79, 76, 80, 4, 4, 81, 82, 83, 84, 0, 0, + 85, 63, 86, 87, 0, 4, 88, 89, 4, 4, 90, 91, 92, 0, 0, 0, + 4, 93, 4, 4, 94, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, + 98, 78, 4, 99, 100, 0, 0, 0, 101, 4, 102, 103, 4, 4, 104, 105, + 4, 4, 106, 107, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, + 111, 4, 112, 0, 4, 113, 114, 115, 116, 117, 4, 118, 119, 0, 0, 0, + 120, 4, 121, 4, 122, 123, 0, 0, 124, 4, 4, 125, 126, 0, 0, 0, + 127, 4, 128, 129, 130, 0, 4, 131, 4, 4, 4, 132, 133, 0, 134, 135, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 137, 138, 0, + 139, 140, 4, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 142, 78, 4, 143, 144, 0, 0, 0, 145, 4, 4, 146, 0, 0, 0, 0, + 147, 4, 148, 149, 0, 0, 0, 0, 150, 151, 4, 152, 153, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 154, 4, 155, 156, 0, 0, 0, 0, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_4[] = { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 3, 3, 3, 4, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 7, 8, 8, 8, 9, 0, 10, 5, 5, + 11, 0, 0, 0, 12, 3, 13, 5, 14, 15, 3, 16, 16, 4, 5, 5, + 5, 5, 17, 5, 18, 19, 20, 7, 8, 21, 21, 22, 0, 23, 0, 24, + 20, 0, 0, 0, 14, 15, 25, 26, 17, 27, 20, 28, 29, 23, 21, 30, + 0, 0, 13, 18, 31, 32, 0, 0, 14, 15, 3, 33, 33, 4, 5, 5, + 17, 13, 20, 7, 8, 34, 34, 30, 8, 21, 21, 30, 0, 35, 0, 24, + 36, 0, 0, 0, 37, 15, 25, 12, 38, 39, 27, 17, 40, 41, 42, 19, + 5, 5, 20, 35, 29, 35, 43, 30, 0, 23, 0, 0, 14, 15, 3, 38, + 38, 4, 5, 5, 5, 13, 20, 44, 8, 43, 43, 30, 0, 45, 20, 0, + 46, 15, 3, 38, 5, 13, 20, 7, 0, 45, 0, 47, 5, 5, 42, 44, + 8, 43, 43, 48, 0, 0, 49, 50, 46, 15, 3, 3, 3, 25, 19, 5, + 24, 5, 5, 36, 5, 42, 51, 23, 8, 52, 8, 8, 35, 0, 0, 0, + 13, 5, 5, 5, 5, 5, 5, 42, 8, 8, 53, 0, 8, 34, 54, 55, + 27, 56, 18, 36, 0, 5, 13, 5, 13, 57, 19, 27, 8, 8, 34, 58, + 8, 59, 54, 60, 0, 0, 0, 20, 5, 5, 13, 5, 5, 5, 5, 41, + 10, 8, 8, 61, 62, 63, 64, 65, 66, 66, 67, 66, 66, 66, 66, 66, + 66, 66, 66, 68, 69, 3, 70, 8, 8, 71, 72, 73, 74, 11, 75, 76, + 77, 78, 79, 80, 81, 82, 5, 5, 83, 84, 54, 85, 0, 0, 86, 87, + 88, 5, 5, 17, 6, 89, 0, 0, 88, 5, 5, 5, 6, 0, 0, 0, + 90, 0, 0, 0, 91, 3, 3, 3, 3, 35, 8, 8, 8, 61, 92, 93, + 94, 0, 0, 95, 96, 5, 5, 5, 8, 8, 97, 0, 98, 99, 100, 0, + 101, 102, 102, 103, 104, 105, 0, 0, 5, 5, 5, 0, 8, 8, 8, 8, + 106, 99, 107, 0, 5, 108, 8, 0, 5, 5, 5, 69, 88, 109, 99, 110, + 111, 8, 8, 8, 8, 79, 107, 0, 112, 113, 3, 3, 5, 114, 8, 8, + 8, 115, 5, 0, 116, 3, 117, 5, 118, 8, 119, 120, 0, 0, 121, 122, + 5, 123, 8, 8, 124, 0, 0, 0, 5, 125, 8, 106, 99, 126, 0, 0, + 0, 0, 0, 13, 127, 0, 0, 0, 0, 0, 0, 1, 33, 128, 129, 5, + 108, 8, 0, 0, 5, 5, 5, 130, 131, 132, 133, 5, 134, 0, 0, 0, + 135, 3, 3, 3, 117, 5, 5, 5, 5, 136, 8, 8, 8, 89, 0, 0, + 0, 0, 19, 5, 130, 102, 137, 107, 5, 108, 8, 138, 139, 0, 0, 0, + 140, 3, 4, 88, 141, 8, 8, 142, 89, 0, 0, 0, 3, 117, 5, 5, + 5, 5, 81, 8, 143, 144, 0, 0, 99, 99, 99, 145, 13, 0, 146, 0, + 8, 8, 8, 84, 147, 0, 0, 0, 117, 5, 108, 8, 0, 148, 0, 0, + 5, 5, 5, 74, 149, 5, 150, 99, 151, 8, 29, 152, 81, 45, 0, 153, + 5, 13, 13, 5, 5, 0, 0, 154, 155, 15, 3, 3, 5, 5, 8, 8, + 8, 53, 0, 0, 156, 3, 3, 4, 8, 8, 157, 0, 156, 88, 5, 5, + 5, 108, 8, 8, 158, 89, 0, 0, 156, 3, 3, 3, 4, 5, 5, 5, + 108, 8, 8, 8, 63, 0, 0, 0, 3, 3, 117, 5, 5, 5, 129, 159, + 8, 160, 0, 0, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_5[] = { + 0, 0, 0, 0, 9, 0, 0, 0, 1, 1, 1, 2, 6, 6, 6, 6, + 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 4, 3, 7, 7, + 7, 7, 7, 7, 7, 5, 7, 7, 0, 7, 7, 7, 6, 6, 7, 7, + 0, 0, 6, 6, 0, 10, 10, 10, 0, 1, 1, 2, 0, 6, 6, 6, + 6, 0, 0, 6, 10, 0, 10, 10, 10, 0, 10, 0, 0, 0, 10, 10, + 10, 10, 0, 0, 7, 0, 0, 7, 7, 5, 11, 0, 0, 0, 0, 7, + 10, 10, 0, 10, 6, 6, 6, 0, 0, 0, 0, 6, 0, 10, 10, 0, + 4, 0, 7, 7, 7, 7, 7, 0, 7, 5, 0, 0, 1, 0, 9, 9, + 0, 14, 0, 0, 6, 6, 0, 6, 7, 7, 0, 7, 0, 0, 7, 7, + 0, 10, 0, 0, 0, 0, 1, 17, 6, 0, 6, 6, 6, 10, 0, 0, + 0, 0, 0, 10, 10, 0, 0, 0, 10, 10, 10, 0, 7, 0, 7, 7, + 0, 3, 7, 7, 0, 7, 7, 0, 0, 0, 1, 2, 0, 0, 10, 0, + 7, 5, 12, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 0, 5, 0, + 7, 0, 7, 0, 7, 7, 5, 0, 19, 19, 19, 19, 0, 1, 5, 0, + 10, 0, 0, 10, 0, 10, 0, 10, 14, 14, 0, 0, 7, 0, 0, 0, + 0, 1, 0, 0, 7, 7, 1, 2, 7, 7, 1, 1, 5, 3, 0, 0, + 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 0, 13, 13, 13, + 13, 0, 0, 0, 10, 6, 6, 6, 6, 6, 6, 7, 7, 7, 1, 19, + 2, 5, 5, 14, 14, 14, 14, 10, 10, 10, 6, 6, 7, 7, 10, 10, + 10, 10, 14, 14, 14, 10, 7, 19, 19, 10, 10, 7, 7, 19, 19, 19, + 19, 19, 10, 10, 10, 7, 7, 7, 7, 10, 10, 10, 10, 10, 14, 7, + 7, 7, 7, 19, 19, 19, 10, 19, 0, 0, 19, 19, 7, 7, 0, 0, + 6, 6, 6, 10, 5, 0, 0, 0, 10, 0, 7, 7, 10, 10, 10, 6, + 7, 20, 20, 0, 12, 0, 0, 0, 0, 5, 5, 0, 3, 0, 0, 0, + 9, 10, 10, 10, 7, 13, 13, 13, 15, 15, 1, 15, 15, 15, 15, 15, + 15, 0, 0, 0, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, + 18, 18, 18, 18, 18, 0, 0, 0, 7, 15, 15, 15, 19, 19, 0, 0, + 10, 10, 10, 7, 10, 14, 14, 15, 15, 15, 15, 0, 5, 7, 7, 7, + 1, 1, 1, 12, 2, 6, 6, 6, 4, 7, 7, 7, 5, 10, 10, 10, + 1, 12, 2, 6, 6, 6, 10, 10, 10, 13, 13, 13, 7, 7, 5, 5, + 13, 13, 10, 10, 0, 0, 3, 10, 10, 10, 15, 15, 6, 6, 4, 7, + 15, 15, 5, 5, 13, 13, 7, 7, 1, 1, 0, 4, 0, 0, 2, 2, + 6, 6, 5, 10, 10, 10, 10, 1, 10, 10, 8, 8, 8, 8, 10, 10, + 10, 10, 8, 13, 13, 10, 10, 10, 10, 13, 10, 1, 1, 2, 6, 6, + 15, 7, 7, 7, 8, 8, 8, 19, 7, 7, 7, 15, 15, 15, 15, 5, + 1, 1, 12, 2, 10, 10, 10, 4, 7, 13, 14, 14, 7, 7, 7, 14, + 14, 14, 14, 0, 15, 15, 0, 0, 0, 0, 10, 19, 18, 19, 18, 0, + 0, 2, 5, 0, 10, 6, 10, 10, 10, 10, 10, 15, 15, 15, 15, 7, + 19, 5, 0, 0, 7, 0, 1, 2, 0, 0, 0, 5, 1, 1, 2, 0, + 1, 1, 2, 6, 7, 5, 4, 0, 7, 7, 7, 5, 2, 7, 7, 7, + 7, 7, 5, 4, +}; + +/* Indic_Syllabic_Category: 1952 bytes. */ + +RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_indic_syllabic_category_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_indic_syllabic_category_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_indic_syllabic_category_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_indic_syllabic_category_stage_4[pos + f] << 2; + value = re_indic_syllabic_category_stage_5[pos + code]; + + return value; +} + +/* Alphanumeric. */ + +RE_UINT32 re_get_alphanumeric(RE_UINT32 ch) { + return re_get_alphabetic(ch) || re_get_general_category(ch) == RE_PROP_ND; +} + +/* Any. */ + +RE_UINT32 re_get_any(RE_UINT32 ch) { + return 1; +} + +/* ASCII. */ + +RE_UINT32 re_get_ascii(RE_UINT32 ch) { + if (ch <= RE_ASCII_MAX) + return 1; + + return 0; +} + +/* Assigned. */ + +RE_UINT32 re_get_assigned(RE_UINT32 ch) { + return re_get_general_category(ch) != RE_PROP_CN; +} + +/* Blank. */ + +RE_UINT32 re_get_blank(RE_UINT32 ch) { + return ch == 0x09 || re_get_general_category(ch) == RE_PROP_ZS; +} + +/* Graph. */ + +RE_UINT32 re_get_graph(RE_UINT32 ch) { + return !re_get_white_space(ch) && + (RE_GRAPH_MASK & (1 << re_get_general_category(ch))) == 0; +} + +/* Print. */ + +RE_UINT32 re_get_print(RE_UINT32 ch) { + return (re_get_graph(ch) || re_get_blank(ch)) && + re_get_general_category(ch) != RE_PROP_CC; +} + +/* Word. */ + +RE_UINT32 re_get_word(RE_UINT32 ch) { + return re_get_alphabetic(ch) || (RE_WORD_MASK & (1 << + re_get_general_category(ch))) != 0 || re_get_join_control(ch); +} + +/* XDigit. */ + +RE_UINT32 re_get_xdigit(RE_UINT32 ch) { + return re_get_general_category(ch) == RE_PROP_ND || re_get_hex_digit(ch); +} + +/* All_Cases. */ + +static RE_UINT8 re_all_cases_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_all_cases_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, + 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 17, 6, 6, 6, 18, + 6, 6, 6, 6, 19, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_all_cases_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 0, 0, 0, 0, 0, 0, 9, 0, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 18, 18, 18, 18, 18, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 21, 34, 18, 18, 35, 18, + 18, 18, 18, 18, 36, 18, 37, 38, 39, 18, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 18, 18, 18, 63, 64, + 65, 65, 11, 11, 11, 11, 15, 15, 15, 15, 66, 66, 18, 18, 18, 18, + 67, 68, 18, 18, 18, 18, 18, 18, 69, 70, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 0, 71, 72, 72, 72, 73, 0, 74, 75, 75, 75, + 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 77, 77, 77, 77, 78, 79, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 81, 18, 18, 18, + 18, 18, 82, 83, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 84, 85, 86, 87, 84, 85, 84, 85, 86, 87, 88, 89, 84, 85, 90, 91, + 84, 85, 84, 85, 84, 85, 92, 93, 94, 95, 96, 97, 98, 99, 94, 100, + 0, 0, 0, 0, 101, 102, 103, 0, 0, 104, 0, 0, 105, 105, 106, 106, + 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 108, 109, 109, 109, 110, 110, 110, 111, 0, 0, + 72, 72, 72, 72, 72, 73, 75, 75, 75, 75, 75, 76, 112, 113, 114, 115, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 37, 116, 117, 0, + 118, 118, 118, 118, 119, 120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 82, 0, 0, + 18, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 68, 18, 68, 18, 18, 18, 18, 18, 18, 18, 0, 121, + 18, 122, 37, 0, 18, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 11, 11, 4, 5, 15, 15, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 125, 125, 125, 125, 125, 126, 126, 126, 126, 126, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_all_cases_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, + 5, 6, 5, 7, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, + 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 11, + 5, 5, 5, 5, 5, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 13, + 14, 15, 14, 15, 14, 15, 14, 15, 16, 17, 14, 15, 14, 15, 14, 15, + 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, + 15, 0, 14, 15, 14, 15, 14, 15, 18, 14, 15, 14, 15, 14, 15, 19, + 20, 21, 14, 15, 14, 15, 22, 14, 15, 23, 23, 14, 15, 0, 24, 25, + 26, 14, 15, 23, 27, 28, 29, 30, 14, 15, 31, 0, 29, 32, 33, 34, + 14, 15, 14, 15, 14, 15, 35, 14, 15, 35, 0, 0, 14, 15, 35, 14, + 15, 36, 36, 14, 15, 14, 15, 37, 14, 15, 0, 0, 14, 15, 0, 38, + 0, 0, 0, 0, 39, 40, 41, 39, 40, 41, 39, 40, 41, 14, 15, 14, + 15, 14, 15, 14, 15, 42, 14, 15, 0, 39, 40, 41, 14, 15, 43, 44, + 45, 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, + 0, 0, 46, 14, 15, 47, 48, 49, 49, 14, 15, 50, 51, 52, 14, 15, + 53, 54, 55, 56, 57, 0, 58, 58, 0, 59, 0, 60, 0, 0, 0, 0, + 58, 0, 0, 61, 0, 62, 63, 0, 64, 65, 0, 66, 0, 0, 0, 65, + 0, 67, 68, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, + 71, 0, 0, 71, 0, 0, 0, 0, 71, 72, 73, 73, 74, 0, 0, 0, + 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, + 14, 15, 14, 15, 0, 0, 14, 15, 0, 0, 0, 33, 33, 33, 0, 0, + 0, 0, 0, 0, 0, 0, 77, 0, 78, 78, 78, 0, 79, 0, 80, 80, + 81, 1, 82, 1, 1, 83, 1, 1, 84, 85, 86, 1, 87, 1, 1, 1, + 88, 89, 0, 90, 1, 1, 91, 1, 1, 92, 1, 1, 93, 94, 94, 94, + 95, 5, 96, 5, 5, 97, 5, 5, 98, 99, 100, 5, 101, 5, 5, 5, + 102, 103, 104, 105, 5, 5, 106, 5, 5, 107, 5, 5, 108, 109, 109, 110, + 111, 112, 0, 0, 0, 113, 114, 115, 116, 117, 118, 0, 119, 120, 0, 14, + 15, 121, 14, 15, 0, 45, 45, 45, 122, 122, 122, 122, 122, 122, 122, 122, + 123, 123, 123, 123, 123, 123, 123, 123, 14, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 14, 15, 14, 15, 14, 15, 124, 14, 15, 14, 15, 14, 15, 14, + 15, 14, 15, 14, 15, 14, 15, 125, 0, 126, 126, 126, 126, 126, 126, 126, + 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 0, + 0, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 0, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 129, 0, 0, 0, 130, 0, 0, 131, 132, 14, 15, 14, 15, 14, 15, + 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, 0, 133, 0, 0, 134, 0, + 110, 110, 110, 110, 110, 110, 110, 110, 115, 115, 115, 115, 115, 115, 115, 115, + 110, 110, 110, 110, 110, 110, 0, 0, 115, 115, 115, 115, 115, 115, 0, 0, + 0, 110, 0, 110, 0, 110, 0, 110, 0, 115, 0, 115, 0, 115, 0, 115, + 135, 135, 136, 136, 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, 0, 0, + 110, 110, 0, 141, 0, 0, 0, 0, 115, 115, 142, 142, 143, 0, 144, 0, + 0, 0, 0, 141, 0, 0, 0, 0, 145, 145, 145, 145, 143, 0, 0, 0, + 110, 110, 0, 146, 0, 0, 0, 0, 115, 115, 147, 147, 0, 0, 0, 0, + 110, 110, 0, 148, 0, 118, 0, 0, 115, 115, 149, 149, 121, 0, 0, 0, + 150, 150, 151, 151, 143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 0, + 0, 0, 153, 154, 0, 0, 0, 0, 0, 0, 155, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 156, 0, 157, 157, 157, 157, 157, 157, 157, 157, + 158, 158, 158, 158, 158, 158, 158, 158, 0, 0, 0, 14, 15, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 0, 0, + 14, 15, 161, 162, 163, 164, 165, 14, 15, 14, 15, 14, 15, 166, 167, 168, + 169, 0, 14, 15, 0, 14, 15, 0, 0, 0, 0, 0, 0, 0, 170, 170, + 0, 0, 0, 14, 15, 14, 15, 0, 0, 0, 14, 15, 0, 0, 0, 0, + 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 0, 171, + 0, 0, 0, 0, 0, 171, 0, 0, 0, 14, 15, 14, 15, 172, 14, 15, + 0, 0, 0, 14, 15, 173, 0, 0, 14, 15, 174, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 14, 15, 0, 175, 175, 175, 175, 175, 175, 175, 175, + 176, 176, 176, 176, 176, 176, 176, 176, +}; + +/* All_Cases: 1984 bytes. */ + +static RE_AllCases re_all_cases_table[] = { + { 0, 0, 0}, + { 32, 0, 0}, + { 32, 232, 0}, + { 32, 8415, 0}, + { 32, 300, 0}, + { -32, 0, 0}, + { -32, 199, 0}, + { -32, 8383, 0}, + { -32, 268, 0}, + { 743, 775, 0}, + { 32, 8294, 0}, + { 7615, 0, 0}, + { -32, 8262, 0}, + { 121, 0, 0}, + { 1, 0, 0}, + { -1, 0, 0}, + { -199, 0, 0}, + { -232, 0, 0}, + { -121, 0, 0}, + { -300, -268, 0}, + { 195, 0, 0}, + { 210, 0, 0}, + { 206, 0, 0}, + { 205, 0, 0}, + { 79, 0, 0}, + { 202, 0, 0}, + { 203, 0, 0}, + { 207, 0, 0}, + { 97, 0, 0}, + { 211, 0, 0}, + { 209, 0, 0}, + { 163, 0, 0}, + { 213, 0, 0}, + { 130, 0, 0}, + { 214, 0, 0}, + { 218, 0, 0}, + { 217, 0, 0}, + { 219, 0, 0}, + { 56, 0, 0}, + { 1, 2, 0}, + { -1, 1, 0}, + { -2, -1, 0}, + { -79, 0, 0}, + { -97, 0, 0}, + { -56, 0, 0}, + { -130, 0, 0}, + { 10795, 0, 0}, + { -163, 0, 0}, + { 10792, 0, 0}, + { 10815, 0, 0}, + { -195, 0, 0}, + { 69, 0, 0}, + { 71, 0, 0}, + { 10783, 0, 0}, + { 10780, 0, 0}, + { 10782, 0, 0}, + { -210, 0, 0}, + { -206, 0, 0}, + { -205, 0, 0}, + { -202, 0, 0}, + { -203, 0, 0}, + { -207, 0, 0}, + { 42280, 0, 0}, + { 42308, 0, 0}, + { -209, 0, 0}, + { -211, 0, 0}, + { 10743, 0, 0}, + { 10749, 0, 0}, + { -213, 0, 0}, + { -214, 0, 0}, + { 10727, 0, 0}, + { -218, 0, 0}, + { -69, 0, 0}, + { -217, 0, 0}, + { -71, 0, 0}, + { -219, 0, 0}, + { 84, 116, 7289}, + { 38, 0, 0}, + { 37, 0, 0}, + { 64, 0, 0}, + { 63, 0, 0}, + { 7235, 0, 0}, + { 32, 62, 0}, + { 32, 96, 0}, + { 32, 57, 92}, + { -84, 32, 7205}, + { 32, 86, 0}, + { -743, 32, 0}, + { 32, 54, 0}, + { 32, 80, 0}, + { 31, 32, 0}, + { 32, 47, 0}, + { 32, 7549, 0}, + { -38, 0, 0}, + { -37, 0, 0}, + { 7219, 0, 0}, + { -32, 30, 0}, + { -32, 64, 0}, + { -32, 25, 60}, + { -116, -32, 7173}, + { -32, 54, 0}, + { -775, -32, 0}, + { -32, 22, 0}, + { -32, 48, 0}, + { -31, 1, 0}, + { -32, -1, 0}, + { -32, 15, 0}, + { -32, 7517, 0}, + { -64, 0, 0}, + { -63, 0, 0}, + { 8, 0, 0}, + { -62, -30, 0}, + { -57, -25, 35}, + { -47, -15, 0}, + { -54, -22, 0}, + { -8, 0, 0}, + { -86, -54, 0}, + { -80, -48, 0}, + { 7, 0, 0}, + { -92, -60, -35}, + { -96, -64, 0}, + { -7, 0, 0}, + { 80, 0, 0}, + { -80, 0, 0}, + { 15, 0, 0}, + { -15, 0, 0}, + { 48, 0, 0}, + { -48, 0, 0}, + { 7264, 0, 0}, + { 35332, 0, 0}, + { 3814, 0, 0}, + { 1, 59, 0}, + { -1, 58, 0}, + { -59, -58, 0}, + { -7615, 0, 0}, + { 74, 0, 0}, + { 86, 0, 0}, + { 100, 0, 0}, + { 128, 0, 0}, + { 112, 0, 0}, + { 126, 0, 0}, + { 9, 0, 0}, + { -74, 0, 0}, + { -9, 0, 0}, + { -7289, -7205, -7173}, + { -86, 0, 0}, + { -7235, 0, 0}, + { -100, 0, 0}, + { -7219, 0, 0}, + { -112, 0, 0}, + { -128, 0, 0}, + { -126, 0, 0}, + { -7549, -7517, 0}, + { -8415, -8383, 0}, + { -8294, -8262, 0}, + { 28, 0, 0}, + { -28, 0, 0}, + { 16, 0, 0}, + { -16, 0, 0}, + { 26, 0, 0}, + { -26, 0, 0}, + {-10743, 0, 0}, + { -3814, 0, 0}, + {-10727, 0, 0}, + {-10795, 0, 0}, + {-10792, 0, 0}, + {-10780, 0, 0}, + {-10749, 0, 0}, + {-10783, 0, 0}, + {-10782, 0, 0}, + {-10815, 0, 0}, + { -7264, 0, 0}, + {-35332, 0, 0}, + {-42280, 0, 0}, + {-42308, 0, 0}, + { 40, 0, 0}, + { -40, 0, 0}, +}; + +/* All_Cases: 1062 bytes. */ + +int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + RE_AllCases* all_cases; + int count; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_all_cases_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_all_cases_stage_2[pos + f] << 5; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_all_cases_stage_3[pos + f] << 3; + value = re_all_cases_stage_4[pos + code]; + + all_cases = &re_all_cases_table[value]; + + codepoints[0] = ch; + count = 1; + + while (count < RE_MAX_CASES && all_cases->diffs[count - 1] != 0) { + codepoints[count] = ch + all_cases->diffs[count - 1]; + ++count; + } + + return count; +} + +/* Simple_Case_Folding. */ + +static RE_UINT8 re_simple_case_folding_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_simple_case_folding_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, + 6, 6, 6, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_simple_case_folding_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 5, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 7, 8, 8, 7, 6, 6, 6, 6, 6, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 8, 20, 6, 6, 21, 6, + 6, 6, 6, 6, 22, 6, 23, 24, 25, 6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 27, 0, + 28, 29, 1, 2, 30, 31, 0, 0, 32, 33, 34, 6, 6, 6, 35, 36, + 37, 37, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, + 38, 7, 6, 6, 6, 6, 6, 6, 39, 40, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 0, 41, 42, 42, 42, 43, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 44, 44, 44, 44, 45, 46, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 47, 48, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 0, 49, 0, 50, 0, 49, 0, 49, 0, 50, 0, 51, 0, 49, 0, 0, + 0, 49, 0, 49, 0, 49, 0, 52, 0, 53, 0, 54, 0, 55, 0, 56, + 0, 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 60, 60, 0, 0, + 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 62, 63, 63, 63, 0, 0, 0, 0, 0, 0, + 42, 42, 42, 42, 42, 43, 0, 0, 0, 0, 0, 0, 64, 65, 66, 67, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 68, 32, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 47, 0, 0, + 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 6, 7, 6, 6, 6, 6, 6, 6, 6, 0, 69, + 6, 70, 23, 0, 6, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 72, 72, 72, 72, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_simple_case_folding_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, + 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 3, 0, + 0, 3, 0, 3, 0, 3, 0, 3, 4, 3, 0, 3, 0, 3, 0, 5, + 0, 6, 3, 0, 3, 0, 7, 3, 0, 8, 8, 3, 0, 0, 9, 10, + 11, 3, 0, 8, 12, 0, 13, 14, 3, 0, 0, 0, 13, 15, 0, 16, + 3, 0, 3, 0, 3, 0, 17, 3, 0, 17, 0, 0, 3, 0, 17, 3, + 0, 18, 18, 3, 0, 3, 0, 19, 3, 0, 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 20, 3, 0, 20, 3, 0, 20, 3, 0, 3, 0, 3, + 0, 3, 0, 3, 0, 0, 3, 0, 0, 20, 3, 0, 3, 0, 21, 22, + 23, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, + 0, 0, 24, 3, 0, 25, 26, 0, 0, 3, 0, 27, 28, 29, 3, 0, + 0, 0, 0, 0, 0, 30, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, 31, 0, 32, 32, 32, 0, 33, 0, 34, 34, + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, + 36, 37, 0, 0, 0, 38, 39, 0, 40, 41, 0, 0, 42, 43, 0, 3, + 0, 44, 3, 0, 0, 23, 23, 23, 45, 45, 45, 45, 45, 45, 45, 45, + 3, 0, 0, 0, 0, 0, 0, 0, 46, 3, 0, 3, 0, 3, 0, 3, + 0, 3, 0, 3, 0, 3, 0, 0, 0, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 0, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 0, 48, + 0, 0, 0, 0, 0, 48, 0, 0, 3, 0, 3, 0, 3, 0, 0, 0, + 0, 0, 0, 49, 0, 0, 50, 0, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 0, 0, 0, 51, 0, 51, 0, 51, 0, 51, + 51, 51, 52, 52, 53, 0, 54, 0, 55, 55, 55, 55, 53, 0, 0, 0, + 51, 51, 56, 56, 0, 0, 0, 0, 51, 51, 57, 57, 44, 0, 0, 0, + 58, 58, 59, 59, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, + 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, + 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 3, 0, 66, 67, 68, 0, 0, 3, 0, 3, 0, 3, 0, 69, 70, 71, + 72, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 73, 73, + 0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 74, 3, 0, + 0, 0, 0, 3, 0, 75, 0, 0, 3, 0, 76, 0, 0, 0, 0, 0, + 77, 77, 77, 77, 77, 77, 77, 77, +}; + +/* Simple_Case_Folding: 1456 bytes. */ + +static RE_INT32 re_simple_case_folding_table[] = { + 0, + 32, + 775, + 1, + -121, + -268, + 210, + 206, + 205, + 79, + 202, + 203, + 207, + 211, + 209, + 213, + 214, + 218, + 217, + 219, + 2, + -97, + -56, + -130, + 10795, + -163, + 10792, + -195, + 69, + 71, + 116, + 38, + 37, + 64, + 63, + 8, + -30, + -25, + -15, + -22, + -54, + -48, + -60, + -64, + -7, + 80, + 15, + 48, + 7264, + -58, + -7615, + -8, + -74, + -9, + -7173, + -86, + -100, + -112, + -128, + -126, + -7517, + -8383, + -8262, + 28, + 16, + 26, + -10743, + -3814, + -10727, + -10780, + -10749, + -10783, + -10782, + -10815, + -35332, + -42280, + -42308, + 40, +}; + +/* Simple_Case_Folding: 312 bytes. */ + +RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + RE_INT32 diff; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_simple_case_folding_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_simple_case_folding_stage_2[pos + f] << 5; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_simple_case_folding_stage_3[pos + f] << 3; + value = re_simple_case_folding_stage_4[pos + code]; + + diff = re_simple_case_folding_table[value]; + + return ch + diff; +} + +/* Full_Case_Folding. */ + +static RE_UINT8 re_full_case_folding_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_full_case_folding_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, 6, 6, 6, 16, + 6, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_full_case_folding_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 6, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 8, 9, 9, 10, 7, 7, 7, 7, 7, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 9, 22, 7, 7, 23, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 27, 7, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 29, 0, + 30, 31, 32, 2, 33, 34, 35, 0, 36, 37, 38, 7, 7, 7, 39, 40, + 41, 41, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, + 42, 43, 7, 7, 7, 7, 7, 7, 44, 45, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 0, 46, 47, 47, 47, 48, 0, 0, 0, 0, 0, + 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 50, 50, 50, 50, 51, 52, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 53, 54, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 0, 55, 0, 56, 0, 55, 0, 55, 0, 56, 57, 58, 0, 55, 0, 0, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 0, 0, 0, 0, 75, 76, 77, 0, 0, 0, 0, 0, 78, 78, 0, 0, + 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 80, 81, 81, 81, 0, 0, 0, 0, 0, 0, + 47, 47, 47, 47, 47, 48, 0, 0, 0, 0, 0, 0, 82, 83, 84, 85, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 86, 36, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 87, 0, 0, + 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 43, 7, 43, 7, 7, 7, 7, 7, 7, 7, 0, 88, + 7, 89, 25, 0, 7, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 91, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 93, 93, 93, 93, 93, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_full_case_folding_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 3, 4, 0, 4, 0, 4, 0, 4, 0, + 5, 0, 4, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 4, + 0, 6, 4, 0, 4, 0, 4, 0, 7, 4, 0, 4, 0, 4, 0, 8, + 0, 9, 4, 0, 4, 0, 10, 4, 0, 11, 11, 4, 0, 0, 12, 13, + 14, 4, 0, 11, 15, 0, 16, 17, 4, 0, 0, 0, 16, 18, 0, 19, + 4, 0, 4, 0, 4, 0, 20, 4, 0, 20, 0, 0, 4, 0, 20, 4, + 0, 21, 21, 4, 0, 4, 0, 22, 4, 0, 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 23, 4, 0, 23, 4, 0, 23, 4, 0, 4, 0, 4, + 0, 4, 0, 4, 0, 0, 4, 0, 24, 23, 4, 0, 4, 0, 25, 26, + 27, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 28, 4, 0, 29, 30, 0, 0, 4, 0, 31, 32, 33, 4, 0, + 0, 0, 0, 0, 0, 34, 0, 0, 4, 0, 4, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 35, 0, 36, 36, 36, 0, 37, 0, 38, 38, + 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, + 42, 43, 0, 0, 0, 44, 45, 0, 46, 47, 0, 0, 48, 49, 0, 4, + 0, 50, 4, 0, 0, 27, 27, 27, 51, 51, 51, 51, 51, 51, 51, 51, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 4, 0, + 52, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, + 0, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, + 0, 0, 0, 0, 0, 55, 0, 0, 4, 0, 4, 0, 4, 0, 56, 57, + 58, 59, 60, 61, 0, 0, 62, 0, 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 0, 0, 64, 0, 65, 0, 66, 0, 67, 0, + 0, 63, 0, 63, 0, 63, 0, 63, 68, 68, 68, 68, 68, 68, 68, 68, + 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, + 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, + 73, 73, 73, 73, 73, 73, 73, 73, 0, 0, 74, 75, 76, 0, 77, 78, + 63, 63, 79, 79, 80, 0, 81, 0, 0, 0, 82, 83, 84, 0, 85, 86, + 87, 87, 87, 87, 88, 0, 0, 0, 0, 0, 89, 90, 0, 0, 91, 92, + 63, 63, 93, 93, 0, 0, 0, 0, 0, 0, 94, 95, 96, 0, 97, 98, + 63, 63, 99, 99, 50, 0, 0, 0, 0, 0, 100, 101, 102, 0, 103, 104, + 105, 105, 106, 106, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 0, + 0, 0, 109, 110, 0, 0, 0, 0, 0, 0, 111, 0, 0, 0, 0, 0, + 112, 112, 112, 112, 112, 112, 112, 112, 0, 0, 0, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 4, 0, 114, 115, 116, 0, 0, 4, 0, 4, 0, 4, 0, 117, 118, 119, + 120, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, + 0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 0, 0, + 0, 4, 0, 4, 0, 122, 4, 0, 0, 0, 0, 4, 0, 123, 0, 0, + 4, 0, 124, 0, 0, 0, 0, 0, 125, 126, 127, 128, 129, 130, 131, 0, + 0, 0, 0, 132, 133, 134, 135, 136, 137, 137, 137, 137, 137, 137, 137, 137, +}; + +/* Full_Case_Folding: 1656 bytes. */ + +static RE_FullCaseFolding re_full_case_folding_table[] = { + { 0, 0, 0}, + { 32, 0, 0}, + { 775, 0, 0}, + { -108, 115, 0}, + { 1, 0, 0}, + { -199, 775, 0}, + { 371, 110, 0}, + { -121, 0, 0}, + { -268, 0, 0}, + { 210, 0, 0}, + { 206, 0, 0}, + { 205, 0, 0}, + { 79, 0, 0}, + { 202, 0, 0}, + { 203, 0, 0}, + { 207, 0, 0}, + { 211, 0, 0}, + { 209, 0, 0}, + { 213, 0, 0}, + { 214, 0, 0}, + { 218, 0, 0}, + { 217, 0, 0}, + { 219, 0, 0}, + { 2, 0, 0}, + { -390, 780, 0}, + { -97, 0, 0}, + { -56, 0, 0}, + { -130, 0, 0}, + { 10795, 0, 0}, + { -163, 0, 0}, + { 10792, 0, 0}, + { -195, 0, 0}, + { 69, 0, 0}, + { 71, 0, 0}, + { 116, 0, 0}, + { 38, 0, 0}, + { 37, 0, 0}, + { 64, 0, 0}, + { 63, 0, 0}, + { 41, 776, 769}, + { 21, 776, 769}, + { 8, 0, 0}, + { -30, 0, 0}, + { -25, 0, 0}, + { -15, 0, 0}, + { -22, 0, 0}, + { -54, 0, 0}, + { -48, 0, 0}, + { -60, 0, 0}, + { -64, 0, 0}, + { -7, 0, 0}, + { 80, 0, 0}, + { 15, 0, 0}, + { 48, 0, 0}, + { -34, 1410, 0}, + { 7264, 0, 0}, + { -7726, 817, 0}, + { -7715, 776, 0}, + { -7713, 778, 0}, + { -7712, 778, 0}, + { -7737, 702, 0}, + { -58, 0, 0}, + { -7723, 115, 0}, + { -8, 0, 0}, + { -7051, 787, 0}, + { -7053, 787, 768}, + { -7055, 787, 769}, + { -7057, 787, 834}, + { -128, 953, 0}, + { -136, 953, 0}, + { -112, 953, 0}, + { -120, 953, 0}, + { -64, 953, 0}, + { -72, 953, 0}, + { -66, 953, 0}, + { -7170, 953, 0}, + { -7176, 953, 0}, + { -7173, 834, 0}, + { -7174, 834, 953}, + { -74, 0, 0}, + { -7179, 953, 0}, + { -7173, 0, 0}, + { -78, 953, 0}, + { -7180, 953, 0}, + { -7190, 953, 0}, + { -7183, 834, 0}, + { -7184, 834, 953}, + { -86, 0, 0}, + { -7189, 953, 0}, + { -7193, 776, 768}, + { -7194, 776, 769}, + { -7197, 834, 0}, + { -7198, 776, 834}, + { -100, 0, 0}, + { -7197, 776, 768}, + { -7198, 776, 769}, + { -7203, 787, 0}, + { -7201, 834, 0}, + { -7202, 776, 834}, + { -112, 0, 0}, + { -118, 953, 0}, + { -7210, 953, 0}, + { -7206, 953, 0}, + { -7213, 834, 0}, + { -7214, 834, 953}, + { -128, 0, 0}, + { -126, 0, 0}, + { -7219, 953, 0}, + { -7517, 0, 0}, + { -8383, 0, 0}, + { -8262, 0, 0}, + { 28, 0, 0}, + { 16, 0, 0}, + { 26, 0, 0}, + {-10743, 0, 0}, + { -3814, 0, 0}, + {-10727, 0, 0}, + {-10780, 0, 0}, + {-10749, 0, 0}, + {-10783, 0, 0}, + {-10782, 0, 0}, + {-10815, 0, 0}, + {-35332, 0, 0}, + {-42280, 0, 0}, + {-42308, 0, 0}, + {-64154, 102, 0}, + {-64155, 105, 0}, + {-64156, 108, 0}, + {-64157, 102, 105}, + {-64158, 102, 108}, + {-64146, 116, 0}, + {-64147, 116, 0}, + {-62879, 1398, 0}, + {-62880, 1381, 0}, + {-62881, 1387, 0}, + {-62872, 1398, 0}, + {-62883, 1389, 0}, + { 40, 0, 0}, +}; + +/* Full_Case_Folding: 1104 bytes. */ + +int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + RE_FullCaseFolding* case_folding; + int count; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_full_case_folding_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_full_case_folding_stage_2[pos + f] << 5; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_full_case_folding_stage_3[pos + f] << 3; + value = re_full_case_folding_stage_4[pos + code]; + + case_folding = &re_full_case_folding_table[value]; + + codepoints[0] = ch + case_folding->diff; + count = 1; + + while (count < RE_MAX_FOLDED && case_folding->codepoints[count - 1] != 0) { + codepoints[count] = case_folding->codepoints[count - 1]; + ++count; + } + + return count; +} + +/* Property function table. */ + +RE_GetPropertyFunc re_get_property[] = { + re_get_general_category, + re_get_block, + re_get_script, + re_get_word_break, + re_get_grapheme_cluster_break, + re_get_sentence_break, + re_get_math, + re_get_alphabetic, + re_get_lowercase, + re_get_uppercase, + re_get_cased, + re_get_case_ignorable, + re_get_changes_when_lowercased, + re_get_changes_when_uppercased, + re_get_changes_when_titlecased, + re_get_changes_when_casefolded, + re_get_changes_when_casemapped, + re_get_id_start, + re_get_id_continue, + re_get_xid_start, + re_get_xid_continue, + re_get_default_ignorable_code_point, + re_get_grapheme_extend, + re_get_grapheme_base, + re_get_grapheme_link, + re_get_white_space, + re_get_bidi_control, + re_get_join_control, + re_get_dash, + re_get_hyphen, + re_get_quotation_mark, + re_get_terminal_punctuation, + re_get_other_math, + re_get_hex_digit, + re_get_ascii_hex_digit, + re_get_other_alphabetic, + re_get_ideographic, + re_get_diacritic, + re_get_extender, + re_get_other_lowercase, + re_get_other_uppercase, + re_get_noncharacter_code_point, + re_get_other_grapheme_extend, + re_get_ids_binary_operator, + re_get_ids_trinary_operator, + re_get_radical, + re_get_unified_ideograph, + re_get_other_default_ignorable_code_point, + re_get_deprecated, + re_get_soft_dotted, + re_get_logical_order_exception, + re_get_other_id_start, + re_get_other_id_continue, + re_get_sterm, + re_get_variation_selector, + re_get_pattern_white_space, + re_get_pattern_syntax, + re_get_hangul_syllable_type, + re_get_bidi_class, + re_get_canonical_combining_class, + re_get_decomposition_type, + re_get_east_asian_width, + re_get_joining_group, + re_get_joining_type, + re_get_line_break, + re_get_numeric_type, + re_get_numeric_value, + re_get_bidi_mirrored, + re_get_indic_matra_category, + re_get_indic_syllabic_category, + re_get_alphanumeric, + re_get_any, + re_get_ascii, + re_get_assigned, + re_get_blank, + re_get_graph, + re_get_print, + re_get_word, + re_get_xdigit, +}; diff --git a/src/regex/_regex_unicode.h b/src/regex/_regex_unicode.h new file mode 100644 index 000000000000..950d02e16155 --- /dev/null +++ b/src/regex/_regex_unicode.h @@ -0,0 +1,220 @@ +typedef unsigned char RE_UINT8; +typedef signed char RE_INT8; +typedef unsigned short RE_UINT16; +typedef signed short RE_INT16; +typedef unsigned int RE_UINT32; +typedef signed int RE_INT32; + +typedef unsigned char BOOL; +enum {FALSE, TRUE}; + +#define RE_ASCII_MAX 0x7F +#define RE_LOCALE_MAX 0xFF +#define RE_UNICODE_MAX 0x10FFFF + +#define RE_MAX_CASES 4 +#define RE_MAX_FOLDED 3 + +typedef struct RE_Property { + RE_UINT16 name; + RE_UINT8 id; + RE_UINT8 value_set; +} RE_Property; + +typedef struct RE_PropertyValue { + RE_UINT16 name; + RE_UINT8 value_set; + RE_UINT8 id; +} RE_PropertyValue; + +typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); + +#define RE_PROP_GC 0x0 +#define RE_PROP_CASED 0xA +#define RE_PROP_UPPERCASE 0x9 +#define RE_PROP_LOWERCASE 0x8 + +#define RE_PROP_C 30 +#define RE_PROP_L 31 +#define RE_PROP_M 32 +#define RE_PROP_N 33 +#define RE_PROP_P 34 +#define RE_PROP_S 35 +#define RE_PROP_Z 36 + +#define RE_PROP_CN 0 +#define RE_PROP_LU 1 +#define RE_PROP_LL 2 +#define RE_PROP_LT 3 +#define RE_PROP_LM 4 +#define RE_PROP_LO 5 +#define RE_PROP_MN 6 +#define RE_PROP_ME 7 +#define RE_PROP_MC 8 +#define RE_PROP_ND 9 +#define RE_PROP_NL 10 +#define RE_PROP_NO 11 +#define RE_PROP_ZS 12 +#define RE_PROP_ZL 13 +#define RE_PROP_ZP 14 +#define RE_PROP_CC 15 +#define RE_PROP_CF 16 +#define RE_PROP_CO 17 +#define RE_PROP_CS 18 +#define RE_PROP_PD 19 +#define RE_PROP_PS 20 +#define RE_PROP_PE 21 +#define RE_PROP_PC 22 +#define RE_PROP_PO 23 +#define RE_PROP_SM 24 +#define RE_PROP_SC 25 +#define RE_PROP_SK 26 +#define RE_PROP_SO 27 +#define RE_PROP_PI 28 +#define RE_PROP_PF 29 + +#define RE_PROP_C_MASK 0x00078001 +#define RE_PROP_L_MASK 0x0000003E +#define RE_PROP_M_MASK 0x000001C0 +#define RE_PROP_N_MASK 0x00000E00 +#define RE_PROP_P_MASK 0x30F80000 +#define RE_PROP_S_MASK 0x0F000000 +#define RE_PROP_Z_MASK 0x00007000 + +#define RE_PROP_ALNUM 0x460001 +#define RE_PROP_ALPHA 0x070001 +#define RE_PROP_ANY 0x470001 +#define RE_PROP_ASCII 0x480001 +#define RE_PROP_ASSIGNED 0x490001 +#define RE_PROP_BLANK 0x4A0001 +#define RE_PROP_CNTRL 0x00000F +#define RE_PROP_DIGIT 0x000009 +#define RE_PROP_GRAPH 0x4B0001 +#define RE_PROP_LOWER 0x080001 +#define RE_PROP_PRINT 0x4C0001 +#define RE_PROP_PUNCT 0x000022 +#define RE_PROP_SPACE 0x190001 +#define RE_PROP_UPPER 0x090001 +#define RE_PROP_WORD 0x4D0001 +#define RE_PROP_XDIGIT 0x4E0001 + +#define RE_BREAK_OTHER 0 +#define RE_BREAK_DOUBLEQUOTE 1 +#define RE_BREAK_SINGLEQUOTE 2 +#define RE_BREAK_HEBREWLETTER 3 +#define RE_BREAK_CR 4 +#define RE_BREAK_LF 5 +#define RE_BREAK_NEWLINE 6 +#define RE_BREAK_EXTEND 7 +#define RE_BREAK_REGIONALINDICATOR 8 +#define RE_BREAK_FORMAT 9 +#define RE_BREAK_KATAKANA 10 +#define RE_BREAK_ALETTER 11 +#define RE_BREAK_MIDLETTER 12 +#define RE_BREAK_MIDNUM 13 +#define RE_BREAK_MIDNUMLET 14 +#define RE_BREAK_NUMERIC 15 +#define RE_BREAK_EXTENDNUMLET 16 + +#define RE_GBREAK_OTHER 0 +#define RE_GBREAK_CR 1 +#define RE_GBREAK_LF 2 +#define RE_GBREAK_CONTROL 3 +#define RE_GBREAK_EXTEND 4 +#define RE_GBREAK_REGIONALINDICATOR 5 +#define RE_GBREAK_SPACINGMARK 6 +#define RE_GBREAK_L 7 +#define RE_GBREAK_V 8 +#define RE_GBREAK_T 9 +#define RE_GBREAK_LV 10 +#define RE_GBREAK_LVT 11 +#define RE_GBREAK_PREPEND 12 + +extern char* re_strings[1155]; +extern RE_Property re_properties[145]; +extern RE_PropertyValue re_property_values[1244]; +extern RE_UINT16 re_expand_on_folding[104]; +extern RE_GetPropertyFunc re_get_property[79]; + +RE_UINT32 re_get_general_category(RE_UINT32 ch); +RE_UINT32 re_get_block(RE_UINT32 ch); +RE_UINT32 re_get_script(RE_UINT32 ch); +RE_UINT32 re_get_word_break(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch); +RE_UINT32 re_get_sentence_break(RE_UINT32 ch); +RE_UINT32 re_get_math(RE_UINT32 ch); +RE_UINT32 re_get_alphabetic(RE_UINT32 ch); +RE_UINT32 re_get_lowercase(RE_UINT32 ch); +RE_UINT32 re_get_uppercase(RE_UINT32 ch); +RE_UINT32 re_get_cased(RE_UINT32 ch); +RE_UINT32 re_get_case_ignorable(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch); +RE_UINT32 re_get_id_start(RE_UINT32 ch); +RE_UINT32 re_get_id_continue(RE_UINT32 ch); +RE_UINT32 re_get_xid_start(RE_UINT32 ch); +RE_UINT32 re_get_xid_continue(RE_UINT32 ch); +RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_base(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_link(RE_UINT32 ch); +RE_UINT32 re_get_white_space(RE_UINT32 ch); +RE_UINT32 re_get_bidi_control(RE_UINT32 ch); +RE_UINT32 re_get_join_control(RE_UINT32 ch); +RE_UINT32 re_get_dash(RE_UINT32 ch); +RE_UINT32 re_get_hyphen(RE_UINT32 ch); +RE_UINT32 re_get_quotation_mark(RE_UINT32 ch); +RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch); +RE_UINT32 re_get_other_math(RE_UINT32 ch); +RE_UINT32 re_get_hex_digit(RE_UINT32 ch); +RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch); +RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch); +RE_UINT32 re_get_ideographic(RE_UINT32 ch); +RE_UINT32 re_get_diacritic(RE_UINT32 ch); +RE_UINT32 re_get_extender(RE_UINT32 ch); +RE_UINT32 re_get_other_lowercase(RE_UINT32 ch); +RE_UINT32 re_get_other_uppercase(RE_UINT32 ch); +RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch); +RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch); +RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch); +RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch); +RE_UINT32 re_get_radical(RE_UINT32 ch); +RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch); +RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch); +RE_UINT32 re_get_deprecated(RE_UINT32 ch); +RE_UINT32 re_get_soft_dotted(RE_UINT32 ch); +RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch); +RE_UINT32 re_get_other_id_start(RE_UINT32 ch); +RE_UINT32 re_get_other_id_continue(RE_UINT32 ch); +RE_UINT32 re_get_sterm(RE_UINT32 ch); +RE_UINT32 re_get_variation_selector(RE_UINT32 ch); +RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch); +RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch); +RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch); +RE_UINT32 re_get_bidi_class(RE_UINT32 ch); +RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch); +RE_UINT32 re_get_decomposition_type(RE_UINT32 ch); +RE_UINT32 re_get_east_asian_width(RE_UINT32 ch); +RE_UINT32 re_get_joining_group(RE_UINT32 ch); +RE_UINT32 re_get_joining_type(RE_UINT32 ch); +RE_UINT32 re_get_line_break(RE_UINT32 ch); +RE_UINT32 re_get_numeric_type(RE_UINT32 ch); +RE_UINT32 re_get_numeric_value(RE_UINT32 ch); +RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch); +RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch); +RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch); +RE_UINT32 re_get_alphanumeric(RE_UINT32 ch); +RE_UINT32 re_get_any(RE_UINT32 ch); +RE_UINT32 re_get_ascii(RE_UINT32 ch); +RE_UINT32 re_get_assigned(RE_UINT32 ch); +RE_UINT32 re_get_blank(RE_UINT32 ch); +RE_UINT32 re_get_graph(RE_UINT32 ch); +RE_UINT32 re_get_print(RE_UINT32 ch); +RE_UINT32 re_get_word(RE_UINT32 ch); +RE_UINT32 re_get_xdigit(RE_UINT32 ch); +int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints); +RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch); +int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);