Skip to content

Commit

Permalink
fix: modernize rest lexer
Browse files (browse the repository at this point in the history)
References: orbitalquark#76
  • Loading branch information
mcepl committed Jan 16, 2025
1 parent 1642cfe commit ca2e3fd
Showing 1 changed file with 31 additions and 41 deletions.
72 changes: 31 additions & 41 deletions lexers/rest.lua
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,7 @@
-- reStructuredText LPeg lexer.

local lexer = lexer
local token, word_match, starts_line = lexer.token, lexer.word_match, lexer.starts_line
local token, starts_line = lexer.token, lexer.starts_line
local P, S = lpeg.P, lpeg.S

local lex = lexer.new(...)
Expand All @@ -19,8 +19,7 @@ local block = '::' * (lexer.newline + -1) * function(input, index)
end
return #input + 1
end
lex:add_rule('literal_block', token('literal_block', block))
lex:add_style('literal_block', lexer.styles.embedded .. {eolfilled = true})
lex:add_rule('literal_block', lex:tag('literal_block', block))

-- Lists.
local option_word = lexer.alnum * (lexer.alnum + '-')^0
Expand All @@ -31,7 +30,7 @@ local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)')
local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1
lex:add_rule('list', #(lexer.space^0 * (S('*+-:/') + enum_list)) *
starts_line(token(lexer.LIST,
starts_line(lex:tag(lexer.LIST,
lexer.space^0 * (option_list + bullet_list + enum_list + field_list) * lexer.space)))

local any_indent = S(' \t')^0
Expand All @@ -40,15 +39,12 @@ local prefix = any_indent * '.. '

-- Explicit markup blocks.
local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']'
local footnote = token('footnote_block', prefix * footnote_label * lexer.space)
local footnote = lex:tag('footnote_block', prefix * footnote_label * lexer.space)
local citation_label = '[' * word * ']'
local citation = token('citation_block', prefix * citation_label * lexer.space)
local link = token('link_block', prefix * '_' *
local citation = lex:tag('citation_block', prefix * citation_label * lexer.space)
local link = lex:tag('link_block', prefix * '_' *
(lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space)
lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link))
lex:add_style('footnote_block', lexer.styles.label)
lex:add_style('citation_block', lexer.styles.label)
lex:add_style('link_block', lexer.styles.label)

-- Sphinx code block.
local indented_block = function(input, index)
Expand All @@ -61,11 +57,11 @@ local indented_block = function(input, index)
end
local code_block =
prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) * indented_block
lex:add_rule('code_block', #prefix * token('code_block', starts_line(code_block)))
lex:add_style('code_block', lexer.styles.embedded .. {eolfilled = true})
lex:add_rule('code_block', #prefix * lex:tag('code_block', starts_line(code_block)))

-- Directives.
local known_directive = token('directive', prefix * word_match{
local known_directive = lex:tag('directive', prefix * lex:word_match('directive') * '::' * lexer.space)
lex:set_word_list('directive', {
-- Admonitions
'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning',
'admonition',
Expand All @@ -86,8 +82,9 @@ local known_directive = token('directive', prefix * word_match{
'replace', 'unicode', 'date',
-- Miscellaneous
'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive'
} * '::' * lexer.space)
local sphinx_directive = token('sphinx_directive', prefix * word_match{
})
local sphinx_directive = lex:tag('sphinx_directive', prefix * lex:word_match('sphinx_directive') * '::' * lexer.space)
lex:set_word_list('sphinx_directive', {
-- The TOC tree.
'toctree',
-- Paragraph-level markup.
Expand All @@ -97,24 +94,20 @@ local sphinx_directive = token('sphinx_directive', prefix * word_match{
'highlight', 'literalinclude',
-- Miscellaneous
'sectionauthor', 'index', 'only', 'tabularcolumns'
} * '::' * lexer.space)
local unknown_directive = token('unknown_directive', prefix * word * '::' * lexer.space)
})
local unknown_directive = lex:tag('unknown_directive', prefix * word * '::' * lexer.space)
lex:add_rule('directive',
#prefix * starts_line(known_directive + sphinx_directive + unknown_directive))
lex:add_style('directive', lexer.styles.keyword)
lex:add_style('sphinx_directive', lexer.styles.keyword .. {bold = true})
lex:add_style('unknown_directive', lexer.styles.keyword .. {italics = true})

-- Substitution definitions.
lex:add_rule('substitution', #prefix * token('substitution', starts_line(prefix * lexer.range('|') *
lex:add_rule('substitution', #prefix * lex:tag('substitution', starts_line(prefix * lexer.range('|') *
lexer.space^1 * word * '::' * lexer.space)))
lex:add_style('substitution', lexer.styles.variable)

-- Comments.
local line_comment = lexer.to_eol(prefix)
local bprefix = any_indent * '..'
local block_comment = bprefix * lexer.newline * indented_block
lex:add_rule('comment', #bprefix * token(lexer.COMMENT, starts_line(line_comment + block_comment)))
lex:add_rule('comment', #bprefix * lex:tag(lexer.COMMENT, starts_line(line_comment + block_comment)))

-- Section titles (2 or more characters).
local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
Expand All @@ -135,35 +128,32 @@ local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
return pos and index - #adm + pos - 1 or nil
end)
-- Token needs to be a predefined one in order for folder to work.
lex:add_rule('title', token(lexer.HEADING, overline + underline))
lex:add_rule('title', lex:tag(lexer.HEADING, overline + underline))

-- Line block.
lex:add_rule('line_block_char', token(lexer.OPERATOR, starts_line(any_indent * '|')))
lex:add_rule('line_block_char', lex:tag(lexer.OPERATOR, starts_line(any_indent * '|')))

-- Inline markup.
local strong = token(lexer.BOLD, lexer.range('**'))
local em = token(lexer.ITALIC, lexer.range('*'))
local inline_literal = token('inline_literal', lexer.range('``'))
local strong = lex:tag(lexer.BOLD, lexer.range('**'))
local em = lex:tag(lexer.ITALIC, lexer.range('*'))
local inline_literal = lex:tag('inline_literal', lexer.range('``'))
local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1
local prefix_link = '_' * lexer.range('`')
local link_ref = token(lexer.LINK, postfix_link + prefix_link)
local role = token('role', ':' * word * ':' * (word * ':')^-1)
local interpreted = role^-1 * token('interpreted', lexer.range('`')) * role^-1
local footnote_ref = token(lexer.REFERENCE, footnote_label * '_')
local citation_ref = token(lexer.REFERENCE, citation_label * '_')
local substitution_ref = token('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1)
local link = token(lexer.LINK,
local link_ref = lex:tag(lexer.LINK, postfix_link + prefix_link)
local role = lex:tag('role', ':' * word * ':' * (word * ':')^-1)
local interpreted = role^-1 * lex:tag('interpreted', lexer.range('`')) * role^-1
local footnote_ref = lex:tag(lexer.REFERENCE, footnote_label * '_')
local citation_ref = lex:tag(lexer.REFERENCE, citation_label * '_')
local substitution_ref = lex:tag('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1)
local link = lex:tag(lexer.LINK,
lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1)
lex:add_rule('inline_markup',
(strong + em + inline_literal + link_ref + interpreted + footnote_ref + citation_ref +
substitution_ref + link) * -lexer.alnum)
lex:add_style('inline_literal', lexer.styles.embedded)
lex:add_style('role', lexer.styles.class)
lex:add_style('interpreted', lexer.styles.string)

-- Other.
lex:add_rule('non_space', token(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
lex:add_rule('escape', token(lexer.DEFAULT, '\\' * lexer.any))
lex:add_rule('non_space', lex:tag(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
lex:add_rule('escape', lex:tag(lexer.DEFAULT, '\\' * lexer.any))

-- Section-based folding.
local sphinx_levels = {
Expand Down Expand Up @@ -202,7 +192,7 @@ local bash = lexer.load('bash')
local bash_indent_level
local start_rule =
#(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) *
sphinx_directive * token('bash_begin', P(function(input, index)
sphinx_directive * lex:tag('bash_begin', P(function(input, index)
bash_indent_level = #input:match('^([ \t]*)', index)
return index
end))]]
Expand Down

0 comments on commit ca2e3fd

Please sign in to comment.