From 3b81fffca09f4932540ae51786f379a8ee76759d Mon Sep 17 00:00:00 2001 From: Asko Soukka Date: Tue, 5 Mar 2019 20:12:18 +0200 Subject: [PATCH] Merge https://github.com/gtri/irobotframework/pull/1 into robotmode --- src/jupyterlab_robotmode/src/mode.ts | 415 +++++++++++++++++---------- 1 file changed, 264 insertions(+), 151 deletions(-) diff --git a/src/jupyterlab_robotmode/src/mode.ts b/src/jupyterlab_robotmode/src/mode.ts index cc00c3a..c28fe61 100644 --- a/src/jupyterlab_robotmode/src/mode.ts +++ b/src/jupyterlab_robotmode/src/mode.ts @@ -22,6 +22,56 @@ import 'codemirror/addon/mode/simple'; import * as CodeMirror from 'codemirror'; +/** All the possible states: pushing non-existing states == bad */ +export type TMainState = 'test_cases' | 'keywords' | 'settings' | 'variables'; +export type TState = + | TMainState + | 'double_string' + | 'keyword_def' + | 'keyword_invocation' + | 'keyword_invocation_no_continue' + | 'library' + | 'loop_body_old' + | 'loop_start_new' + | 'loop_start_old' + | 'single_string' + | 'start' + | 'tags' + | 'tags_comma' + | 'variable_index' + | 'variable_property' + | 'variable'; + +/** the tokens we use */ +export enum TT { + AM = 'atom', + AT = 'attribute', + BE = 'builtin.em', + BI = 'builtin', + BK = 'bracket', + CM = 'comment', + DF = 'def', + HL = 'header.link', + KW = 'keyword', + MT = 'meta', + NB = 'number', + OP = 'operator', + PC = 'punctuation', + PR = 'property', + SE = 'string.em', + SH = 'string.header', + SS = 'string.strong', + SSE = 'string.strong.em', + S2 = 'string-2', + ST = 'string', + TG = 'tag', + V2 = 'variable-2' +} + +export function LINK(token: TT): TT { + return (token + '.link') as any; +} + /** An implementation of the CodeMirror simple mode object @@ -31,13 +81,13 @@ interface ISimpleState { /** The regular expression that matches the token. May be a string or a regex object. When a regex, the ignoreCase flag will be taken into account when matching the token. This regex has to capture groups when the token property is an array. If it captures groups, it must capture all of the string (since JS provides no way to find out where a group matched). */ regex: string | RegExp; /// An optional token style. Multiple styles can be specified by separating them with dots or spaces. When this property holds an array of token styles, the regex for this rule must capture a group for each array item. - token?: string | string[] | null; + token?: TT | TT[] | null; /// When true, this token will only match at the start of the line. (The ^ regexp marker doesn't work as you'd expect in this context because of limitations in JavaScript's RegExp API.) sol?: boolean; /// When a next property is present, the mode will transfer to the state named by the property when the token is encountered. - next?: string; + next?: TState; /// Like next, but instead replacing the current state by the new state, the current state is kept on a stack, and can be returned to with the pop directive. - push?: string; + push?: TState; /// When true, and there is another state on the state stack, will cause the mode to pop that state off the stack and transition to it. pop?: boolean; /// Can be used to embed another mode inside a mode. When present, must hold an object with a spec property that describes the embedded mode, and an optional end end property that specifies the regexp that will end the extent of the mode. When a persistent property is set (and true), the nested mode's state will be preserved between occurrences of the mode. @@ -55,21 +105,19 @@ interface ISimpleState { } /** A string-keyed set of simple state lists */ -export interface IStates { - [key: string]: ISimpleState[]; -} +export type IStates = { [key in TState]: ISimpleState[] }; /** helper function for compactly representing a rule */ function r( regex: RegExp, - token?: string | string[], + token?: TT | TT[], opt?: Partial ): ISimpleState { return { regex, token, ...opt }; } /** Possible Robot Framework table names. Group count is important. */ -const TABLE_NAMES: { [key: string]: RegExp } = { +const TABLE_NAMES: { [key in TMainState]: RegExp } = { keywords: /(\|\s)?(\*+ *)(user keywords?|keywords?)( *\**)/i, settings: /(\|\s)?(\*+ *)(settings?)( *\**)/i, test_cases: /(\|\s)?(\*+ *)(tasks?|test cases?)( *\**)/i, @@ -77,23 +125,14 @@ const TABLE_NAMES: { [key: string]: RegExp } = { }; /** Enumerate the possible rules */ -const RULES_TABLE: ISimpleState[] = Object.keys(TABLE_NAMES).map( - (next: string) => { - return r( - TABLE_NAMES[next], - ['bracket', 'header', 'header', 'header'], - { - next, - sol: true - } - ); - } -); +const RULES_TABLE = Object.keys(TABLE_NAMES).map((next: TMainState) => { + return r(TABLE_NAMES[next], [TT.BK, TT.HL, TT.HL, TT.HL], { + next, + sol: true + }); +}); -/** Pattern to match the start of a variable */ -const VAR_START = /[\$&@%]\{/; -/** Pattern to match the end of a variable */ -const VAR_END = /\}/; +const RULE_COMMENT_POP = r(/#.*$/, TT.CM, { pop: true }); /** Valid python operators */ const VAR_OP = /[*\-+\\%&|=> = {}; /** base isn't a state. these are the "normal business" that any state might use */ const base = [ ...RULES_TABLE, RULE_VAR_START, RULE_VAR_END, - r(/\|/, 'bracket'), - r(/#.*$/, 'comment'), - r(/\\ +/, 'bracket'), - r(/\\(?=$)/, 'bracket'), - r(/([^\s=]*)(=)/, ['attribute', 'operator']), - r(/^(?!http|https)([^\s:]*)(:)/, ['attribute', 'operator']), - r(/_\*.*?\*_/, 'string.strong.em'), - r(/\*.*?\*/, 'string.strong'), - r(/\_.*?\_/, 'string.em'), + RULE_DOC_TAGS, + RULE_ELLIPSIS, + r(/\|/, TT.BK), + r(/#.*$/, TT.CM), + r(/\\ +/, TT.BK), + r(/\\(?=$)/, TT.BK), + r( + // a non-variable argument fragment before a variable before an equal + /([^\s\$@&%=]((?!\t+|\s+\|\s+| +)([^=]|\\=))*?)(?=[$@&%].*?[^ =\\]=($| |[^=]|\s+\||\t))/, + TT.AT + ), + r( + // a non-variable argument fragment before an equal + /([^\s\$@&%=]((?!\t+|\s+\|\s+| +)([^=]|\\=))*?)(?==($| |[^=]|\s+\||\t))/, + TT.AT + ), + r(/^(?!http|https)([^\s:]*)(:)/, TT.OP), + r(/(=!<>+\-*\/%)*==?/, TT.OP), + r(/_\*.*?\*_/, TT.SSE), + r(/\*.*?\*/, TT.SS), + r(/\_.*?\_/, TT.SE), // this is pretty extreme, but seems to work - r(/[^\s\$]+/, 'string') + r(/[^\s\$@%&]+/, TT.ST), + r(/[\$@%&](?!\{)/, TT.ST) ]; /** the starting state (begining of a file) */ states.start = [ - r(/(%%python)( module )?(.*)?/, ['meta', 'keyword', 'variable'], { + r(/(%%python)( module )?(.*)?/, [TT.MT, TT.KW, TT.V2], { mode: { spec: 'ipython' }, sol: true }), - r(/(%%[^\s]*).*$/, 'meta', { sol: true }), + r(/(%%[^\s]*).*$/, TT.MT, { sol: true }), ...base ]; @@ -213,26 +280,31 @@ states.settings = [ RULE_SETTING_LIBRARY_PIPE, r( /(\|*\s*)(resource|variables|documentation|metadata|test timeout|task timeout)(\s*)/i, - ['bracket', 'meta', null], + [TT.BK, TT.MT, null], { sol: true } ), ...base ]; states.library = [ + RULE_NOT_ELLIPSIS_POP, + RULE_ELLIPSIS, RULE_LINE_ENDS_WITH_VAR, - r(/WITH NAME$/, 'atom', { pop: true }), - r(/WITH NAME/, 'atom'), - r(/[^\}\|\s]*$/, 'string', { pop: true }), + r( + /(WITH NAME)(\t+| +| +\| +)([^\|\s]*)(\s*)(\|?)(\s*)(?=$)/, + [TT.AM, TT.BK, TT.DF, null, TT.BK, null], + { + pop: true + } + ), + // r(/[^\}\|\s]*$/, TT.ST, { pop: true }), ...base ]; -const RULE_ELLIPSIS = r(/(\s*)(\.\.\.)/, [null, 'bracket']); - /** rule for behavior-driven-development keywords */ const RULE_START_BDD = r( /(\|\s*\|\s*|\s\s+)?(given|when|then|and|but)/i, - ['bracket', 'builtin.em'], + [TT.BK, TT.BE], { push: 'keyword_invocation', sol: true @@ -244,14 +316,18 @@ const RULE_KEY_START = r(/(\t+| +)(?!\.\.\.)/, null, { sol: true }); /** rule for pipe keywords */ -const RULE_KEY_START_PIPE = r(/(\|\s*\|)(\s+)/, ['bracket', null], { - push: 'keyword_invocation', - sol: true -}); +const RULE_KEY_START_PIPE = r( + /(\| )(\s*)(|[^\|\s][^\|]*)(\s*)( \|)(\s+)/, + [TT.BK, null, TT.SH, null, TT.BK, null], + { + push: 'keyword_invocation', + sol: true + } +); /** rule for for old-style loops (slashes) */ const RULE_START_LOOP_OLD = r( /(\s\|*\s*)(:FOR)(\s\|*\s*)/, - [null, 'atom', null], + [null, TT.AM, null], { push: 'loop_start_old', sol: true @@ -260,26 +336,39 @@ const RULE_START_LOOP_OLD = r( /** rule for for new-style loops (slashes) */ const RULE_START_LOOP_NEW = r( /(\s\|*\s*)(FOR)(\s\|*\s*)/, - [null, 'atom', null], + [null, TT.AM, null], { push: 'loop_start_new', sol: true } ); -/** rules for capturing individual tags */ -states.tags = [ - r(/\s\|\s*/, 'bracket'), - r(/^($|\n)/, null, { pop: true }), +const RULES_TAGS_COMMON = [ + r(/\s\|\s*/, TT.BK), + RULE_COMMENT_POP, + RULE_ELLIPSIS, + RULE_NOT_ELLIPSIS_POP, RULE_VAR_START, RULE_LINE_ENDS_WITH_VAR, RULE_VAR_END, - r(/^\s*(?=$)/, null, { pop: true }), - r(/ +/, null), - r(/[^\$&%@]*?(?=( +| \|))/, 'tag'), - r(/[^\$&%@]*?(?=\s*\|?$)/, 'tag', { pop: true }), + r(/ +/, null) +]; + +/** rules for capturing individual tags */ +states.tags = [ + ...RULES_TAGS_COMMON, + r(/[^\$&%@]*?(?=( +| \|))/, TT.TG), + // fall back to single char + r(/[^\$&%@|]/, TT.TG) +]; + +/** rules for capturing tags inside docs */ +states.tags_comma = [ + ...RULES_TAGS_COMMON, + r(/(,)(\s*)/, [TT.PC, null]), + r(/[^\$&%@,]+(?=,$)/, TT.TG), // fall back to single char - r(/[^\$&%@|]/, 'tag') + r(/[^\$&%@|,]/, TT.TG) ]; /** need to catch empty white lines pretty explicitly */ @@ -290,20 +379,29 @@ const RULES_KEYWORD_INVOKING = [ RULE_START_BDD, RULE_KEY_START_PIPE, RULE_KEY_START, - r(/\|\s(?=[^\s]*\|)/, null, { sol: true, push: 'keyword_invocation' }), - r(/(?=[^\s])/, null, { sol: true, push: 'keyword_invocation' }) + r(/\|\s(?=[^\s*]*\|)/, null, { sol: true, push: 'keyword_invocation' }), + r(/(?=[^\s*])/, null, { sol: true, push: 'keyword_invocation' }) ]; +const RULE_SETTING_SIMPLE = r( + /(\t+| +)(\[\s*)(arguments|documentation|return|timeout)(\s*\])(\s*)/i, + [null, TT.MT, TT.MT, TT.MT, null], + { sol: true } +); + +const RULE_SETTING_SIMPLE_PIPE = r( + /(\|)(\s+)([^|*]*)(\s+)(\|)(\s+)(\[\s*)(arguments|documentation|return|timeout)(\s*\])(\s*)(\|?)/i, + [TT.BK, null, TT.SH, null, TT.BK, null, TT.MT, TT.MT, TT.MT, null, TT.BK], + { sol: true } +); + /** rules for data rows inside a keyword table */ states.keywords = [ RULE_ELLIPSIS, RULE_TAGS, RULE_SETTING_KEYWORD, - r( - /([\|\s]*\s*)(\[\s*)(arguments|documentation|return|timeout)(\s*\])(\s*\|?)/i, - ['bracket', 'meta', 'meta', 'meta', 'bracket'], - { sol: true } - ), + RULE_SETTING_SIMPLE, + RULE_SETTING_SIMPLE_PIPE, r(/(?=[^\s$&%@*|]+)/, null, { sol: true, push: 'keyword_def' }), RULE_START_LOOP_OLD, RULE_START_LOOP_NEW, @@ -315,35 +413,35 @@ states.keywords = [ /** a keyword name fragment before an inline variable */ const KEYWORD_WORD_BEFORE_VAR = /([^\s]*?(?=[\$&%@]\{))/i; /** a keyword name fragment before a separator */ -const KEYWORD_WORD_BEFORE_SEP = /[^\s\|]+(?=$|[|]|\t)/; +const KEYWORD_WORD_BEFORE_SEP = /[^\s\|]+(?=$|[|]|\t| +)/; /** a keyword name fragment before a non-separator whitespace character */ const KEYWORD_WORD_BEFORE_WS = /([^\n\$\s*=\|]+?(?= ))/i; states.keyword_def = [ RULE_VAR_START, - r(/\}(?=$)/, 'variable-2'), + RULE_LINE_ENDS_WITH_VAR, RULE_VAR_END, r(/ /, null), - r(KEYWORD_WORD_BEFORE_VAR, 'def'), - r(KEYWORD_WORD_BEFORE_SEP, 'def', { pop: true }), - r(KEYWORD_WORD_BEFORE_WS, 'def'), + r(KEYWORD_WORD_BEFORE_VAR, TT.DF), + r(KEYWORD_WORD_BEFORE_SEP, TT.DF, { pop: true }), + r(KEYWORD_WORD_BEFORE_WS, TT.DF), r(/(?=$)/, null, { sol: true, pop: true }) ]; /** A range as used in for loops */ const RULE_RANGE = r(/([\|\s]*\s*)(IN)( RANGE| ENUMERATE| ZIP)?/, [ null, - 'atom', - 'atom' + TT.AM, + TT.AM ]); states.loop_start_new = [ RULE_RANGE, - r(/[.]{3}/, 'bracket'), + r(/[.]{3}/, TT.BK), RULE_VAR_START, - r(/\}(?=$)/, 'variable-2'), + r(/\}(?=$)/, TT.V2), RULE_VAR_END, - r(/([\|\s]*\s*)(END)/, [null, 'atom'], { sol: true, pop: true }), + r(/([\|\s]*\s*)(END)/, [null, TT.AM], { sol: true, pop: true }), RULE_WS_LINE, ...RULES_KEYWORD_INVOKING, ...base @@ -353,7 +451,7 @@ states.loop_start_old = [ r(/(?=.*)/, null, { sol: true, next: 'loop_body_old' }), RULE_RANGE, RULE_VAR_START, - r(/\}(?=$)/, 'variable-2'), + r(/\}(?=$)/, TT.V2), RULE_VAR_END, ...base ]; @@ -368,73 +466,88 @@ states.loop_body_old = [ ), token: rule.token instanceof Array - ? [null, 'bracket', ...rule.token] - : [null, 'bracket', rule.token] + ? [null, TT.BK, ...rule.token] + : [null, TT.BK, rule.token] }; }), r(/(?=\s+[^\\])/, null, { pop: true, sol: true }), ...base ]; +const RULE_CASE_SETTING_SIMPLE = r( + /(\t+| +)(\[\s*)(documentation|timeout)(\s*\])(\s*)/i, + [null, TT.MT, TT.MT, TT.MT, null], + { sol: true } +); + +const RULE_CASE_SETTING_SIMPLE_PIPE = r( + /(\|)(\s+)([^|*]*)(\s+)(\|)(\s+)(\[\s*)(documentation|timeout)(\s*\])(\s*)(\|?)/i, + [TT.BK, null, TT.SH, null, TT.BK, null, TT.MT, TT.MT, TT.MT, null, TT.BK], + { sol: true } +); + /** rules for data rows inside test/task definition */ states.test_cases = [ + ...RULES_TABLE, + RULE_WS_LINE, RULE_ELLIPSIS, RULE_TAGS, RULE_SETTING_KEYWORD, - r( - /([\|\s]*\s*)(\[\s*)(documentation|timeout)(\s*\])/i, - ['bracket', 'meta', 'meta', 'meta'], - { sol: true } - ), + RULE_CASE_SETTING_SIMPLE, + RULE_CASE_SETTING_SIMPLE_PIPE, RULE_START_LOOP_OLD, RULE_START_LOOP_NEW, - r( - /(\|\s+)([^\s*\|\.][^\|]*?)(\s*)(\|?$)/, - ['bracket', 'string.header', 'bracket'], - { - sol: true - } - ), - r(/(\| +)(.+?)( \| )/, ['bracket', 'string.header', 'bracket'], { + r(/([^|\s*].+?)(?=(\t| +|$))/, TT.SH, { sol: true }), + ...RULES_KEYWORD_INVOKING, + r(/(\|\s+)([^\s*\|\.][^\|]*?)(\s*)(\|?$)/, [TT.BK, TT.SH, TT.BK], { + sol: true + }), + r(/(\| +)([^\|\s].+?)(\s*)( \| )/, [TT.BK, TT.SH, null, TT.BK], { sol: true }), - r(/([^|\s*].+$)/, 'string.header', { sol: true }), - RULE_WS_LINE, - ...RULES_KEYWORD_INVOKING, ...base ]; /** rules for inside of an invoked keyword instance */ states.keyword_invocation = [ + r(/( ?)(=)(\t+| +|\s+\|)/, [null, TT.OP, null]), r(/(?=\s*$)/, null, { pop: true }), - r(/(\\|\.\.\.) +/, 'bracket', { pop: true }), + r(/(\\|\.\.\.) +/, TT.BK, { pop: true }), RULE_VAR_START, RULE_LINE_ENDS_WITH_VAR, RULE_VAR_END, - r(/#.*$/, 'comment', { pop: true }), - r(/( \| | +)/, 'bracket', { pop: true }), - r(/ ?=( +)/, 'operator'), + RULE_COMMENT_POP, + r(/( \| | +|\t+)(?=[$@&])/, TT.BK), + r(/( \| | +|\t+)/, TT.BK, { pop: true }), r(/ /, null), - r(KEYWORD_WORD_BEFORE_VAR, 'keyword'), - r(KEYWORD_WORD_BEFORE_SEP, 'keyword', { pop: true }), - r(KEYWORD_WORD_BEFORE_WS, 'keyword'), + r(KEYWORD_WORD_BEFORE_VAR, TT.KW, { pop: true }), + r(KEYWORD_WORD_BEFORE_SEP, TT.KW, { pop: true }), + r(KEYWORD_WORD_BEFORE_WS, TT.KW), ...base ]; +states.keyword_invocation_no_continue = [ + RULE_NOT_ELLIPSIS_POP, + ...states.keyword_invocation +]; + /** curious rule for the variables table */ states.variables = [...base]; /** rules for inside of a variable reference */ states.variable = [ RULE_VAR_START, - r(VAR_BUILTIN, 'builtin'), + r(VAR_BUILTIN, TT.BI), RULE_NUM, - r(VAR_OP, 'operator'), - r(/\./, 'operator', { push: 'variable_property' }), - r(/\[/, 'bracket', { next: 'variable_index' }), - r(/\}(?=\[)/, 'variable-2'), - r(/[^}\n$]/, 'variable-2'), - r(/^(?=\})/, 'variable-2', { pop: true }) + r(VAR_OP, TT.OP), + r(/(:)(.*?[^\\])(?=\}\s*$)/, [TT.OP, TT.S2], { pop: true }), + r(/(:)(.*?[^\\])(?=\})/, [TT.OP, TT.S2]), + r(/\./, TT.OP, { push: 'variable_property' }), + r(/\[/, TT.BK, { next: 'variable_index' }), + r(/\}(?=\[)/, TT.V2), + r(/(?=\}\s*$)/, null, { pop: true }), + r(/\}/, TT.V2, { pop: true }), + r(/[^\{\}\n:]/, TT.V2) ]; /** rules for extended syntax in a variable reference */ @@ -444,28 +557,28 @@ states.variable_property = [ RULE_NUM, RULE_SINGLE_STRING_START, RULE_DOUBLE_STRING_START, - r(VAR_OP, 'operator'), - r(/\(/, 'bracket'), - r(/\)/, 'bracket', { pop: true }), - r(/([a-z_][a-z_\d]*)(=)/i, ['variable-2', 'operator']), - r(/,/, 'punctuation'), - r(/[^}](?=\})/, 'property', { pop: true }), - r(/(^\})(\s*(?=$|\n))/, ['bracket', null], { pop: true }), - r(/^\t*(?=$|\n)/, null, { pop: true }), - r(/[^}]/, 'property') + r(VAR_OP, TT.OP), + r(/\(/, TT.BK), + r(/\)/, TT.BK, { pop: true }), + r(/([a-z_][a-z_\d]*)(=)/i, [TT.V2, TT.OP]), + r(/,/, TT.PC), + r(/[^}](?=\})/, TT.PR, { pop: true }), + r(/(\})(\s*(?=$|\n))/, [TT.BK, null], { pop: true }), + r(/\t*(?=$|\n)/, null, { pop: true }), + r(/[^}]/, TT.PR) ]; /** rules for strings with single quotes */ states.single_string = [ - r(/\\'/, 'string'), - r(/'/, 'string', { pop: true }), - r(/./, 'string') + r(/\\'/, TT.ST), + r(/'/, TT.ST, { pop: true }), + r(/./, TT.ST) ]; /** rules for strings with double quotes */ states.double_string = [ - r(/\\"/, 'string'), - r(/"/, 'string', { pop: true }), - r(/./, 'string') + r(/\\"/, TT.ST), + r(/"/, TT.ST, { pop: true }), + r(/./, TT.ST) ]; /** rules for square-bracketed index referencing */ @@ -473,12 +586,12 @@ states.variable_index = [ RULE_VAR_START, RULE_VAR_END, RULE_NUM, - r(/\[/, 'bracket'), - r(/\](?=\])/, 'bracket'), - r(/(\])(\})( ?=?)/, ['bracket', 'variable-2', 'operator'], { pop: true }), - r(/(\])(\[)/, 'bracket'), - r(/\]/, 'bracket', { pop: true }), - r(/[^\]]/, 'string') + r(/\[/, TT.BK), + r(/\](?=\])/, TT.BK), + r(/(\])(\})( ?=?)/, [TT.BK, TT.V2, TT.OP], { pop: true }), + r(/(\])(\[)/, TT.BK), + r(/\]/, TT.BK, { pop: true }), + r(/[^\]]/, TT.ST) ]; /** well-known mime type for robot framework (pygments, etc.) */ @@ -486,7 +599,7 @@ export const MIME_TYPE = 'text/x-robotframework'; /** the canonical CodeMirror mode name */ export const MODE_NAME = 'robotframework'; /** the human-readable name of the CodeMirror mode */ -export const MODE_LABEL = 'robotframework'; +export const MODE_LABEL = 'Robot Framework'; /** primary file extension */ export const DEFAULT_EXTENSION = 'robot'; /** all recognized file extensions */