-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenslex.mll
129 lines (111 loc) · 4.09 KB
/
genslex.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
{
[@@@ocaml.warning "-33"]
open Gensl
open Parsing
open Basetypes
open Parsetree
open ParserTypes
}
let digit = ['0'-'9']
let octal = ['0'-'7']
let hexalphabet = ['0'-'9' 'a'-'f' 'A'-'F']
let hexbyte = hexalphabet hexalphabet
let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']
let alpha = lowercase | uppercase
let alphadigit = alpha | digit
let space = [' ' '\t' '\n']
let base64alphabet = alphadigit | ['+' '/' '=']
let base64digit = base64alphabet base64alphabet base64alphabet base64alphabet
(* per ocaml lexing rules *)
(* ref: https://caml.inria.fr/pub/docs/manual-ocaml/lex.html#sss:character-literals *)
(* XXX \o000 not supported now due to limits of Scanf.unescaped *)
(* XXX implement [unscape] in-house for oc/js compatibility *)
let escape = '\\' (['"' '\\' '\'' 'n' 'r' 't' 'b' ' '] |
(digit digit digit) |
('x' hexbyte)
(* | ('o' octal octal octal) *)
)
let instring = [^ '"' '\\'] | escape
let boolprefix = "b:" | "bool:"
let hexprefix = "hex:"
let base64prefix = "b64:" | "base64:"
let strbytesprefix = "strbytes:"
let csymbprefix_std = "!"
let csymbprefix_app = "!!"
rule csymb_std = parse
| "toplevel" { `Toplevel } | "envelop" { `Envelop } | "metadata" { `Metadata }
| "desc" { `Desc } | "hash" { `Hash } | "uuid" { `Uuid } | "version" { `Version }
| "list" { `List } | "vector" { `Vector } | "set" { `Set } | "map" { `Map }
| "int" { `Int } | "uint" { `Uint } | "float" { `Float } | "timestamp" { `Timestamp }
and csymb_app = parse
| "app01" { `Appsymb01 } | "app02" { `Appsymb02 } | "app03" { `Appsymb03 } | "app04" { `Appsymb04 }
| "app05" { `Appsymb05 } | "app06" { `Appsymb06 } | "app07" { `Appsymb07 } | "app08" { `Appsymb08 }
| "app09" { `Appsymb09 } | "app10" { `Appsymb10 } | "app11" { `Appsymb11 } | "app12" { `Appsymb12 }
and token = parse
| eof { TkEof }
| (space+ as lxm) { TkSpaces lxm }
(* token TkSymbol *)
| (lowercase alphadigit* as lxm) { TkSymbol lxm }
(* token TkCodifiedSymbol *)
| csymbprefix_std { TkCodifiedSymbol (csymb_std lexbuf) }
| csymbprefix_app { TkCodifiedSymbol (csymb_app lexbuf) }
(* token TkString *)
| '"' (instring* as lxm) '"' { TkString (Scanf.unescaped lxm) }
(* token TkBool *)
| boolprefix "true" { TkBool true }
| boolprefix "false" { TkBool false }
(* token TkNumeric *)
| ((['+' '-']? digit+ '.'? digit*) as num)
(alpha+ as suffix)?
{ let suffix = Option.value ~default:"" suffix in TkNumeric (num, suffix) }
| ((['+' '-']? digit+ '/' digit+) as num)
(alpha+ as suffix)?
{ let suffix = Option.value ~default:"" suffix in TkNumeric (num, suffix) }
(* TkBytes *)
| hexprefix (hexbyte+ as lxm) { TkBytes (Hex.to_bytes (`Hex lxm)) }
| base64prefix (base64digit+ as lxm) { TkBytes (Base64.decode_exn lxm |> Bytes.of_string) }
| strbytesprefix '"' (instring* as lxm) '"' { TkBytes (Scanf.unescaped lxm |> Bytes.of_string) }
| '(' { TkParenOpen }
| ')' { TkParenClose }
| '[' { TkBracketOpen }
| ']' { TkBracketClose }
| '{' { TkCurlyOpen }
| '}' { TkCurlyClose }
| '#' (digit+ as k)? '[' { TkPoundBracketOpen (Option.map int_of_string k) }
| "#{" { TkPoundCurlyOpen }
| "," (digit+ as k) { TkPickK (false, int_of_string k) }
| "." (digit+ as k) { TkGrabK (false, int_of_string k) }
| "," (digit+ as k) "." { TkPickK (true, int_of_string k) }
| "." (digit+ as k) "." { TkGrabK (true, int_of_string k) }
| "," { TkPickOne true }
| "." { TkGrabOne true }
| ",," { TkPickAll }
| ".." { TkGrabAll }
| "." space { TkGrabPoint }
| ":" { TkKeywordIndicator }
| "@>" { TkAnnoNextIndicator }
| "@<" { TkAnnoPrevIndicator }
| "@" { TkAnnoStandaloneIndicator }
{
module Lexer : Lexer with
type buffer = Lexing.lexbuf
and type location = Lexing.position
= struct
open Lexing
type buffer = Lexing.lexbuf
type location = Lexing.position
type nonrec pstate = (buffer, location) pstate
let loc buf = buf.lex_curr_p
let source buf = `DirectInput (Some (loc buf).pos_fname)
let lexer buf =
let tok = token buf in
let span = {
span_start = buf.lex_start_p;
span_end = buf.lex_curr_p;
span_leading = NoLeadingInfo;
span_source = source buf;
} in
Ok (tok,span)
end
}