-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.mll
240 lines (197 loc) · 6.59 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
(*
The lexical analyzer: lexer.ml is generated automatically
from lexer.mll.
The only modification commonly needed here is adding new keywords to the
list of reserved words at the top.
*)
{
open Support.Error
let reservedWords = [
(* Keywords *)
("type", fun i -> Parser.TYPE i);
("inert", fun i -> Parser.INERT i);
("lambda", fun i -> Parser.LAMBDA i);
("Top", fun i -> Parser.TTOP i);
("if", fun i -> Parser.IF i);
("then", fun i -> Parser.THEN i);
("else", fun i -> Parser.ELSE i);
("true", fun i -> Parser.TRUE i);
("false", fun i -> Parser.FALSE i);
("Bool", fun i -> Parser.BOOL i);
("Bot", fun i -> Parser.TBOT i);
("let", fun i -> Parser.LET i);
("in", fun i -> Parser.IN i);
("fix", fun i -> Parser.FIX i);
("letrec", fun i -> Parser.LETREC i);
("String", fun i -> Parser.USTRING i);
("unit", fun i -> Parser.UNIT i);
("Unit", fun i -> Parser.UUNIT i);
("as", fun i -> Parser.AS i);
("timesfloat", fun i -> Parser.TIMESFLOAT i);
("Float", fun i -> Parser.UFLOAT i);
("iszero", fun i -> Parser.ISZERO i);
("try", fun i -> Parser.TRY i);
("with", fun i -> Parser.WITH i);
("error", fun i -> Parser.ERROR i);
("cast", fun i -> Parser.CAST i);
(* Symbols *)
("+", fun i -> Parser.PLUS i);
("+?", fun i -> Parser.PLUSEX i);
("-", fun i -> Parser.MINUS i);
("-?", fun i -> Parser.MINUSEX i);
("_", fun i -> Parser.USCORE i);
("'", fun i -> Parser.APOSTROPHE i);
("\"", fun i -> Parser.DQUOTE i);
("!", fun i -> Parser.BANG i);
("#", fun i -> Parser.HASH i);
("$", fun i -> Parser.TRIANGLE i);
("*", fun i -> Parser.STAR i);
("|", fun i -> Parser.VBAR i);
(".", fun i -> Parser.DOT i);
(";", fun i -> Parser.SEMI i);
(",", fun i -> Parser.COMMA i);
("/", fun i -> Parser.SLASH i);
(":", fun i -> Parser.COLON i);
("::", fun i -> Parser.COLONCOLON i);
("=", fun i -> Parser.EQ i);
("==", fun i -> Parser.EQEQ i);
("[", fun i -> Parser.LSQUARE i);
("<", fun i -> Parser.LT i);
("<=", fun i -> Parser.LE i);
("{", fun i -> Parser.LCURLY i);
("(", fun i -> Parser.LPAREN i);
("<-", fun i -> Parser.LEFTARROW i);
("{|", fun i -> Parser.LCURLYBAR i);
("[|", fun i -> Parser.LSQUAREBAR i);
("}", fun i -> Parser.RCURLY i);
(")", fun i -> Parser.RPAREN i);
("]", fun i -> Parser.RSQUARE i);
(">", fun i -> Parser.GT i);
(">=", fun i -> Parser.GE i);
("|}", fun i -> Parser.BARRCURLY i);
("|>", fun i -> Parser.BARGT i);
("|]", fun i -> Parser.BARRSQUARE i);
(* Special compound symbols: *)
(":=", fun i -> Parser.COLONEQ i);
("->", fun i -> Parser.ARROW i);
("=>", fun i -> Parser.DARROW i);
("==>", fun i -> Parser.DDARROW i);
]
(* Support functions *)
type buildfun = info -> Parser.token
let (symbolTable : (string,buildfun) Hashtbl.t) = Hashtbl.create 1024
let _ =
List.iter (fun (str,f) -> Hashtbl.add symbolTable str f) reservedWords
let createID i str =
try (Hashtbl.find symbolTable str) i
with _ ->
if (String.get str 0) >= 'A' && (String.get str 0) <= 'Z' then
Parser.UCID {i=i;v=str}
else
Parser.LCID {i=i;v=str}
let lineno = ref 1
and depth = ref 0
and start = ref 0
and filename = ref ""
and startLex = ref dummyinfo
let create inFile stream =
if not (Filename.is_implicit inFile) then filename := inFile
else filename := Filename.concat (Sys.getcwd()) inFile;
lineno := 1; start := 0; Lexing.from_channel stream
let newline lexbuf = incr lineno; start := (Lexing.lexeme_start lexbuf)
let info lexbuf =
createInfo (!filename) (!lineno) (Lexing.lexeme_start lexbuf - !start)
let text = Lexing.lexeme
let stringBuffer = ref (String.create 2048)
let stringEnd = ref 0
let resetStr () = stringEnd := 0
let addStr ch =
let x = !stringEnd in
let buffer = !stringBuffer
in
if x = String.length buffer then
begin
let newBuffer = String.create (x*2) in
String.blit buffer 0 newBuffer 0 x;
String.set newBuffer x ch;
stringBuffer := newBuffer;
stringEnd := x+1
end
else
begin
String.set buffer x ch;
stringEnd := x+1
end
let getStr () = String.sub (!stringBuffer) 0 (!stringEnd)
let extractLineno yytext offset =
int_of_string (String.sub yytext offset (String.length yytext - offset))
}
(* The main body of the lexical analyzer *)
rule main = parse
[' ' '\009' '\012']+ { main lexbuf }
| [' ' '\009' '\012']*("\r")?"\n" { newline lexbuf; main lexbuf }
| "*/" { error (info lexbuf) "Unmatched end of comment" }
| "/*" { depth := 1; startLex := info lexbuf; comment lexbuf; main lexbuf }
| "# " ['0'-'9']+
{ lineno := extractLineno (text lexbuf) 2 - 1; getFile lexbuf }
| "# line " ['0'-'9']+
{ lineno := extractLineno (text lexbuf) 7 - 1; getFile lexbuf }
| ['0'-'9']+
{ Parser.INTV{i=info lexbuf; v=int_of_string (text lexbuf)} }
| ['0'-'9']+ '.' ['0'-'9']+
{ Parser.FLOATV{i=info lexbuf; v=float_of_string (text lexbuf)} }
| ['A'-'Z' 'a'-'z' '_']
['A'-'Z' 'a'-'z' '_' '0'-'9' '\'']*
{ createID (info lexbuf) (text lexbuf) }
| ":=" | "<:" | "<-" | "->" | "=>" | "==>"
| "{|" | "|}" | "<|" | "|>" | "[|" | "|]" | "=="
| ">=" | "<=" | "+?" | "-?"
{ createID (info lexbuf) (text lexbuf) }
| ['~' '%' '\\' '+' '-' '&' '|' ':' '@' '`' '$']+
{ createID (info lexbuf) (text lexbuf) }
| ['*' '#' '/' '!' '?' '^' '(' ')' '{' '}' '[' ']' '<' '>' '.' ';' '_' ','
'=' '\'']
{ createID (info lexbuf) (text lexbuf) }
| "\"" { resetStr(); startLex := info lexbuf; string lexbuf }
| eof { Parser.EOF(info lexbuf) }
| _ { error (info lexbuf) "Illegal character" }
and comment = parse
"/*"
{ depth := succ !depth; comment lexbuf }
| "*/"
{ depth := pred !depth; if !depth > 0 then comment lexbuf }
| eof
{ error (!startLex) "Comment not terminated" }
| [^ '\n']
{ comment lexbuf }
| "\n"
{ newline lexbuf; comment lexbuf }
and getFile = parse
" "* "\"" { getName lexbuf }
and getName = parse
[^ '"' '\n']+ { filename := (text lexbuf); finishName lexbuf }
and finishName = parse
'"' [^ '\n']* { main lexbuf }
and string = parse
'"' { Parser.STRINGV {i = !startLex; v=getStr()} }
| '\\' { addStr(escaped lexbuf); string lexbuf }
| '\n' { addStr '\n'; newline lexbuf; string lexbuf }
| eof { error (!startLex) "String not terminated" }
| _ { addStr (Lexing.lexeme_char lexbuf 0); string lexbuf }
and escaped = parse
'n' { '\n' }
| 't' { '\t' }
| '\\' { '\\' }
| '"' { '\034' }
| '\'' { '\'' }
| ['0'-'9']['0'-'9']['0'-'9']
{
let x = int_of_string(text lexbuf) in
if x > 255 then
error (info lexbuf) "Illegal character constant"
else
Char.chr x
}
| [^ '"' '\\' 't' 'n' '\'']
{ error (info lexbuf) "Illegal character constant" }
(* *)