-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtiger-parser.rkt
293 lines (241 loc) · 9.23 KB
/
tiger-parser.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
#lang racket/base
(require parser-tools/lex
racket/list
racket/contract
parser-tools/yacc
(prefix-in : parser-tools/lex-sre)
"core-ast.rkt"
"source-ast.rkt")
(define-tokens lang-tokens (integer identifier comparison */ string))
(define-empty-tokens lang-empty-tokens
(eof for nil break period comma semi-colon colon
space plus minus and or arrow
equal not-equal of if then else while do to let in end
type array var function
assignment open-paren close-paren open-bracket
close-bracket open-brace close-brace))
(define-lex-abbrev digit (char-set "0123456789"))
(define-lex-abbrev inter-space (:* whitespace))
(define (comment-lexer port)
(define (no-state char)
(case char
((#\/) (slash (read-char port)))
((#\*) (star (read-char port)))
(else (no-state (read-char port)))))
(define (slash char)
(case char
((#\*) (comment-lexer port) (no-state (read-char port)))
(else (no-state char))))
(define (star char)
(case char
((#\/) (void))
(else (no-state char))))
(no-state (read-char port)))
(define (string-lexer port start-pos)
(define (digit? x)
(and (memq x '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) #t))
(define (loop next-char chars)
(define (continue char)
(loop (read-char port) (cons char chars)))
(case next-char
((#\")
(let-values (((line column pos) (port-next-location port)))
(make-position-token
(token-string (list->string (reverse chars)))
start-pos
(make-position pos line column))))
((#\\)
(let ((char (read-char port)))
(cond
((memq char '(#\" #\\)) (continue char))
((equal? char #\t) (continue #\tab))
((equal? char #\n) (continue #\newline))
((digit? char)
(let ((char2 (read-char port)) (char3 (read-char port)))
(if (and (digit? char2) (digit? char3))
(continue (integer->char (string->number (string char char2 char3))))
(error 'string-lexer "Bad digit escape sequence"))))
((equal? char #\^)
(error 'string-lexer "Control escapes not supported yet"))
((char-whitespace? char)
(let whitespace-loop ((char (read-char port)))
(cond
((char-whitespace? char) (whitespace-loop (read-char port)))
((equal? char #\\) (loop (read-char port) chars))
(else (error 'string-lexer "Bad character ~a in formatting escape" char)))))
(else (error 'string-lexer "Unknown escape character ~a" char)))))
(else (loop (read-char port) (cons next-char chars)))))
(loop (read-char port) empty))
(define lang-lexer
(lexer-src-pos
(whitespace (return-without-pos (lang-lexer input-port)))
("/*" (begin (comment-lexer input-port) (return-without-pos (lang-lexer input-port))))
("\"" (return-without-pos (string-lexer input-port start-pos)))
((:+ digit) (token-integer (string->number lexeme)))
("->" (token-arrow))
("for" (token-for))
("nil" (token-nil))
("of" (token-of))
("if" (token-if))
("then" (token-then))
("else" (token-else))
("while" (token-while))
("do" (token-do))
("to" (token-to))
("let" (token-let))
("in" (token-in))
("end" (token-end))
("type" (token-type))
("array" (token-array))
("break" (token-break))
("var" (token-var))
("function" (token-function))
("(" (token-open-paren))
(")" (token-close-paren))
("[" (token-open-bracket))
("]" (token-close-bracket))
("{" (token-open-brace))
("}" (token-close-brace))
("," (token-comma))
(";" (token-semi-colon))
(":" (token-colon))
("+" (token-plus))
("-" (token-minus))
("&" (token-and))
("|" (token-or))
((char-set "*/") (token-*/ (string->symbol lexeme)))
(":=" (token-assignment))
("=" (token-equal))
("<>" (token-not-equal))
("." (token-period))
((:or "<" "<=" ">" ">=")
(token-comparison (string->symbol lexeme)))
((:: alphabetic (:* alphabetic digit #\")) (token-identifier (string->symbol lexeme)))
((eof) (token-eof))))
(define lang-parser
(parser
(grammar
(decs (() empty)
((dec decs) (cons $1 $2)))
(dec ((type id equal ty) (make-type-declaration $2 $4))
((var id assignment expr)
(make-untyped-variable-declaration $2 $4))
((var id colon id assignment expr)
(make-variable-declaration $2 (make-type-reference $4) $6))
((function id open-paren tyfields close-paren equal expr)
(make-function-declaration $2 $4 #f $7))
((function id open-paren tyfields close-paren colon id equal expr)
(make-function-declaration $2 $4 (make-type-reference $7) $9)))
(ty ((id) (make-type-reference $1))
((open-paren ty-seq close-paren arrow ty) (make-function-type $2 $5))
((ty arrow ty) (make-function-type (list $1) $3))
((open-brace tyfields close-brace) (make-record-type $2))
((array of id) (make-array-type (make-type-reference $3))))
(tyfields (() empty)
((id colon id tyfields-comma) (cons (cons $1 (make-type-reference $3)) $4)))
(tyfields-comma
(() empty)
((comma id colon id tyfields-comma) (cons (cons $2 (make-type-reference $4)) $5)))
(ty-seq (() empty)
((ty ty-seq-comma) (cons $1 $2)))
(ty-seq-comma
(() empty)
((comma ty ty-seq-comma) (cons $2 $3)))
(expr ((val) $1)
((expr +- expr) (prec plus) (make-math $2 $1 $3))
((expr */ expr) (make-math $2 $1 $3))
((expr and expr) (make-math '& $1 $3))
((expr or expr) (make-math '\| $1 $3))
((expr comparison expr) (make-comparison $2 $1 $3 #f))
((expr equal expr) (make-equality '= $1 $3 #f))
((expr not-equal expr) (make-equality '<> $1 $3 #f))
((lvalue assignment expr) (make-assignment $1 $3))
((expr open-paren close-paren) (make-function-call $1 empty #f))
((expr open-paren expr expr-comma-seq close-paren)
(make-function-call $1 (cons $3 $4) #f))
((minus expr) (make-negation $2))
((array-creation) $1)
((record-creation) $1)
((if expr then expr) (make-if-then-else $2 $4 #f #f))
((if expr then expr else expr) (make-if-then-else $2 $4 $6 #f))
((while expr do expr) (make-while-loop $2 $4))
((for id assignment expr to expr do expr)
(make-for-loop $2 $4 $6 $8))
((break) (make-break))
((let decs in expr-seq end) (make-binder $2 (make-sequence $4)))
((open-paren expr-seq close-paren) (make-sequence $2))
)
(record-creation
((id open-brace close-brace)
(make-create-record (make-type-reference $1) empty))
((id open-brace id-equal-expr id-equal-expr-comma-seq close-brace)
(make-create-record (make-type-reference $1) (cons $3 $4))))
(id-equal-expr ((id equal expr) (cons $1 $3)))
(id-equal-expr-comma-seq
(() empty)
((comma id-equal-expr id-equal-expr-comma-seq) (cons $2 $3)))
(array-creation
((id open-bracket expr close-bracket of expr)
(make-create-array (make-type-reference $1) $3 $6)))
(+- ((plus) '+) ((minus) '-))
(expr-comma-seq (() empty)
((comma expr expr-comma-seq) (cons $2 $3)))
(expr-seq (() empty)
((expr expr-semi-colon-seq) (cons $1 $2)))
(expr-semi-colon-seq (() empty)
((semi-colon expr expr-semi-colon-seq) (cons $2 $3)))
(lvalue ((id lvalue2) ($2 (make-identifier $1))))
(lvalue2 (() (lambda (base) base))
((open-bracket expr close-bracket lvalue2)
(lambda (base) ($4 (make-array-ref base $2 #f))))
((period id lvalue2)
(lambda (base) ($3 (make-field-ref base $2 #f)))))
(val ((lvalue) $1)
((literal) $1))
(literal ((integer-literal) $1)
((string-literal) $1)
((nil) (make-nil #f)))
(integer-literal ((integer) (make-integer-literal $1)))
(string-literal ((string) (make-string-literal $1)))
(id ((identifier) $1))
)
(precs
(nonassoc of open-brace close-brace
open-bracket close-bracket
semi-colon
close-paren comma if then while do to for let in end)
(left else)
(right assignment)
(left or)
(left and)
(nonassoc comparison equal not-equal)
(left plus)
(left */)
(nonassoc minus)
(left open-paren)
(right arrow)
)
(tokens lang-tokens lang-empty-tokens)
(start expr)
(src-pos)
(error
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
(if tok-ok?
(error 'parser "Got unexpected token ~a(~a) at ~a:~a-~a:~a"
tok-name tok-value
(position-line start-pos)
(position-col start-pos)
(position-line end-pos)
(position-col end-pos))
(error 'parser "Bad Token at ~a:~a-~a:~a"
(position-line start-pos)
(position-col start-pos)
(position-line end-pos)
(position-col end-pos)))))
(end eof)))
(define (parse p/s)
(let ((port (if (string? p/s) (open-input-string p/s) p/s)))
(port-count-lines! port)
(lang-parser (lambda () (lang-lexer port)))))
(provide/contract
(parse (-> (or/c string? port?) expression?)))