-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlex.go
189 lines (172 loc) · 4.48 KB
/
lex.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
package vera
import (
"errors"
"fmt"
"strings"
"unicode"
)
const (
negateSym = '!'
andSym = '&'
orSym = '|'
xorSym = '^'
condSym = '>'
bicondSym = '='
)
type lexemeType byte
func (lt lexemeType) String() string {
switch lt {
case ltFalse:
return "False"
case ltTrue:
return "True"
case ltNegate:
return "Negate"
case ltOperator:
return "Operator"
case ltOpenParen:
return "OpenParen"
case ltCloseParen:
return "CloseParen"
case ltStatement:
return "Statement"
default:
panic("lexemeType not added to String method!")
}
}
const (
ltFalse lexemeType = iota
ltTrue
ltNegate
ltOperator
ltOpenParen
ltCloseParen
ltStatement
)
type lexeme struct {
t lexemeType
v byte
}
type lexerResult struct {
l lexeme
err error
}
// statefn is a state combined with an associated action. See Rob Pike's talk on lexical scanning.
type statefn func(byte, *lexer) (lexemeType, statefn, error)
type lexer struct {
input string
c chan lexerResult
nextIdx int
nestCnt int
allowEOF bool
}
// removeAllWS removes all the whitespace from a string and returns the new string, where whitespace is identified
// according to unicode.IsSpace.
func removeAllWS(str string) string {
var b strings.Builder
b.Grow(len(str))
for _, ch := range str {
if !unicode.IsSpace(ch) {
b.WriteRune(ch)
}
}
return b.String()
}
// lex lexes the given string in a separate goroutine and outputs the resultant lexerResults over the returned channel.
func lex(input string) chan lexerResult {
l := &lexer{
input: removeAllWS(input),
// Arbitrary buffer size.
c: make(chan lexerResult, 10),
allowEOF: false,
}
go l.run()
return l.c
}
// run is the main loop for a lexer. It should be called in a separate goroutine.
func (l *lexer) run() {
for sfn := lexStatement; sfn != nil; {
n, eof := l.next()
if eof {
if !l.allowEOF {
l.c <- lexerResult{err: errors.New("unexpected EOF")}
}
break
}
var lt lexemeType
var err error
lt, sfn, err = sfn(n, l)
if err != nil {
l.c <- lexerResult{err: err}
break
}
l.c <- lexerResult{l: lexeme{lt, n}}
}
// Closing the channel without any errors implies EOF.
close(l.c)
}
// next returns the next byte in the input string. The boolean return value indicates if the end of the string was
// reached (i.e. EOF); if it is true, the byte return value should be disregarded.
func (l *lexer) next() (byte, bool) {
if l.nextIdx == len(l.input) {
return 0, true
}
// Indexing into string => we are not expecting UTF-8 chars with width > 1 byte.
next := l.input[l.nextIdx]
l.nextIdx++
return next, false
}
// nest increments nestCnt and sets allowEOF as appropriate.
func (l *lexer) nest() {
l.nestCnt++
l.allowEOF = false
}
// denest decrements nestCnt and sets allowEOF as appropriate. The boolean return value indicates success; false is
// returned if nestCnt was already zero when denest was called (i.e. parentheses not matched).
func (l *lexer) denest() bool {
if l.nestCnt == 0 {
// false return indicates failure.
return false
}
l.nestCnt--
l.allowEOF = l.nestCnt == 0
return true
}
// lexStatement is a statefn for parsing the start of a statement (this includes opening parentheses, "0", "1", and
// letters) or negation.
func lexStatement(n byte, l *lexer) (lexemeType, statefn, error) {
// By default, allow EOF if there are no unmatched parentheses.
// Some branches in the below switch set the allowEOF flag based on other conditions.
l.allowEOF = l.nestCnt == 0
switch n {
case negateSym:
l.allowEOF = false
return ltNegate, lexStatement, nil
case '(':
l.nest()
return ltOpenParen, lexStatement, nil
case '0':
return ltFalse, lexOperator, nil
case '1':
return ltTrue, lexOperator, nil
}
if ('a' <= n && n <= 'z') || ('A' <= n && n <= 'Z') {
return ltStatement, lexOperator, nil
}
return 0, nil, fmt.Errorf("unexpected char '%c'; expected '%c', '(', '0', '1', or a statement", n, negateSym)
}
// lexOperator is a statefn for parsing a binary operator or a closing parenthesis.
func lexOperator(n byte, l *lexer) (lexemeType, statefn, error) {
switch n {
case ')':
if !l.denest() {
return 0, nil, errors.New("unexpected closing parenthesis: no corresponding opening parenthesis")
}
return ltCloseParen, lexOperator, nil
case andSym, orSym, xorSym, condSym, bicondSym:
l.allowEOF = false
return ltOperator, lexStatement, nil
}
return 0, nil, fmt.Errorf("unexpected char '%c'; expected ')', '%c', '%c', '%c', '%c', or '%c'",
n, andSym, orSym, xorSym, condSym, bicondSym)
}