-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathLuaHighlighter.cpp
403 lines (382 loc) · 13.6 KB
/
LuaHighlighter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
/*
* Copyright 2000-2019 Rochus Keller <mailto:[email protected]>
*
* This file is part of the CARA (Computer Aided Resonance Assignment,
* see <http://cara.nmr.ch/>) NMR Application Framework (NAF) library.
*
* The following is the license that applies to this copy of the
* library. For a license to use the library under conditions
* other than those described here, please email to [email protected].
*
* GNU General Public License Usage
* This file may be used under the terms of the GNU General Public
* License (GPL) versions 2.0 or 3.0 as published by the Free Software
* Foundation and appearing in the file LICENSE.GPL included in
* the packaging of this file. Please review the following information
* to ensure GNU General Public Licensing requirements will be met:
* http://www.fsf.org/licensing/licenses/info/GPLv2.html and
* http://www.gnu.org/copyleft/gpl.html.
*/
#include "LuaHighlighter.h"
//#include <Lua.h>
#include <QtDebug>
using namespace Lua;
// TODO: this implementation doesn't yet properly cover all test cases of puc-lua; in particular
// it has trouble with nested long comments and strings; use LuaLexer instead
// Builds the list of regular-expression based highlighting rules (d_rules) and the
// comment/literal formats used by highlightBlock().
// parent: the text document handed on to the QSyntaxHighlighter base class.
Highlighter::Highlighter(QTextDocument *parent) :
QSyntaxHighlighter(parent)
{
// A single rule object is reused for all appends; fields that are not reassigned
// keep the value of the previously appended rule.
HighlightingRule rule;
// The rules are processed in the order in which they are appended here
d_commentFormat.setProperty( TokenProp, Comment );
d_commentFormat.setForeground(Qt::darkGreen);
// Single-line comments are now handled further down, together with the multi-line formats
// rule.pattern = QRegExp("--+[^\n]*");
// rule.format = d_commentFormat;
// rule.name = "Single Line Comment";
// d_rules.append(rule);
d_literalFormat.setProperty( TokenProp, LiteralString );
d_literalFormat.setForeground(Qt::darkRed);
// Source: http://stackoverflow.com/questions/481282/how-can-i-match-double-quoted-strings-with-escaped-double-quote-characters
rule.pattern = QRegExp( "\"(?:[^\\\\\"]|\\\\.)*\"" ); // TODO: prevent '"'abc'"' from being interpreted as the string "'abc'"!
rule.pattern.setMinimal(true);
rule.name = "Double Quote String";
rule.format = d_literalFormat;
d_rules.append(rule);
// Same as above for single quotes; rule.format is still d_literalFormat from the previous rule
rule.pattern = QRegExp( "'(?:[^\\\\']|\\\\.)*'" );
rule.pattern.setMinimal(true);
rule.name = "Single Quote String";
d_rules.append(rule);
QTextCharFormat keywordFormat;
keywordFormat.setProperty( TokenProp, Keyword );
keywordFormat.setForeground(QColor(0x00,0x00,0x7f));
keywordFormat.setFontWeight(QFont::Bold);
// Every keyword is wrapped in word boundaries so that it does not match inside longer words
QStringList keywordPatterns;
keywordPatterns << "\\band\\b" << "\\bbreak\\b" << "\\bdo\\b"
<< "\\belse\\b" << "\\belseif\\b" << "\\bend\\b"
<< "\\bfalse\\b" << "\\bfor\\b" << "\\bfunction\\b"
<< "\\bif\\b" << "\\bin\\b" << "\\blocal\\b"
<< "\\bnil\\b" << "\\bnot\\b" << "\\bor\\b"
<< "\\brepeat\\b" << "\\breturn\\b" << "\\bthen\\b"
<< "\\btrue\\b" << "\\buntil\\b" << "\\bwhile\\b";
foreach (const QString &pattern, keywordPatterns)
{
rule.pattern = QRegExp(pattern);
rule.format = keywordFormat;
rule.name = "Keyword " + pattern.toUtf8();
d_rules.append(rule);
}
QTextCharFormat numbers;
numbers.setProperty( TokenProp, Number );
numbers.setForeground(Qt::red);
// Hexadecimal numbers, e.g. 0xff 0x56
rule.pattern = QRegExp("(^[a-zA-Z_]|\\b)0x[0-9a-fA-F]+" );
// (^[a-zA-Z_]|\\b) means that 0x must not be part of an identifier
rule.format = numbers;
rule.name = "Number";
d_rules.append(rule);
QTextCharFormat idents; // Needed so that identifiers of the form "abc123" are not interpreted as a number
idents.setProperty( TokenProp, Ident );
idents.setForeground(Qt::black);
rule.pattern = QRegExp("(\\b)[a-zA-Z_][a-zA-Z0-9_]*");
// NOTE: the following remark describes a pattern variant "([^0-9]|\\b)" that is not the one used above:
// ([^0-9]|\\b) means the identifier is either at the start of the line or not directly after a number.
// This is meant to prevent the "e" in 314.16e-2 from being treated as an identifier.
// Issue: "[^0-9]|\\b" or "[^0-9]" also detect on idents like ".b"
rule.format = idents;
rule.name = "Ident";
d_rules.append(rule);
// Decimal numbers, e.g. 3 3.0 3.1416 314.16e-2 0.31416E1 12E2 .123 .16e-2 .31416E1
rule.pattern = QRegExp("[0-9]*[\\.]?[0-9]+([eE][-+]?[0-9]+)?" );
rule.format = numbers;
rule.name = "Number";
d_rules.append(rule);
// The keyword format (colour, bold) is reused for the remaining tokens; only its TokenProp changes to Other
keywordFormat.setProperty( TokenProp, Other );
QStringList otherTokens; // Longer tokens first, then the shorter ones
otherTokens << "\\.\\.\\." << "\\.\\." << "==" << "~=" << "<=" << ">=" <<
"\\*" << "/" << "%" << "\\^" << "#" << "<" << ">" << "=" <<
"\\(" << "\\)" << "\\{" << "\\}" << "\\[" << "\\]" << ";" << ":" << "," <<
"\\+" << "-" << "\\.";
foreach (const QString &pattern, otherTokens)
{
rule.pattern = QRegExp(pattern);
rule.format = keywordFormat;
rule.name = "Token " + pattern.toUtf8();
d_rules.append(rule);
}
}
// Overwrites the characters str[start] .. str[start+len-1] with blanks.
// The characters are replaced one by one rather than removed, so the text
// outside the range keeps its position within the string.
// (Original author's note: this is meant to be nearly free because no dynamic
// memory is allocated or released.)
static inline void empty( QString& str, int start, int len )
{
    const int stop = start + len;
    int pos = start;
    while( pos < stop )
    {
        str[pos] = QChar(' ');
        ++pos;
    }
}
// Describes one comment or long-bracket marker found in a line of text.
struct Mark
{
    enum Kind {
        LineCmt,       // "--"
        StartMlCmt,    // "--[[" or "--[=[" with d_num '=' characters
        StartMlStr,    // "[[" or "[=[" with d_num '=' characters
        EndMlStrOrCmt, // "]]" or "]=]" with d_num '=' characters
        Done           // no mark (value of a default-constructed Mark)
    };

    int d_pos;     // position of the mark in the text
    quint8 d_num;  // number of '=' characters
    quint8 d_kind; // one of Kind

    // A default-constructed Mark is "no mark": position -1, kind Done.
    Mark():d_pos(-1),d_num(0),d_kind(Done){}

    // Number of characters the mark itself occupies; 0 for Done or unknown kinds.
    int len() const
    {
        if( d_kind == LineCmt )
            return 2; // "--"
        if( d_kind == StartMlCmt )
            return 2 + d_num + 2; // "--" followed by the opening long bracket
        if( d_kind == StartMlStr || d_kind == EndMlStrOrCmt )
            return d_num + 2; // two brackets plus the '=' characters in between
        return 0;
    }
};
typedef QList<Mark> Marks;
static Mark _nextMark2( const QString& text, int from = 0 )
{
// Suche nach "--", "--[[", "--[=[", "[[", "[=[", "]]", "]=]"
Mark res;
for( int i = from; i < text.size(); i++ )
{
const QChar c = text[i];
if( c == QChar('-') && i + 1 < text.size() && text[i+1] == QChar('-') )
{
// "--" found
res.d_kind = Mark::LineCmt;
res.d_pos = i;
if( i + 3 < text.size() && text[i+2] == QChar('[') )
{
// "--[" found
if( text[i+3] == QChar('=') )
{
int j = i+4;
while( j < text.size() && text[j] == QChar('=') )
j++;
if( j < text.size() && text[j] == QChar('[') )
{
// "--[=[" found
res.d_kind = Mark::StartMlCmt;
res.d_num = j - ( i + 2 ) - 1;
return res;
}
}else if( text[i+3] == QChar('[') )
{
// "--[[" found
res.d_kind = Mark::StartMlCmt;
return res;
}
}
return res; // es ist in jedem Fall ein Single Line Comment, wenn man hier ankommt
}else if( c == QChar(']') && i + 1 < text.size() )
{
if( text[i+1] == QChar('=') )
{
int j = i+2;
while( j < text.size() && text[j] == QChar('=') )
j++;
if( j < text.size() && text[j] == QChar(']') )
{
// "]=]" found
res.d_kind = Mark::EndMlStrOrCmt;
res.d_pos = i;
res.d_num = j - i - 1;
return res;
}
}else if( text[i+1] == QChar(']') )
{
// "]]" found
res.d_kind = Mark::EndMlStrOrCmt;
res.d_pos = i;
return res;
}
}else if( c == QChar('[') && i + 1 < text.size() )
{
if( text[i+1] == QChar('=') ) // "[=" found
{
int j = i+2;
while( j < text.size() && text[j] == QChar('=') )
j++;
if( j < text.size() && text[j] == QChar('[') )
{
// "[=[" found
res.d_kind = Mark::StartMlStr;
res.d_pos = i;
res.d_num = j - i - 1;
return res;
}
}else if( text[i+1] == QChar('[') ) // "[[" found
{
res.d_kind = Mark::StartMlStr;
res.d_pos = i;
return res;
}
}
}
return res;
}
// Collects all marks of one line in the order in which they occur.
// Each search resumes directly behind the previously found mark.
static Marks _findMarks( const QString& text )
{
    Marks found;
    for( Mark m = _nextMark2( text ); m.d_kind != Mark::Done;
         m = _nextMark2( text, m.d_pos + m.len() ) )
    {
        found.append(m);
    }
    return found;
}
// Overlays the integer block state stored per text block (see highlightBlock)
// with a set of bit fields describing multi-line comment/string state.
union BlockState
{
int d_int; // Initial value is -1, which corresponds to 0xffffffff
struct Data
{
unsigned int startOfComment:1; // A comment starts on this line and does not end on it
unsigned int endOfStrOrCmnt:1; // A comment or string ends on this line that did not start on it
unsigned int allLineComment:1; // The whole line belongs to a comment that starts above and ends below
unsigned int startOfString:1; // Like startOfComment, but for a long string
unsigned int allLineString:1; // Like allLineComment, but for a long string
unsigned int level:8; // Number of "=" characters
unsigned int dummy:18; // Padding; not referenced in this file
unsigned int unitialized:1; // Tested by highlightBlock to detect a state that was never set (d_int == -1)
} d_state;
};
void Highlighter::highlightBlock(const QString & block)
{
QString text = block; // wir machen Kopie, damit wir die geparsten Stellen rauslöschen können
BlockState prev;
prev.d_int = previousBlockState();
if( prev.d_state.unitialized )
prev.d_int = 0;
BlockState newCur;
newCur.d_int = 0;
Marks marks = _findMarks( text );
int marksDone = 0;
if( prev.d_state.startOfComment || prev.d_state.allLineComment )
{ // wir sind in einem Kommentar drin
// prüfe, ob er hier endet; suche das erste End; alle davor liegenden Starts und anderen Marks werden ignoriert
for( int i = 0; i < marks.size(); i++ )
{
if( marks[i].d_kind == Mark::EndMlStrOrCmt && marks[i].d_num == prev.d_state.level )
{ // wir sind auf ein End gestossen
marksDone = i + 1;
newCur.d_state.endOfStrOrCmnt = true;
newCur.d_state.level = prev.d_state.level;
stamp( text, 0, marks[i].d_pos + marks[i].len(), d_commentFormat );
break;
}
}
if( marksDone == 0 )
{ // keine wirksamen Comment Marks gefunden; die ganze Zeile ist auch ein Kommentar
newCur.d_state.allLineComment = true;
newCur.d_state.level = prev.d_state.level;
stamp( text, 0, text.size(), d_commentFormat );
marksDone = marks.size();
}
}else if( prev.d_state.startOfString || prev.d_state.allLineString )
{
// wir sind in einem String drin
// prüfe, ob er hier endet; suche das erste End; alle davor liegenden Starts und anderen Marks werden ignoriert
for( int i = 0; i < marks.size(); i++ )
{
if( marks[i].d_kind == Mark::EndMlStrOrCmt && marks[i].d_num == prev.d_state.level )
{ // wir sind auf ein End gestossen
marksDone = i + 1;
newCur.d_state.endOfStrOrCmnt = true;
newCur.d_state.level = prev.d_state.level;
stamp( text, 0, marks[i].d_pos + marks[i].len(), d_literalFormat );
break;
}
}
if( marksDone == 0 )
{ // keine wirksamen String Marks gefunden; die ganze Zeile ist auch ein String
newCur.d_state.allLineString = true;
newCur.d_state.level = prev.d_state.level;
stamp( text, 0, text.size(), d_literalFormat );
marksDone = marks.size();
}
}
// Suche ganze LineCmt oder Ml-Paare
for( int i = marksDone; i < marks.size(); i++ )
{
if( marks[i].d_kind == Mark::LineCmt )
{
stamp( text, marks[i].d_pos, text.size() - marks[i].d_pos, d_commentFormat );
marksDone = marks.size();
break;
}else if( marks[i].d_kind == Mark::StartMlCmt || marks[i].d_kind == Mark::StartMlStr )
{
for( int j = i + 1; j < marks.size(); j++ )
{
if( marks[j].d_num == marks[i].d_num )
{
stamp( text, marks[i].d_pos, marks[j].d_pos - marks[i].d_pos + marks[j].len(),
(marks[i].d_kind == Mark::StartMlCmt)?d_commentFormat:d_literalFormat );
marks[i].d_kind = Mark::Done;
marks[j].d_kind = Mark::Done; // als gesehen markieren
}
}
}
}
// Suche offene Enden
for( int i = marksDone; i < marks.size(); i++ )
{
if( marks[i].d_kind == Mark::StartMlCmt || marks[i].d_kind == Mark::StartMlStr )
{
if( marks[i].d_kind == Mark::StartMlCmt )
newCur.d_state.startOfComment = true;
else
newCur.d_state.startOfString = true;
newCur.d_state.level = marks[i].d_num;
stamp( text, marks[i].d_pos, text.size() - marks[i].d_pos,
(marks[i].d_kind == Mark::StartMlCmt)?d_commentFormat:d_literalFormat );
}
}
setCurrentBlockState( newCur.d_int );
//qDebug() << "**********";
foreach( const HighlightingRule &rule, d_rules )
{
QRegExp expression(rule.pattern);
int index = expression.indexIn(text);
while( index >= 0 )
{
const int length = expression.matchedLength();
//qDebug() << "hit" << rule.name << ":" << text.mid( index, length );
stamp( text, index, length, rule.format );
index = expression.indexIn(text, index + length);
}
}
}
// Applies format f to the range [start, start+len) of the current block and
// blanks the same range in text.
// text:  working copy of the block text; modified in place.
// start: index of the first character of the range.
// len:   number of characters in the range.
// f:     format to set for the range.
void Highlighter::stamp(QString &text, int start, int len, const QTextCharFormat &f)
{
    // Blank the range so that no further rule is applied to the same text
    empty( text, start, len );
    setFormat( start, len, f );
}
// Returns the translated display name of a token type.
// tokenType: one of Ident, Keyword, Number, LiteralString, Comment, Other.
// Returns a default-constructed QString for any other value.
QString Highlighter::format(int tokenType)
{
    QString label;
    switch( tokenType )
    {
    case Ident:
        label = tr("Ident");
        break;
    case Keyword:
        label = tr("Keyword");
        break;
    case Number:
        label = tr("Number");
        break;
    case LiteralString:
        label = tr("String");
        break;
    case Comment:
        label = tr("Comment");
        break;
    case Other:
        label = tr("Other");
        break;
    default:
        break;
    }
    return label;
}