-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpiq_transform.py
executable file
·605 lines (461 loc) · 17.2 KB
/
piq_transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
#!/usr/bin/env python
#
# TODO
#
# - more restrictive parsing -- allow transform only inside [ ... ] or single ()
import sys
import StringIO
import tokenize
import token as pytoken
import keyword as pykeyword
import ast
import piq
def is_token(l, i, toknum, tokval=None):
    """Tell whether the token at index `i` of `l` has the given type and value.

    `l` is a sequence of token tuples whose first two items are the token
    type and the token string.  Returns False when `i` is past the end of
    `l`.  When `tokval` is None only the token type is compared.
    """
    if i >= len(l):
        return False
    token = l[i]
    if token[0] != toknum:
        return False
    # None means "any value"; test it by identity rather than equality
    return tokval is None or tokval == token[1]
def is_token_op(l, i, opname=None):
    """Check for an operator token at index `i`.

    When `opname` is given the operator text must match it as well.
    """
    return is_token(l, i, pytoken.OP, opname)
def is_token_op_in(l, i, opnames):
    """Check whether the token at index `i` is any of the operators in `opnames`."""
    return any(is_token_op(l, i, candidate) for candidate in opnames)
def is_token_name(l, i):
    """Check for a NAME token (identifier or keyword) at index `i`."""
    return is_token(l, i, toknum=pytoken.NAME)
def peek_token(l, i):
    """Return the token at index `i` without consuming it, or None past the end."""
    return l[i] if i < len(l) else None
def pop_token(l, i):
    """Return the token at index `i` together with the index that follows it.

    The list itself is not modified; "popping" only advances the index.
    """
    current = l[i]
    return current, i + 1
def is_token_keyword(l, i):
    """Check whether the token at index `i` is a NAME token spelling a Python keyword."""
    candidate = peek_token(l, i)
    if candidate is None:
        return False
    return candidate[0] == pytoken.NAME and pykeyword.iskeyword(candidate[1])
def is_token_op_value_start(l, i):
    """Check whether the token at index `i` is an operator that can begin a value."""
    value_opening_ops = ['`', '(', '[', '{', '-', '+', '~']
    return is_token_op_in(l, i, value_opening_ops)
def is_token_value_start(l, i):
    """Check whether the token at index `i` can begin a value.

    Keywords and the end marker never start a value.  Any other token does,
    unless it is an operator outside the set accepted by
    is_token_op_value_start().
    """
    if is_token_keyword(l, i) or is_token(l, i, pytoken.ENDMARKER):
        return False
    return not is_token_op(l, i) or is_token_op_value_start(l, i)
def make_token(toknum, tokval, tokstart=None, tokend=None):
    """Build a token tuple.

    Without `tokstart` the result is the short (type, value) form.  With it,
    the result is the 5-item form (type, value, start, end, None), where the
    end position defaults to the start position.
    """
    if tokstart is None:
        return (toknum, tokval)
    end = tokstart if tokend is None else tokend
    return (toknum, tokval, tokstart, end, None)
def is_piq_name_start(l, i):
    """Check for the start of a Piq name: a '.' operator followed by a NAME token."""
    if not is_token_op(l, i, '.'):
        return False
    return is_token_name(l, i + 1)
def is_piq_name_continue(l, i):
    """Check for a further Piq name segment: a '-' operator followed by a NAME token."""
    if not is_token_op(l, i, '-'):
        return False
    return is_token_name(l, i + 1)
# skip insignificant tokens
def skip_nl_and_comment_tokens(l, i, accu):
    """Move past a run of NL and COMMENT tokens starting at index `i`.

    Every skipped token is appended to `accu` so it can be re-emitted later.
    Returns the index of the first token that is neither NL nor COMMENT (or
    len(l) when the run reaches the end of the list).

    Implemented as a loop rather than recursion: one stack frame per skipped
    token would exceed the interpreter's recursion limit on a long run of
    comment or blank lines.
    """
    insignificant = (tokenize.NL, tokenize.COMMENT)
    while i < len(l) and l[i][0] in insignificant:
        accu.append(l[i])
        i += 1
    return i
def transform_piq_name(filename, l, i, accu, name=None, name_loc=None):
    """Rewrite a Piq name starting at index `i` into plain Python tokens.

    Consumes a separator token ('.' or '-') and the NAME token after it,
    recursing while further name segments follow.  Once the whole name is
    collected, a call to one of `_piq_make_name`, `_piq_make_named` or
    `_piq_make_splice` is appended to `accu`, chosen by what follows the name.

    `name` and `name_loc` carry the partially built name and its location
    through the recursion; external callers leave them as None.

    Returns the index of the first token that was not consumed.  Raises
    piq.ParseError when the name is followed by something unexpected.
    """
    # '.' opens a name, '-' joins a further segment to it
    #
    # TODO: make sure '-' immediately follows the preceding name segment
    separator_token, i = pop_token(l, i)
    separator_loc = separator_token[3]
    separator = separator_token[1]

    segment_token, i = pop_token(l, i)
    segment = segment_token[1]

    # the separator is kept only between segments, never before the first one
    name = segment if name is None else name + separator + segment

    if name_loc is None:
        name_loc = separator_loc

    def emit_constructor_call(constructor):
        # emits: constructor('<name>', (<line>, <column>))
        # (`filename` is currently not included in the emitted location)
        line, column = name_loc[0], name_loc[1]
        accu.extend([
            (pytoken.NAME, constructor),
            (pytoken.OP, '('),
            (pytoken.STRING, "'" + name + "'"),
            (pytoken.OP, ','),
            (pytoken.OP, '('),
            (pytoken.NUMBER, str(line)),
            (pytoken.OP, ','),
            (pytoken.NUMBER, str(column)),
            (pytoken.OP, ')'),
            (pytoken.OP, ')'),
        ])

    if is_piq_name_start(l, i) or is_piq_name_continue(l, i):
        # another '.'-chained name or '-'-joined segment follows => recurse
        return transform_piq_name(filename, l, i, accu, name, name_loc)

    # the name is complete; look past newlines and comments to see what
    # follows it, holding them back so they land after the emitted call
    held_back = []
    i = skip_nl_and_comment_tokens(l, i, held_back)

    if is_token_op_in(l, i, [')', ']', ',']):
        # end of name
        emit_constructor_call('_piq_make_name')
    elif is_token_value_start(l, i):
        # value juxtaposition
        emit_constructor_call('_piq_make_named')
        accu.append((pytoken.OP, '**'))
    elif is_token_op(l, i, '*') and is_token_value_start(l, i + 1):
        # splice
        emit_constructor_call('_piq_make_splice')
        # the '*' is dropped and '**' emitted in its place: it has a higher
        # precedence and binds more strongly
        _, i = pop_token(l, i)
        accu.append((pytoken.OP, '**'))
    else:
        # something else, likely an error
        offending = peek_token(l, i)
        offending_loc = offending[3]
        loc = piq.make_loc((offending_loc[0], offending_loc[1]))
        raise piq.ParseError(loc, "label must be followed by value, '*' value, or one of ')', ']', ','")

    # insert back newlines and comments
    accu.extend(held_back)
    return i
def transform_token_list(filename, l):
    """Return a copy of token list `l` with every Piq name rewritten.

    Tokens are copied through unchanged, except that a Piq name appearing
    right after '(', '[' or ',' is replaced by the tokens produced by
    transform_piq_name().
    """
    output = []
    pos = skip_nl_and_comment_tokens(l, 0, output)

    name_may_follow = False
    total = len(l)
    while pos < total:
        if name_may_follow and is_piq_name_start(l, pos):
            pos = transform_piq_name(filename, l, pos, output)
            # Piq name can not be immediately followed by another Piq name
            name_may_follow = False
        else:
            # Piq name is allowed only after these tokens
            #
            # TODO: allow only commas inside lists
            name_may_follow = is_token_op_in(l, pos, ['(', '[', ','])
            output.append(l[pos])
            pos += 1
        pos = skip_nl_and_comment_tokens(l, pos, output)
    return output
def tokenize_common(infile):
    """Tokenize Python source and return the tokens as a list.

    Despite its name, `infile` is a readline-style callable: it is handed
    straight to tokenize.generate_tokens().
    """
    return list(tokenize.generate_tokens(infile))
def tokenize_string(s):
    """Tokenize the Python source held in string `s`."""
    buf = StringIO.StringIO(s)
    return tokenize_common(buf.readline)
def tokenize_file(filename):
    """Tokenize the Python source stored in the file `filename`."""
    with open(filename, 'rb') as source:
        read_line = source.readline
        return tokenize_common(read_line)
def tokenize_and_transform_string(s, filename='-'):
    """Tokenize source string `s` and rewrite the Piq names found in it.

    `filename` is passed on to the transformation step.
    """
    return transform_token_list(filename, tokenize_string(s))
def tokenize_and_transform_file(filename):
    """Tokenize the file `filename` and rewrite the Piq names found in it."""
    return transform_token_list(filename, tokenize_file(filename))
class AstExprWrapper(ast.NodeTransformer):
    """Wraps every (load) expression in a call to _piq_wrap_object().

    An expression node `e` becomes `_piq_wrap_object(e, lineno, col_offset)`.
    Expressions whose context is something other than Load (for example
    assignment targets) are left alone.
    """

    def visit(self, node):
        # children first, so nested expressions are wrapped inside-out
        node = self.generic_visit(node)

        if not isinstance(node, ast.expr):
            return node
        ctx = getattr(node, 'ctx', None)
        if ctx and not isinstance(ctx, ast.Load):
            return node

        # TODO, XXX: don't wrap Call.func, because it can never result in a
        # piq data structure, also won't need __call__ method in
        # piq.ObjectProxy()

        # TODO: transform (_piq_make_named(name, loc) ** value) into
        # Named(name, loc, value); similarly, for _piq_make_splice()
        #
        # this way, we won't need AbstractNamed and AbstractSplice runtime

        def located(new_node):
            # every synthesized node borrows the wrapped node's position
            return ast.copy_location(new_node, node)

        call_args = [
            node,
            located(ast.Num(node.lineno)),
            located(ast.Num(node.col_offset)),
        ]
        wrapper = located(ast.Name(id='_piq_wrap_object', ctx=ast.Load()))
        return located(ast.Call(func=wrapper, args=call_args, keywords=[]))
class AstOverrideOperators(ast.NodeTransformer):
    """Replaces boolean and membership operators with _piq_operator_*() calls.

    - `a and b ...` becomes `_piq_operator_and(a, lambda: b, ...)`
    - `a or b ...`  becomes `_piq_operator_or(a, lambda: b, ...)`
    - `not a`       becomes `_piq_operator_not(a)`
    - `a in b`      becomes `_piq_operator_in(a, b)`
    - `a not in b`  becomes `_piq_operator_not(_piq_operator_in(a, b))`

    Every operand of and/or after the first is wrapped in a zero-argument
    lambda, leaving it to the callee to decide whether to evaluate it.
    """

    def visit(self, node):
        node = self.generic_visit(node)

        def located(new_node):
            return ast.copy_location(new_node, node)

        def operator_call(name, args):
            target = located(ast.Name(id='_piq_operator_' + name, ctx=ast.Load()))
            return located(ast.Call(func=target, args=args, keywords=[]))

        def lazy_bool_operator_call(name, operands):
            thunks = []
            for operand in operands[1:]:
                no_params = located(ast.arguments(
                    args=[],
                    vararg=None,
                    kwarg=None,
                    defaults=[]
                ))
                thunks.append(located(ast.Lambda(args=no_params, body=operand)))
            # the first operand stays eager
            return operator_call(name, [operands[0]] + thunks)

        if isinstance(node, ast.BoolOp):
            if isinstance(node.op, ast.And):
                return lazy_bool_operator_call('and', node.values)
            if isinstance(node.op, ast.Or):
                return lazy_bool_operator_call('or', node.values)
            return node

        if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.Not):
            return operator_call('not', [node.operand])

        if isinstance(node, ast.Compare) and len(node.ops) == 1:
            # NOTE, XXX: not handling chained comparison operators, for example:
            #
            #       1 in 2 in 3
            #       1 in 2 > 3
            #       1 in 2 not in 3
            comparison = node.ops[0]
            if isinstance(comparison, (ast.In, ast.NotIn)):
                membership = operator_call('in', [node.left, node.comparators[0]])
                if isinstance(comparison, ast.In):
                    return membership
                return operator_call('not', [membership])

        return node
class AbstractNamed(object):
    """A name and its location, still waiting for a value.

    The value is supplied through the `**` operator: `pending ** value`
    yields piq.Named(name, loc, value).
    """

    def __init__(self, name, loc):
        self.name, self.loc = name, loc

    def __pow__(self, value):
        return piq.Named(self.name, self.loc, value)
class AbstractSplice(AbstractNamed):
    """Like AbstractNamed, but `pending ** value` yields piq.Splice(name, loc, value)."""

    def __pow__(self, value):
        return piq.Splice(self.name, self.loc, value)
def wrap_object(*args):
    """Runtime hook bound to `_piq_wrap_object`; forwards all arguments to piq.wrap_object()."""
    wrapped = piq.wrap_object(*args)
    return wrapped
def make_name(name, loc):
    """Runtime hook bound to `_piq_make_name`; builds a piq.Name from a name and its location."""
    return piq.Name(name, loc)
def make_named(name, loc):
    """Runtime hook bound to `_piq_make_named`.

    Returns an AbstractNamed, which becomes a piq.Named once combined with a
    value through the `**` operator.
    """
    return AbstractNamed(name, loc)
def make_splice(name, loc):
    """Runtime hook bound to `_piq_make_splice`.

    Returns an AbstractSplice, which becomes a piq.Splice once combined with
    a value through the `**` operator.
    """
    return AbstractSplice(name, loc)
def is_domain_boolean_expression(x):
    """Tell whether `x` provides all three of the boolean hooks.

    The hooks are `__bool_and__`, `__bool_or__` and `__bool_not__`.
    """
    required_hooks = ('__bool_and__', '__bool_or__', '__bool_not__')
    return all(hasattr(x, hook) for hook in required_hooks)
def check_is_domain_boolean_expression(x):
    """Assert that `x` is a domain boolean expression."""
    # domain boolean expressions must not be mixed with non-domain ones
    #
    # TODO, XXX: produce an actual error message, or leave such validation to
    # domain implementation
    is_domain = is_domain_boolean_expression(x)
    assert is_domain
def check_is_not_domain_boolean_expression(x):
    """Assert that `x` is not a domain boolean expression."""
    is_domain = is_domain_boolean_expression(x)
    assert not is_domain
def operator_and(accu, *args):
    """Runtime implementation of `a and b and ...` (bound to `_piq_operator_and`).

    `accu` is the already-evaluated first operand.  Each item of `args` is a
    zero-argument callable producing the next operand, so operands are
    evaluated only when needed.

    When the running result is a domain boolean expression, the next operand
    is evaluated and combined through `__bool_and__`.  Otherwise standard
    short-circuit evaluation applies.  Mixing domain and non-domain operands
    trips an assertion.
    """
    def binary_and(accu, arg):
        if is_domain_boolean_expression(accu):
            arg = arg()
            check_is_domain_boolean_expression(arg)

            res = accu.__bool_and__(arg)
            check_is_domain_boolean_expression(res)
        else:
            # standard boolean and evaluation
            if not accu:
                res = False  # short-circuit
            else:
                arg = arg()
                check_is_not_domain_boolean_expression(arg)
                res = (accu and arg)  # NOTE, XXX: equivalent to just arg
        return res

    for arg in args:
        accu = binary_and(accu, arg)
        # Only plain values may end the chain early.  A domain expression is
        # never compared against False, so whatever `__eq__` it defines is
        # not invoked here.  The `== False` comparison (rather than
        # `is False`) is kept on purpose so that 0 still stops the chain.
        if not is_domain_boolean_expression(accu) and accu == False:
            break  # short-circuit

    return accu
def operator_or(accu, *args):
    """Runtime implementation of `a or b or ...` (bound to `_piq_operator_or`).

    `accu` is the already-evaluated first operand.  Each item of `args` is a
    zero-argument callable producing the next operand, so operands are
    evaluated only when needed.

    When the running result is a domain boolean expression, the next operand
    is evaluated and combined through `__bool_or__`.  Otherwise standard
    short-circuit evaluation applies.  Mixing domain and non-domain operands
    trips an assertion.
    """
    def binary_or(accu, arg):
        if is_domain_boolean_expression(accu):
            arg = arg()
            check_is_domain_boolean_expression(arg)

            res = accu.__bool_or__(arg)
            check_is_domain_boolean_expression(res)
        else:
            # standard boolean or evaluation
            if accu:
                res = accu
            else:
                arg = arg()
                check_is_not_domain_boolean_expression(arg)
                res = (accu or arg)  # NOTE, XXX: equivalent to just arg
        return res

    for arg in args:
        accu = binary_or(accu, arg)
        # The domain check comes first so that a domain expression is never
        # truth-tested; only a plain truthy value ends the chain early.
        if not is_domain_boolean_expression(accu) and accu:
            break  # short-circuit

    return accu
def operator_not(arg):
    """Runtime implementation of `not a` (bound to `_piq_operator_not`).

    A domain boolean expression is negated through its `__bool_not__` hook,
    and the result must again be a domain expression.  Any other value gets
    the standard `not`.
    """
    if not is_domain_boolean_expression(arg):
        return not arg

    negated = arg.__bool_not__()
    check_is_domain_boolean_expression(negated)
    return negated
def operator_in(left, right):
    """Runtime implementation of `a in b` (bound to `_piq_operator_in`).

    When the left operand provides an `__in__` hook, that hook decides and
    must return a domain boolean expression.  Otherwise the standard `in`
    test is used.
    """
    if not hasattr(left, '__in__'):
        return left in right

    membership = left.__in__(right)
    check_is_domain_boolean_expression(membership)
    return membership
# wrapper around tokenize.untokenize() that forces its "compat" mode
#
def untokenize(tokens):
    """Turn a sequence of tokens back into source text.

    `tokens` may be any iterable of token tuples.  It is copied first, so the
    caller's sequence is left unmodified.  An empty sequence is passed
    through to tokenize.untokenize() as is.
    """
    # stock Python's tokenize.untokenize() has a bug/omission when it doesn't
    # correctly carry indent state upon switching from Untokenizer.untokenize()
    # to Untokenizer.compat()
    #
    # to make it happy, we are forcing it to switch to Untokenizer.compat() on
    # the first token, by shortening that token to its 2-item form
    tokens = list(tokens)
    if tokens:
        first_token = tokens[0]
        tokens[0] = (first_token[0], first_token[1])
    return tokenize.untokenize(tokens)
def parse_file(filename):
    """Parse the file `filename` into a Python AST after rewriting its Piq names.

    The file is tokenized, Piq names are rewritten at token level, the tokens
    are turned back into source text, and that text is parsed in 'exec' mode.
    """
    transformed_source = untokenize(tokenize_and_transform_file(filename))
    return ast.parse(transformed_source, filename, 'exec')
# for AST transformations see
#
#   https://docs.python.org/3/library/ast.html
#   https://docs.python.org/2/library/ast.html
#   http://greentreesnakes.readthedocs.io/
def parse_and_transform_file(filename, transform_expressions=True, transform_operators=True):
    """Parse the file `filename` and apply the selected AST transformations.

    `transform_expressions` enables AstExprWrapper and `transform_operators`
    enables AstOverrideOperators; when both are on they run in that order.
    Returns the resulting tree.
    """
    tree = parse_file(filename)

    enabled_passes = []
    if transform_expressions:
        enabled_passes.append(AstExprWrapper)
    if transform_operators:
        enabled_passes.append(AstOverrideOperators)

    for transformer_class in enabled_passes:
        tree = transformer_class().visit(tree)
    return tree
def exec_file(filename, user_globals=None, transform_operators=False):
    """Transform and execute the file `filename`.

    `user_globals`, when given, must be a dict.  It is used directly as the
    execution namespace, so it is updated in place with the `_piq_*` runtime
    hooks and with whatever the executed code defines.  Without it a fresh
    namespace is used.
    """
    transformed_ast = parse_and_transform_file(filename, transform_operators=transform_operators)

    if user_globals is None:
        exec_globals = {}
    else:
        assert isinstance(user_globals, dict)
        exec_globals = user_globals

    # names the transformed code refers to at run time
    runtime_hooks = {
        '_piq_wrap_object': wrap_object,
        '_piq_make_name': make_name,
        '_piq_make_named': make_named,
        '_piq_make_splice': make_splice,
        '_piq_operator_and': operator_and,
        '_piq_operator_or': operator_or,
        '_piq_operator_not': operator_not,
        '_piq_operator_in': operator_in,
    }
    exec_globals.update(runtime_hooks)

    code = compile(transformed_ast, filename, 'exec')
    exec(code, exec_globals)
def main():
    """Command-line entry point.

    Usage: <script> [options] FILE

    Options (the first matching mode below wins; with none, FILE is executed):
      -t,   --tokenize                      print the tokens of FILE
      -tt,  --tokenize-transform            print the tokens after Piq name rewriting
      -p,   --parse                         print the AST of the rewritten source
      -pt,  --parse-transform               print the AST after both AST transformations
      -pte, --parse-transform-expressions   ... after expression wrapping only
      -pto, --parse-transform-operators     ... after operator overriding only
      -a,   --abstract-output               print abstract dumps instead of source text

    Unrecognized options are ignored.  Exits with an error message when FILE
    is missing or when executing it raises piq.ParseError.
    """
    arg_tokenize = False
    arg_tokenize_transform = False
    arg_parse = False
    arg_parse_transform = False
    arg_transform_operators = False
    arg_transform_expressions = False
    arg_abstract_output = False

    args = sys.argv[1:]
    i = 0
    while i < len(args):
        a = args[i]
        if a in ['-t', '--tokenize']:
            arg_tokenize = True
            arg_abstract_output = True
        elif a in ['-tt', '--tokenize-transform']:
            arg_tokenize_transform = True
        elif a in ['-p', '--parse']:
            arg_parse = True
            arg_abstract_output = True
        elif a in ['-pt', '--parse-transform']:
            arg_parse_transform = True
            arg_transform_expressions = True
            arg_transform_operators = True
        elif a in ['-pte', '--parse-transform-expressions']:
            arg_parse_transform = True
            arg_transform_expressions = True
        elif a in ['-pto', '--parse-transform-operators']:
            arg_parse_transform = True
            arg_transform_operators = True
        elif a in ['-a', '--abstract-output']:
            arg_abstract_output = True
        elif a.startswith('-'):
            pass  # unrecognized options are ignored
        else:
            break  # positional argument
        i += 1

    # without this check a missing FILE surfaced as a bare IndexError
    if i >= len(args):
        sys.exit('usage: ' + sys.argv[0] + ' [options] FILE')
    filename = args[i]

    def print_tokens(tokens):
        if arg_abstract_output:
            res = [(pytoken.tok_name[token[0]], token[1]) for token in tokens]
            print(res)
        else:
            print(untokenize(tokens))

    def print_ast(output_ast):
        if arg_abstract_output:
            # astunparse is optional here: fall back to the stdlib dump
            try:
                import astunparse
                print(astunparse.dump(output_ast))
            except ImportError:
                print(ast.dump(output_ast))
        else:
            try:
                import astunparse
                print(astunparse.unparse(output_ast))
            except ImportError:
                sys.exit("couldn't import 'astunparse'")

    if arg_tokenize:
        tokens = tokenize_file(filename)
        print_tokens(tokens)
    elif arg_tokenize_transform:
        tokens = tokenize_and_transform_file(filename)
        print_tokens(tokens)
    elif arg_parse:
        source_ast = parse_file(filename)
        print_ast(source_ast)
    elif arg_parse_transform:
        transformed_ast = parse_and_transform_file(
            filename,
            transform_operators=arg_transform_operators,
            transform_expressions=arg_transform_expressions)
        print_ast(transformed_ast)
    else:
        # XXX
        try:
            exec_file(filename, transform_operators=True)
        except piq.ParseError as e:
            # The previous handler used undefined names `loc` and `error`
            # and so died with NameError instead of reporting the problem.
            #
            # NOTE(review): ParseError is raised in this file as
            # ParseError(loc, message); the attribute names probed below
            # are a guess -- confirm against the piq module.  The
            # constructor arguments are used as a fallback.
            loc = getattr(e, 'loc', None)
            error = getattr(e, 'error', None)
            if loc is None and len(e.args) > 0:
                loc = e.args[0]
            if error is None:
                error = e.args[1] if len(e.args) > 1 else e
            line = getattr(loc, 'line', '?')
            sys.stderr.write(filename + ':' + str(line) + ': ' + str(error) + '\n')
            sys.exit(1)
# run the command-line driver only when this file is executed as a script
if __name__ == '__main__':
    main()