-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
33 lines (32 loc) · 1.28 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import lxml
import html5lib
import bs4
import numpy as np
# A production whose first right-hand-side symbol is one of these loses its
# last token, unless its tokens equal those of the cell's final production.
# NOTE(review): presumably such cells hold several productions run together,
# so the trailing token is the next production's left-hand side - confirm
# against LLtable.html.
TRIMMED_RULE_HEADS = ("Exp", "Statement", "DeclareVariable")


def format_production(column, lhs, symbols):
    """Return the ``insertMember(...)`` statement for one production.

    Args:
        column: Table column label with ``$`` already rewritten.
        lhs: Left-hand side of the production.
        symbols: Right-hand-side tokens, already trimmed by the caller.

    Returns:
        The statement text without a trailing newline. A right-hand side
        that is exactly ``["ε"]`` yields a statement with a count of 0 and
        no ``partOfRulesRightSide`` arguments.
    """
    if symbols == ["ε"]:
        return f"insertMember({column}, {lhs}, 0);"

    pieces = [f"insertMember({column}, {lhs}, {len(symbols)}"]
    for symbol in symbols:
        name = (
            symbol.replace("ε", "eps")
            .replace("$", "ending")
            .replace("S,", "ProgramBody,")
        )
        pieces.append(f",\n\t\tpartOfRulesRightSide({name})")
    pieces.append(");")
    return "".join(pieces)


def cell_statements(column, cell):
    """Return the statements generated by one non-null table cell.

    The cell text is split on ``::=``: the first piece is the left-hand
    side and every following piece is treated as one right-hand side.

    Args:
        column: Column label of the cell; ``$`` is rewritten to ``ending``.
        cell: Cell content. Anything that is not a string produces no
            statements (previously this case was hidden by a bare
            ``except``).

    Returns:
        A list of statement strings, in the order they should be printed.
    """
    terminal = column.replace("$", "ending").strip()
    if not isinstance(cell, str):
        return []

    lhs, *alternatives = cell.split("::=")
    lhs = lhs.strip()
    if not alternatives:
        # No "::=" in the cell (e.g. a row-label column): nothing to emit.
        return []

    last_symbols = alternatives[-1].split()
    statements = []
    for alternative in alternatives:
        symbols = alternative.split()
        if not symbols:
            # An empty right-hand side is malformed. Stop here, keeping what
            # was already produced, exactly as the swallowed IndexError did.
            break
        if symbols[0] in TRIMMED_RULE_HEADS and symbols != last_symbols:
            del symbols[-1]
        statements.append(format_production(terminal, lhs, symbols))
    return statements


def main(path="LLtable.html"):
    """Read the first HTML table in *path* and print one statement per rule.

    Every non-null cell of every row is passed to ``cell_statements`` and
    the resulting statements are written to standard output.
    """
    with open(path) as fp:
        df_list = pd.read_html(fp, encoding='ISO-8859-1')  # this parses all the tables in webpages to a list
    df = df_list[0]

    for _, row in df.iterrows():
        for column in df.columns:
            cell = row[column]
            if pd.isnull(cell):
                continue
            for statement in cell_statements(column, cell):
                print(statement)


if __name__ == "__main__":
    main()