-
Notifications
You must be signed in to change notification settings - Fork 308
/
Copy pathnemo-odf-to-txt
executable file
·48 lines (39 loc) · 1.26 KB
/
nemo-odf-to-txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/python3
import sys
import zipfile
from pathlib import Path
from html.parser import HTMLParser
class Parser(HTMLParser):
    """Collect the text that follows ``text*`` and ``meta*`` tags.

    Each time a start tag whose name begins with ``text`` or ``meta`` is
    seen, the next chunk of character data is captured into ``parsed``.
    When that tag is ``text:s``, the captured data is instead joined onto
    the previous entry with a single space.

    Attributes:
        parsed: Captured strings, in document order.
        get_next_data: True while the next data chunk should be captured.
        concat_next_data: True while the next captured chunk should be
            appended to the last entry rather than stored as a new one.
    """

    # Immutable defaults may safely live on the class.
    get_next_data = False
    concat_next_data = False

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, initially empty, ``parsed`` list."""
        super().__init__(*args, **kwargs)
        # Must be per-instance: a list defined in the class body would be
        # shared (and mutated) by every Parser object.
        self.parsed = []

    def handle_starttag(self, tag, attrs):
        """Arm capture of the next data chunk for ``text*``/``meta*`` tags."""
        if tag.startswith(("text", "meta")):
            if tag == "text:s":
                self.concat_next_data = True
            self.get_next_data = True

    def handle_endtag(self, tag):
        """Ignore end tags; capture is driven by start tags only."""
        pass

    def handle_data(self, data):
        """Store *data* if a preceding start tag armed the capture.

        A chunk that is exactly ``"\\n"`` is discarded, but still disarms
        the capture. A pending concatenation request is kept in that case
        and applies to the next captured chunk.
        """
        if not self.get_next_data:
            return

        if data != "\n":
            if self.concat_next_data and self.parsed:
                self.parsed[-1] += " " + data
            else:
                # Either a normal capture, or a concatenation request with
                # nothing collected yet -- start a new entry instead of
                # indexing into an empty list.
                self.parsed.append(data.strip())
            self.concat_next_data = False

        self.get_next_data = False
def main(argv=None):
    """Print the text extracted from ``meta.xml`` and ``content.xml``.

    Opens the zip archive named by the first argument, feeds each of the two
    members (in the order they are stored in the archive) to a ``Parser`` and
    prints one ``"<member stem>: <comma separated text>"`` line per member
    that yielded any text.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(f"usage: {Path(sys.argv[0]).name} FILE")
    path = args[0]

    parser = Parser()

    # Use a distinct name so the ``zipfile`` module is not shadowed, and a
    # ``with`` block so the archive is closed even if parsing fails.
    with zipfile.ZipFile(path) as archive:
        for info in archive.infolist():
            if info.filename not in ("meta.xml", "content.xml"):
                continue

            contents = archive.read(info.filename).decode("utf-8")
            parser.feed(contents)

            if not parser.parsed:
                continue

            out_str = ", ".join(parser.parsed)
            print(f"{Path(info.filename).stem}: {out_str}\n", flush=True, file=sys.stdout)
            # Start the next member with an empty result list.
            parser.parsed = []

    return 0


if __name__ == "__main__":
    sys.exit(main())