forked from pychess/pychess
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpgn2ecodb.py
84 lines (63 loc) · 2.71 KB
/
pgn2ecodb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from __future__ import print_function
# English eco.pgn was converted from
# http://www.chessville.com/downloads_files/instructional_materials/ECO_Codes_With_Names_and_Moves.zip
# the other eco.pgn files were taken from Wikipedia
import os
import sys
import sqlite3
import struct
from pychess.compat import memoryview, unicode
from pychess.Savers.pgn import load
from pychess.System.protoopen import protoopen
from pychess.System.prefix import addDataPrefix
from pychess.Utils.eco import hash_struct
# Location of the ECO openings database. addDataPrefix presumably resolves
# "eco.db" inside the pychess data directory -- TODO confirm against
# pychess.System.prefix.
# NOTE(review): os.path.join() with a single argument returns it unchanged.
path = os.path.join(addDataPrefix("eco.db"))
# The connection is opened at import time (this line is outside the
# __main__ guard), so merely importing this module connects to eco.db.
conn = sqlite3.connect(path)
if __name__ == '__main__':
    # Build script: recreates the "openings" table in eco.db from the
    # eco.pgn files found under ./lang/<language>/.
    c = conn.cursor()

    def feed(pgnfile, lang):
        """Store every named opening of one eco.pgn file in the database.

        pgnfile -- path of the PGN file to read
        lang -- language code written to the "lang" column of each row

        For each game the ECO, Opening and Variation tags are read.  The
        "base" column is 1 when the ECO code differs from the one of the
        previous game in the file, 0 otherwise.  The "hash" column is the
        packed hash of the final board position; a game without moves
        borrows the hash of the English base row with the same ECO code.
        Games without an opening name are not stored.  A game without
        moves whose English base row cannot be found is skipped with a
        warning on stderr, because there is no position to attach it to.

        All rows are inserted in one batch and committed, then the largest
        number of plies seen in the file is printed.
        """
        cf = load(protoopen(pgnfile))
        # One cursor for all base-row lookups of this file.
        lookup = conn.cursor()
        rows = []
        old_eco = ""
        ply_max = 0
        for i, _game in enumerate(cf.games):
            model = cf.loadToModel(i)

            eco = cf._getTag(i, "ECO")[:3]

            opening = cf._getTag(i, "Opening")
            if opening is None:
                opening = ""

            variation = cf._getTag(i, "Variation")
            if variation is None:
                variation = ""

            # The first game of each ECO code is flagged as its base line.
            base = int(old_eco != eco)

            ply = len(model.moves)
            ply_max = max(ply_max, ply)
            if ply == 0:
                # Name-only entry: reuse the position of the English base line.
                lookup.execute("select * from openings where eco=? and lang='en' and base=1", (eco,))
                res = lookup.fetchone()
                # None marks "no position known" instead of leaving the
                # variable unbound or stale from the previous game.
                pos_hash = None if res is None else res[0]
            else:
                pos_hash = memoryview(hash_struct.pack(model.boards[-1].board.hash))

            if opening:
                if pos_hash is None:
                    print("warning: no English base line for ECO %s, skipping '%s' (%s)"
                          % (eco, opening, lang), file=sys.stderr)
                else:
                    rows.append((pos_hash, base, unicode(eco), unicode(lang),
                                 unicode(opening), unicode(variation)))

            old_eco = eco

        c.executemany("insert into openings(hash, base, eco, lang, opening, variation) values (?, ?, ?, ?, ?, ?)", rows)
        conn.commit()

        print("Max ply was %s" % ply_max)

    try:
        c.execute("drop table if exists openings")

        # Unfortunately sqlite doesn't support uint64, so we have to use the
        # blob type to store polyglot-hash values.
        c.execute("create table openings(hash blob, base integer, eco text, lang text, opening text, variation text)")

        # Several eco lists contain only eco+name pairs, so the English file
        # is fed first: its base ECO line positions are reused by the others.
        print("processing en eco.pgn")
        feed("lang/en/eco.pgn", "en")

        for lang in os.listdir("lang"):
            if lang == "en" or not os.path.isdir("lang/" + lang):
                continue

            pgnfile = "lang/%s/eco.pgn" % lang
            if os.path.isfile(pgnfile):
                print("processing %s eco.pgn" % lang)
                feed(pgnfile, lang)
    finally:
        # Close the connection even when a PGN file fails to load.
        conn.close()