-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaddPleiadesURIfolder.py
165 lines (107 loc) · 3.95 KB
/
addPleiadesURIfolder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import re
from pathlib import Path
import xml.etree.ElementTree as ET
import datetime
import csv
import os
# Classes
class GettyEntry:
    """One row of the TGN/Pleiades concordance CSV.

    gettyURL holds the first CSV column and pleiadesURL the second; both are
    stored exactly as read from the file.
    """

    def __init__(self, gettyURL, pleiadesURL):
        self.gettyURL = gettyURL
        self.pleiadesURL = pleiadesURL

    def __repr__(self):
        return 'GettyEntry(%r, %r)' % (self.gettyURL, self.pleiadesURL)


# Attributes
# These module-level names are kept so that anything referring to them still
# resolves. Only importedCSV is used by the code below; the prompt loops no
# longer rely on the boolean flags.
initKey = ''
importedCSV = []
choiceIsCorr = True
isXML = True
isPath = True

# Default location of the concordance file (used when the user answers 'n').
DEFAULT_CSV_PATH = Path('../pleiades-tgn-master/pleiades-tgn.csv')
# Prefix that turns a TGN number into a Getty URI.
GETTY_URI_PREFIX = 'http://vocab.getty.edu/tgn/'
# TGN number inside a key attribute such as "tgn,7001393".
_KEY_TGN_ID = re.compile(r'tgn,\s*(\d+)')
# TGN number inside a Getty URI taken from the CSV.
_URI_TGN_ID = re.compile(re.escape(GETTY_URI_PREFIX) + r'(\d+)')


def _ask_csv_path():
    """Ask whether the default CSV path should be replaced and return the path to use."""
    while True:
        # The question is asked again on every pass, so an unusable answer
        # leads to a new prompt instead of an endless stream of hints.
        choice = input('Do you want to change de default path to csv file? (y/n) ')
        if 'y' in choice:
            return Path(input('Enter path to csv file: '))
        if 'n' in choice:
            return DEFAULT_CSV_PATH
        print('Please enter \'y\' for yes or \'n\' for no.')


def _ask_folder(prompt, forbidden=None):
    """Prompt until the user enters a folder path and return it as a Path.

    Paths containing '.xml' are rejected as file paths. When forbidden is
    given, a path equal to it is rejected as well.
    """
    while True:
        folder = Path(input(prompt))
        if '.xml' in str(folder):
            print('Please enter a folder path, not a file path. For further information see README.md.')
        elif forbidden is not None and folder == forbidden:
            print('Please enter different folder.')
        else:
            return folder


def _load_concordance(csv_path):
    """Read the concordance CSV and return one GettyEntry per usable row."""
    entries = []
    with open(csv_path, 'r', encoding='utf-8', newline='') as handle:
        for row in csv.reader(handle):
            # Blank or truncated lines carry no URI pair; skip them rather
            # than failing with an IndexError.
            if len(row) < 2:
                continue
            entries.append(GettyEntry(row[0], row[1]))
    return entries


def _index_by_tgn_id(entries):
    """Map each TGN number found in a Getty URI to its Pleiades URI.

    Rows whose first column holds no Getty TGN URI are ignored. When a number
    occurs more than once, the last row wins.
    """
    pleiades_by_id = {}
    for entry in entries:
        found = _URI_TGN_ID.search(entry.gettyURL)
        if found is not None:
            pleiades_by_id[found.group(1)] = entry.pleiadesURL
    return pleiades_by_id


def _add_pleiades_refs(root, pleiades_by_id):
    """Set a ref attribute on every descendant of root whose key names a known TGN number.

    Returns the number of elements that received a ref attribute.
    """
    updated = 0
    for element in root.findall('.//*[@key]'):
        found = _KEY_TGN_ID.search(element.get('key', ''))
        if found is None:
            # The key is not of the form "tgn,<number>".
            continue
        # The whole number is compared, so "700" can no longer match the
        # URI of "7001393" the way a substring test would.
        pleiades_url = pleiades_by_id.get(found.group(1))
        if pleiades_url is not None:
            element.set('ref', pleiades_url)
            updated += 1
    return updated


def main(csv_path=None, import_folder=None, export_folder=None):
    """Add Pleiades URIs to all XML files of a folder and export the results.

    Every argument that is None is requested interactively, in the order
    CSV path, input folder, output folder. Missing files or folders and an
    output folder identical to the input folder are reported on standard
    output instead of being raised.

    Returns the list of paths that were written.
    """
    exported = []
    # Status
    print('Status: Program started', datetime.datetime.now())
    try:
        if csv_path is None:
            csv_path = _ask_csv_path()

        if import_folder is None:
            import_folder = _ask_folder('Enter input folder: ')
        import_folder = Path(import_folder)
        # Listing the folder now reports a wrong input path before the user
        # is asked for anything else.
        files = os.listdir(import_folder)

        if export_folder is None:
            export_folder = _ask_folder('Enter different output folder: ',
                                        forbidden=import_folder)
        export_folder = Path(export_folder)
        if export_folder == import_folder:
            raise ValueError('Please enter different folder.')

        importedCSV[:] = _load_concordance(csv_path)
        pleiades_by_id = _index_by_tgn_id(importedCSV)

        for name in files:
            if '.xml' not in str(name):
                continue
            try:
                tree = ET.parse(import_folder / name)
            except ET.ParseError as parseError:
                # One malformed file must not abort the whole batch.
                print('Status: ' + name + ' skipped (not well-formed XML):', parseError)
                continue
            _add_pleiades_refs(tree.getroot(), pleiades_by_id)
            # NOTE: the file is serialised with ElementTree's default settings.
            target = export_folder / name
            tree.write(target)
            exported.append(target)
            print('Status: ' + name + ' exported', datetime.datetime.now())
    except FileNotFoundError as fnfError:
        print(fnfError)
    except ValueError as valueError:
        print(valueError)
    return exported


if __name__ == '__main__':
    main()