This repository has been archived by the owner on Aug 3, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
csv2marc.py
executable file
·128 lines (110 loc) · 3.32 KB
/
csv2marc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import csv, os, sys
from pymarc import Field, MARCWriter, Record
class CSV2MARC(object):
    """
    Converts CSV to MARC records.

    The path of the CSV file is taken from the first command-line argument;
    the output is written next to it with the extension replaced by ``.mrc``.

    Every CSV row describes either one control field or one subfield of a
    data field, using these columns (in this order): sysno, fieldTag,
    fieldTagOccurrence, indicator1, indicator2, subfieldLabel,
    subfieldLabelOccurrence, value.  A new record (or field) is started
    whenever the sysno (or the tag / tag occurrence) differs from the
    previous row, so rows that belong together must be adjacent.
    """

    def __init__(self):
        """
        Load the CSV file and open the output file.

        Raises:
            Exception: no file path was given on the command line.
            SystemExit: the CSV file cannot be opened (exit status 1).
            IOError: the output file cannot be created.
        """
        if len(sys.argv) < 2:
            raise Exception(
                "You need to provide a file path to the CSV file as an argument."
            )
        filepath = sys.argv[1]

        try:
            # Keep the handle so it can be closed once conversion is done.
            self.source = open(filepath, "r")
        except IOError:
            # sys.stderr.write works on both Python 2 and Python 3.
            sys.stderr.write("Cannot open {0}\n".format(filepath))
            raise SystemExit(1)
        self.reader = csv.reader(self.source, delimiter=",")

        output = "{0}.mrc".format(os.path.splitext(filepath)[0])
        try:
            # MARC transmission format is binary, so never open in text mode.
            self.file = open(output, "wb")
        except IOError:
            self.source.close()
            raise

        # State variables (False means "nothing started yet")
        self.sysno = False
        self.record = False
        self.field = False
        self.fieldTag = False
        self.fieldTagOccurrence = False
        self.subfieldLabel = False
        self.subfieldLabelOccurrence = False
        self.line = False

    def checkFieldChange(self, fieldTag, fieldTagOccurrence):
        """
        Return True if the given tag / occurrence pair differs from the
        field that is currently being built.
        """
        return (
            self.fieldTag != fieldTag
            or self.fieldTagOccurrence != fieldTagOccurrence
        )

    def checkRecordChange(self, sysno):
        """
        Return True if `sysno` differs from the record currently being built.
        """
        return sysno != self.sysno

    def writeMARCRecord(self, record):
        """
        Serialize `record` to the output file.
        """
        writer = MARCWriter(self.file)
        writer.write(record)

    def getNewRecord(self, sysno):
        """
        Start a new, empty record identified by `sysno`.
        """
        self.sysno = sysno
        self.record = Record()

    def getNewField(self, line):
        """
        Start a new field from the parsed CSV row `line`.

        A row with a subfield label starts a data field (tag + indicators,
        subfields are added by the caller); a row without one becomes a
        control field carrying the row's value as its data.
        """
        self.fieldTag = line["fieldTag"]
        self.fieldTagOccurrence = line["fieldTagOccurrence"]
        if line["subfieldLabel"]:
            # Data field.  MARC indicators are exactly one character each, so
            # an empty CSV cell is written as the blank indicator (a space).
            self.field = Field(
                tag=line["fieldTag"],
                indicators=[
                    line["indicator1"] or " ",
                    line["indicator2"] or " ",
                ],
            )
        else:
            # Control field
            self.field = Field(
                tag=line["fieldTag"],
                data=line["value"],
            )

    def _flushRecord(self):
        """
        Attach the pending field to the current record, write the record
        and reset the field state.
        """
        if self.field is not False:
            self.record.add_field(self.field)
        self.field = False
        self.fieldTag = False
        self.writeMARCRecord(self.record)

    def main(self):
        """
        Convert every CSV row and write the resulting MARC records.

        Blank CSV lines are skipped.  An input without any data rows yields
        an empty output file.  Both files are closed when the conversion
        ends, whether it succeeds or fails.
        """
        try:
            for row in self.reader:
                if not row:
                    # csv.reader yields an empty list for a blank line
                    continue

                # Parse the line
                line = {
                    "sysno": row[0],
                    "fieldTag": row[1],
                    "fieldTagOccurrence": row[2],
                    "indicator1": row[3],
                    "indicator2": row[4],
                    "subfieldLabel": row[5],
                    "subfieldLabelOccurrence": row[6],
                    "value": row[7],
                }

                # Test the sentinel rather than the truthiness of the sysno,
                # so that an empty sysno does not restart the record on
                # every row.
                if self.record is False:
                    self.getNewRecord(line["sysno"])
                elif self.checkRecordChange(line["sysno"]):
                    # Finish the previous record before starting the next
                    self._flushRecord()
                    self.getNewRecord(line["sysno"])

                if self.field is False:
                    self.getNewField(line)
                elif self.checkFieldChange(
                    line["fieldTag"], line["fieldTagOccurrence"]
                ):
                    self.record.add_field(self.field)
                    self.getNewField(line)

                if line["subfieldLabel"]:  # If we have a subfield
                    self.field.add_subfield(
                        line["subfieldLabel"],
                        line["value"],
                    )

            # Write the last record after the iteration has ended; there is
            # none when the input contained no data rows.
            if self.record is not False:
                self._flushRecord()
        finally:
            self.file.close()
            self.source.close()
def main():
    """
    Command-line entry point: build the converter from the command-line
    argument and run the conversion.
    """
    CSV2MARC().main()


# Only run the conversion when executed as a script, not when imported.
if __name__ == "__main__":
    main()