-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuildBenchmarkJson.py
42 lines (35 loc) · 1.37 KB
/
buildBenchmarkJson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import csv
import json
# Build a JSON file that maps each benchmark LE identifier to its cleaned text.
#
# Inputs:
#   benchmark_file -- plain-text file, one LE identifier per line.
#   input_file     -- CSV file; column index 1 is matched against the
#                     identifiers and column index 3 supplies the text.
# Output:
#   output_file    -- a single JSON object {identifier: text}, indent=4.
#
# If the same identifier appears in several CSV rows, the last row wins.

# Define the input and output file names
input_file = 'data/experiences.csv'
# Alternative file set (swap in to use the benchmark CSV / JSON pair instead):
# output_file = 'data/benchmarkLEs.json'
# benchmark_file = 'data/benchmarkLEs.csv'
output_file = 'data/all_lexp.json'
benchmark_file = 'output/30-terms/output.1.txt'

benchmarkData = dict()

# Read the LE identifiers, one per line, keeping file order in the list
with open(benchmark_file, "r") as file:
    benchmarkLEs = [line.strip() for line in file]

# Set used for the per-row membership test (O(1) instead of scanning the list).
# Blank lines are dropped here so that a CSV row whose column 1 is empty is not
# selected just because the term file contains an empty line.
_benchmark_lookup = {le for le in benchmarkLEs if le}

# Collect the matching rows from the input CSV
with open(input_file, 'r', newline='') as csv_file:
    for row in csv.reader(csv_file):
        # Rows with fewer than 4 columns have no text column to export
        if len(row) < 4:
            continue
        lexp = row[1]
        # To output all lexps instead, remove this membership check
        if lexp not in _benchmark_lookup:
            continue
        # Flatten the text onto one line: newlines become spaces, carriage
        # returns are removed, double spaces are collapsed to single spaces
        text = row[3].replace('\n', ' ').replace('\r', '').replace('  ', ' ').strip()
        benchmarkData[lexp] = text

# Write the output only after the CSV has been fully processed, so a failure
# while reading cannot leave a truncated or empty JSON file behind
with open(output_file, 'w') as txt_file:
    txt_file.write(json.dumps(benchmarkData, indent=4))

print(f'Saved JSON to {output_file}.')
# print(json.dumps(benchmarkData, indent=4))