concat_no_dup.py
# List of input text files
input_files = [
    "output/60-terms/output.1.txt",
    "output/60-terms/output.2.txt",
    "output/60-terms/output.3.txt",
    "output/60-terms/output.4.txt",
    "output/60-terms/output.5.txt",
    "output/60-terms/output.6.txt",
    "output/60-terms/output.7.txt",
    "output/60-terms/output.8.txt",
    "output/60-terms/output.9.txt",
    "output/60-terms/output.10.txt",
]
# Initialize a set to store unique lines
unique_lines = set()
# Iterate through each input file
for file_name in input_files:
    try:
        with open(file_name, "r") as file:
            # Read each line, strip trailing whitespace, and add it to the set
            for line in file:
                unique_lines.add(line.strip())
    except FileNotFoundError:
        print(f"File not found: {file_name}")
# Output file name
output_file = "output/60-terms/concat_1-10.txt"
# Write the unique lines to the output file
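# Note: a set does not preserve insertion order, so lines come out in
# arbitrary order; iterate over sorted(unique_lines) instead if a stable
# output order is needed.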
with open(output_file, "w") as output:
    for line in unique_lines:
        output.write(line + "\n")
print(f"Concatenated and removed duplicates from {len(input_files)} files. Result saved in {output_file}")