-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreatejsonl.py
31 lines (25 loc) · 993 Bytes
/
createjsonl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import json
# Folder scanned for input .txt files (hard-coded absolute path; adjust per machine).
folder_path: str = "/Users/bbc 2/business"
# Name of the JSONL file to write; being relative, it resolves against the current working directory.
output_file: str = "output.jsonl"
# Function to read text from files and generate JSONL format
def generate_jsonl(folder_path, output_file):
    """Build a JSONL file from the ``.txt`` files found in a folder.

    Each ``.txt`` file directly inside *folder_path* becomes one line of
    *output_file*: a JSON object whose ``"input_text"`` is the file's
    whitespace-stripped contents and whose ``"out_text"`` is the first 100
    characters of that stripped text. Files are processed in sorted
    filename order so the output is reproducible.

    Args:
        folder_path: Directory to scan (not recursive).
        output_file: Path of the JSONL file to create or overwrite.

    Raises:
        OSError: If the folder cannot be listed, or a file cannot be read
            or written.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    records = []
    # os.listdir() returns entries in arbitrary order; sort so repeated runs
    # over the same folder produce an identical output file.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, filename)
        # A directory can also be named "*.txt"; open() would raise on it.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read().strip()
        records.append({"input_text": text, "out_text": text[:100]})

    # Every input is read before the output is opened, so a read failure
    # leaves any existing output file untouched.
    with open(output_file, "w", encoding="utf-8") as f:
        for record in records:
            json.dump(record, f)
            f.write("\n")
# Run the conversion only when executed as a script, so that importing this
# module does not read the folder or write the output file as a side effect.
if __name__ == "__main__":
    generate_jsonl(folder_path, output_file)