Skip to content

Commit

Permalink
adding gzip option to jsonl file iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
EvanDietzMorris committed Dec 11, 2023
1 parent 63ef381 commit 58a0b29
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions Common/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import logging
import tarfile
import csv
import gzip
import requests
import pandas as pd
Expand Down Expand Up @@ -569,12 +568,12 @@ def quick_json_loads(item):
return orjson.loads(item)


def quick_jsonl_file_iterator(json_file):
with open(json_file, 'r', encoding='utf-8') as stream:
for line in stream:
def quick_jsonl_file_iterator(json_file, is_gzip=False):
    """Yield one parsed JSON value per line of a JSON Lines file.

    Args:
        json_file: Path of the JSON Lines file to read.
        is_gzip: When True, the file is opened through ``gzip.open`` and
            decompressed while reading; otherwise it is opened as a plain
            text file.

    Yields:
        The result of ``orjson.loads`` for each line, in file order.

    Any error raised by ``orjson.loads`` for a line that is not valid JSON
    propagates to the caller.
    """
    # gzip.open in text mode falls back to the locale's default encoding when
    # none is given, so pin UTF-8 for both branches to decode identically
    # regardless of the platform.
    open_text = gzip.open if is_gzip else open
    with open_text(json_file, 'rt', encoding='utf-8') as fp:
        for line in fp:
            yield orjson.loads(line)


def chunk_iterator(iterable, chunk_size):
iterator = iter(iterable)
while True:
Expand Down

0 comments on commit 58a0b29

Please sign in to comment.