Skip to content

Commit

Permalink
removed comments, refactored tests
Browse files Browse the repository at this point in the history
  • Loading branch information
JSabadin committed Jan 9, 2025
1 parent c8d203f commit f975549
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 30 deletions.
2 changes: 1 addition & 1 deletion luxonis_ml/data/datasets/luxonis_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def _save_df_offline(self, pl_df: pl.DataFrame) -> None:

rows = pl_df.to_dicts()

with ParquetFileManager(annotations_path, num_rows=100_000_000) as pfm:
with ParquetFileManager(annotations_path) as pfm:
for row in rows:
uuid_val = row.get("uuid")
if uuid_val is None:
Expand Down
2 changes: 1 addition & 1 deletion luxonis_ml/utils/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def put_dir(
remote_dir: PathType,
uuid_dict: Optional[Dict[str, str]] = None,
mlflow_instance: Optional[ModuleType] = None,
copy_contents: bool = False, # New argument
copy_contents: bool = False,
) -> Optional[Dict[str, str]]:
"""Uploads files to remote storage.
Expand Down
128 changes: 100 additions & 28 deletions tests/test_data/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,44 @@ def generator():
)


def test_clone_and_merge_dataset(tempdir: Path, bucket_storage: BucketStorage):
def test_clone_dataset(tempdir: Path, bucket_storage: BucketStorage):
    """Check that ``clone`` yields a dataset equal to its source.

    Builds a three-image dataset annotated with a single ``person``
    bounding box per image, splits it, clones it under a new name and
    compares the clone's metadata and offline dataframe with the
    original's.
    """
    source_name = "test_clone"
    source = LuxonisDataset(
        source_name,
        bucket_storage=bucket_storage,
        delete_existing=True,
        delete_remote=True,
    )

    # Lazily produced records: one image file plus one box annotation each.
    records = (
        {
            "file": create_image(index, tempdir),
            "annotation": {
                "class": "person",
                "boundingbox": {"x": 0.1, "y": 0.1, "w": 0.1, "h": 0.1},
            },
        }
        for index in range(3)
    )
    source.add(records)
    source.make_splits({"train": 0.6, "val": 0.4})

    clone = source.clone(new_dataset_name=source_name + "_cloned")

    # Every metadata accessor must report the same value on both datasets.
    for getter in (
        "get_splits",
        "get_classes",
        "get_task_names",
        "get_skeletons",
    ):
        assert getattr(clone, getter)() == getattr(source, getter)()

    # The stored annotation tables must match as well.
    clone_df = clone._load_df_offline()
    source_df = source._load_df_offline()
    assert clone_df.equals(source_df)


def test_merge_datasets_inplace(tempdir: Path, bucket_storage: BucketStorage):
dataset1_name = "test_merge_1"
dataset1 = LuxonisDataset(
dataset1_name,
Expand Down Expand Up @@ -535,44 +572,79 @@ def generator2():
dataset2.add(generator2())
dataset2.make_splits({"train": 0.6, "val": 0.4})

cloned_datset1 = dataset1.clone(new_dataset_name=dataset1_name + "_cloned")
cloned_dataset1 = dataset1.clone(
new_dataset_name=dataset1_name + "_cloned"
)
cloned_dataset1_merged_with_dataset2 = cloned_dataset1.merge_with(
dataset2, inplace=True
)

assert cloned_datset1.get_splits() == dataset1.get_splits()
assert cloned_datset1.get_classes() == dataset1.get_classes()
assert cloned_datset1.get_task_names() == dataset1.get_task_names()
assert cloned_datset1.get_skeletons() == dataset1.get_skeletons()
all_classes, _ = cloned_dataset1_merged_with_dataset2.get_classes()
assert set(all_classes) == {"person", "dog"}

df_cloned = cloned_datset1._load_df_offline()
df_original = dataset1._load_df_offline()
assert df_cloned.equals(df_original)
df_cloned_merged = cloned_dataset1_merged_with_dataset2._load_df_offline()
df_merged = dataset1.merge_with(dataset2, inplace=False)._load_df_offline()
assert df_cloned_merged.equals(df_merged)

cloned_datset1_merged_with_dataset2 = cloned_datset1.merge_with(
dataset2, inplace=True

def test_merge_datasets_out_of_place(
    tempdir: Path, bucket_storage: BucketStorage
):
    """Check that an out-of-place merge produces the expected dataset.

    Two three-image datasets are created, one annotated with ``person``
    and one with ``dog``. They are merged into a new, separately named
    dataset; the test asserts that the result exposes both classes and
    that its offline dataframe equals the one obtained by merging the
    same two datasets in place.
    """
    dataset1_name = "test_merge_1"
    dataset1 = LuxonisDataset(
        dataset1_name,
        bucket_storage=bucket_storage,
        delete_existing=True,
        delete_remote=True,
    )

    def generator1():
        for i in range(3):
            img = create_image(i, tempdir)
            yield {
                "file": img,
                "annotation": {
                    "class": "person",
                    "boundingbox": {"x": 0.1, "y": 0.1, "w": 0.1, "h": 0.1},
                },
            }

    dataset1.add(generator1())
    dataset1.make_splits({"train": 0.6, "val": 0.4})

    dataset2_name = "test_merge_2"
    dataset2 = LuxonisDataset(
        dataset2_name,
        bucket_storage=bucket_storage,
        delete_existing=True,
        delete_remote=True,
    )

    def generator2():
        # Image indices 3..5 so the files differ from those of dataset1.
        for i in range(3, 6):
            img = create_image(i, tempdir)
            yield {
                "file": img,
                "annotation": {
                    "class": "dog",
                    "boundingbox": {"x": 0.2, "y": 0.2, "w": 0.2, "h": 0.2},
                },
            }

    dataset2.add(generator2())
    dataset2.make_splits({"train": 0.6, "val": 0.4})

    dataset1_merged_with_dataset2 = dataset1.merge_with(
        dataset2,
        inplace=False,
        new_dataset_name=dataset1_name + "_" + dataset2_name + "_merged",
    )

    all_classes, _ = dataset1_merged_with_dataset2.get_classes()
    assert set(all_classes) == {"person", "dog"}

    df_merged = dataset1_merged_with_dataset2._load_df_offline()
    # NOTE(review): the in-place merge presumably modifies dataset1 itself,
    # so it has to run after the out-of-place merge above - confirm.
    df_inplace_merged = dataset1.merge_with(
        dataset2, inplace=True
    )._load_df_offline()
    assert df_merged.equals(df_inplace_merged)

0 comments on commit f975549

Please sign in to comment.