From 90ea8b60ecef7eef75e42e231745abca005254e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 3 Oct 2024 22:41:51 +0200 Subject: [PATCH] Formatting (#179) --- .pre-commit-config.yaml | 3 +- examples/Data_Custom_Example.ipynb | 8 +- examples/Data_Parser_Example.ipynb | 8 +- examples/Embeddings_LDF_Qdrant_Example.ipynb | 20 ++- .../Embeddings_LDF_Weaviate_Example.ipynb | 44 +++-- examples/Embeddings_Processing_Example.ipynb | 20 ++- examples/utils/data_utils.py | 12 +- examples/utils/torch_utils.py | 21 ++- luxonis_ml/data/__main__.py | 4 +- .../data/augmentations/batch_compose.py | 66 +++++-- .../data/augmentations/batch_processors.py | 37 ++-- .../data/augmentations/batch_transform.py | 4 +- luxonis_ml/data/augmentations/batch_utils.py | 23 ++- .../augmentations/custom/letterbox_resize.py | 57 ++++-- luxonis_ml/data/augmentations/custom/mixup.py | 58 +++--- .../data/augmentations/custom/mosaic.py | 107 ++++++++---- luxonis_ml/data/augmentations/utils.py | 131 +++++++++----- luxonis_ml/data/datasets/annotation.py | 121 +++++++++---- luxonis_ml/data/datasets/base_dataset.py | 64 ++++--- luxonis_ml/data/datasets/luxonis_dataset.py | 165 +++++++++++++----- luxonis_ml/data/datasets/source.py | 24 ++- luxonis_ml/data/datasets/utils.py | 19 +- luxonis_ml/data/loaders/__init__.py | 7 +- luxonis_ml/data/loaders/base_loader.py | 9 +- luxonis_ml/data/loaders/luxonis_loader.py | 46 +++-- luxonis_ml/data/parsers/base_parser.py | 55 +++--- .../classification_directory_parser.py | 17 +- luxonis_ml/data/parsers/coco_parser.py | 28 ++- luxonis_ml/data/parsers/create_ml_parser.py | 28 ++- luxonis_ml/data/parsers/darknet_parser.py | 21 ++- luxonis_ml/data/parsers/luxonis_parser.py | 87 +++++---- .../segmentation_mask_directory_parser.py | 13 +- luxonis_ml/data/parsers/solo_parser.py | 78 ++++++--- .../data/parsers/tensorflow_csv_parser.py | 23 ++- luxonis_ml/data/parsers/voc_parser.py | 15 +- luxonis_ml/data/parsers/yolov4_parser.py | 16 +- 
luxonis_ml/data/parsers/yolov6_parser.py | 8 +- luxonis_ml/data/requirements.txt | 2 +- luxonis_ml/data/utils/data_utils.py | 20 ++- luxonis_ml/data/utils/enums.py | 8 +- luxonis_ml/data/utils/parquet.py | 11 +- luxonis_ml/data/utils/visualizations.py | 23 ++- luxonis_ml/embeddings/methods/OOD.py | 16 +- luxonis_ml/embeddings/methods/duplicate.py | 54 +++--- luxonis_ml/embeddings/methods/mistakes.py | 12 +- .../embeddings/methods/representative.py | 45 +++-- luxonis_ml/embeddings/utils/embedding.py | 1 + luxonis_ml/embeddings/utils/ldf.py | 30 ++-- luxonis_ml/embeddings/utils/model.py | 11 +- luxonis_ml/embeddings/utils/qdrant.py | 121 ++++++++----- luxonis_ml/embeddings/utils/vectordb.py | 19 +- luxonis_ml/embeddings/utils/weaviate.py | 75 +++++--- luxonis_ml/nn_archive/__init__.py | 8 +- luxonis_ml/nn_archive/__main__.py | 12 +- luxonis_ml/nn_archive/archive_generator.py | 12 +- luxonis_ml/nn_archive/config.py | 11 +- .../base_models/head.py | 12 +- .../base_models/head_metadata.py | 69 +++++--- .../base_models/input.py | 23 ++- .../config_building_blocks/enums/data_type.py | 3 +- luxonis_ml/nn_archive/model.py | 4 +- luxonis_ml/nn_archive/utils.py | 3 +- luxonis_ml/tracker/tracker.py | 73 +++++--- luxonis_ml/utils/__main__.py | 3 +- luxonis_ml/utils/config.py | 42 +++-- luxonis_ml/utils/environ.py | 4 +- luxonis_ml/utils/filesystem.py | 122 +++++++------ luxonis_ml/utils/logging.py | 41 +++-- luxonis_ml/utils/registry.py | 13 +- pyproject.toml | 29 ++- tests/test_augmentations/test_letterbox.py | 4 +- tests/test_augmentations/test_mosaic.py | 15 +- tests/test_data/test_annotations.py | 10 +- tests/test_data/test_dataset.py | 57 ++++-- tests/test_data/test_task_ingestion.py | 20 ++- tests/test_nn_archive/test_nn_archive.py | 23 ++- tests/test_utils/test_config.py | 46 +++-- tests/test_utils/test_filesystem.py | 31 +++- 78 files changed, 1763 insertions(+), 842 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2dfb73ff..0131f45d 
100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.6.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -21,4 +21,3 @@ repos: - id: mdformat additional_dependencies: - mdformat-gfm - - mdformat-toc diff --git a/examples/Data_Custom_Example.ipynb b/examples/Data_Custom_Example.ipynb index 42ca2c9b..63f5aaf9 100644 --- a/examples/Data_Custom_Example.ipynb +++ b/examples/Data_Custom_Example.ipynb @@ -240,7 +240,9 @@ " {\n", " \"person\": {\n", " \"labels\": data[\"categories\"][0][\"keypoints\"],\n", - " \"edges\": (np.array(data[\"categories\"][0][\"skeleton\"]) - 1).tolist(),\n", + " \"edges\": (\n", + " np.array(data[\"categories\"][0][\"skeleton\"]) - 1\n", + " ).tolist(),\n", " }\n", " },\n", " task=\"keypoints\",\n", @@ -307,7 +309,9 @@ " for kp in kps:\n", " kp = kp[1:].reshape(-1, 3)\n", " for k in kp:\n", - " cv2.circle(image, (int(k[0] * w), int(k[1] * h)), 2, (0, 255, 0), 2)\n", + " cv2.circle(\n", + " image, (int(k[0] * w), int(k[1] * h)), 2, (0, 255, 0), 2\n", + " )\n", "\n", " plt.imshow(image)\n", " plt.axis(\"off\") # Optional: Hide axis\n", diff --git a/examples/Data_Parser_Example.ipynb b/examples/Data_Parser_Example.ipynb index aaeb7243..051a4c2e 100644 --- a/examples/Data_Parser_Example.ipynb +++ b/examples/Data_Parser_Example.ipynb @@ -82,7 +82,9 @@ "outputs": [], "source": [ "dataset_name = \"coco_test\"\n", - "parser = LuxonisParser(dataset_dir, dataset_name=dataset_name, delete_existing=True)\n", + "parser = LuxonisParser(\n", + " dataset_dir, dataset_name=dataset_name, delete_existing=True\n", + ")\n", "dataset = parser.parse(random_split=True)" ] }, @@ -125,7 +127,9 @@ " for kp in kps:\n", " kp = kp[1:].reshape(-1, 3)\n", " for k in kp:\n", - " cv2.circle(image, (int(k[0] * w), int(k[1] * h)), 2, (0, 255, 0), 2)\n", + " cv2.circle(\n", + " image, (int(k[0] * w), int(k[1] * h)), 2, (0, 255, 0), 2\n", + " )\n", "\n", " 
plt.imshow(image)\n", " plt.axis(\"off\") # Optional: Hide axis\n", diff --git a/examples/Embeddings_LDF_Qdrant_Example.ipynb b/examples/Embeddings_LDF_Qdrant_Example.ipynb index d0763d3e..46002b3d 100644 --- a/examples/Embeddings_LDF_Qdrant_Example.ipynb +++ b/examples/Embeddings_LDF_Qdrant_Example.ipynb @@ -60,7 +60,9 @@ "outputs": [], "source": [ "# Load the data\n", - "data_loader = load_mnist_data(save_path=\"./data/mnist\", num_samples=640, batch_size=64)" + "data_loader = load_mnist_data(\n", + " save_path=\"./data/mnist\", num_samples=640, batch_size=64\n", + ")" ] }, { @@ -108,7 +110,9 @@ "outputs": [], "source": [ "# Load the data\n", - "data_loader = load_mnist_data(save_path=\"./data/mnist\", num_samples=640, batch_size=64)" + "data_loader = load_mnist_data(\n", + " save_path=\"./data/mnist\", num_samples=640, batch_size=64\n", + ")" ] }, { @@ -159,7 +163,9 @@ " and \"CUDAExecutionProvider\" in onnxruntime.get_available_providers()\n", " else None\n", ")\n", - "ort_session = onnxruntime.InferenceSession(\"./data/resnet50-1.onnx\", providers=provider)\n", + "ort_session = onnxruntime.InferenceSession(\n", + " \"./data/resnet50-1.onnx\", providers=provider\n", + ")\n", "\n", "# Extract embeddings from the dataset\n", "embeddings, labels = extract_embeddings_onnx(\n", @@ -420,7 +426,9 @@ " and \"CUDAExecutionProvider\" in onnxruntime.get_available_providers()\n", " else None\n", ")\n", - "ort_session = onnxruntime.InferenceSession(\"./data/resnet50-1.onnx\", providers=provider)" + "ort_session = onnxruntime.InferenceSession(\n", + " \"./data/resnet50-1.onnx\", providers=provider\n", + ")" ] }, { @@ -446,7 +454,9 @@ "\n", "# Create a collection\n", "qdrant_api.create_collection(\n", - " collection_name=\"Mnist_LDF\", properties=[\"label\", \"image_path\"], vector_size=2048\n", + " collection_name=\"Mnist_LDF\",\n", + " properties=[\"label\", \"image_path\"],\n", + " vector_size=2048,\n", ")" ] }, diff --git a/examples/Embeddings_LDF_Weaviate_Example.ipynb 
b/examples/Embeddings_LDF_Weaviate_Example.ipynb index a757f16e..e15ac548 100644 --- a/examples/Embeddings_LDF_Weaviate_Example.ipynb +++ b/examples/Embeddings_LDF_Weaviate_Example.ipynb @@ -71,7 +71,9 @@ "outputs": [], "source": [ "# Load the data\n", - "data_loader = load_mnist_data(save_path=\"./data/mnist\", num_samples=640, batch_size=64)" + "data_loader = load_mnist_data(\n", + " save_path=\"./data/mnist\", num_samples=640, batch_size=64\n", + ")" ] }, { @@ -119,7 +121,9 @@ "outputs": [], "source": [ "# Load the data\n", - "data_loader = load_mnist_data(save_path=\"./data/mnist\", num_samples=640, batch_size=64)" + "data_loader = load_mnist_data(\n", + " save_path=\"./data/mnist\", num_samples=640, batch_size=64\n", + ")" ] }, { @@ -170,7 +174,9 @@ " and \"CUDAExecutionProvider\" in onnxruntime.get_available_providers()\n", " else None\n", ")\n", - "ort_session = onnxruntime.InferenceSession(\"./data/resnet50-1.onnx\", providers=provider)\n", + "ort_session = onnxruntime.InferenceSession(\n", + " \"./data/resnet50-1.onnx\", providers=provider\n", + ")\n", "\n", "# Extract embeddings from the dataset\n", "embeddings, labels = extract_embeddings_onnx(\n", @@ -232,7 +238,9 @@ "# Insert the embeddings into the collection\n", "uuids = [str(uuid.uuid5(uuid.NAMESPACE_DNS, str(e))) for e in embeddings]\n", "label_list_dict = [{\"label\": label} for label in labels]\n", - "weaviate_api.insert_embeddings(uuids, embeddings, label_list_dict, batch_size=50)" + "weaviate_api.insert_embeddings(\n", + " uuids, embeddings, label_list_dict, batch_size=50\n", + ")" ] }, { @@ -254,8 +262,12 @@ ], "source": [ "# Search for the nearest neighbors\n", - "search_uuids, scores = weaviate_api.search_similar_embeddings(embeddings[0], top_k=5)\n", - "payloads = weaviate_api.retrieve_payloads_by_ids(search_uuids, properties=[\"label\"])\n", + "search_uuids, scores = weaviate_api.search_similar_embeddings(\n", + " embeddings[0], top_k=5\n", + ")\n", + "payloads = 
weaviate_api.retrieve_payloads_by_ids(\n", + " search_uuids, properties=[\"label\"]\n", + ")\n", "\n", "# Print the search results\n", "for u, p in zip(search_uuids, payloads):\n", @@ -414,7 +426,9 @@ " and \"CUDAExecutionProvider\" in onnxruntime.get_available_providers()\n", " else None\n", ")\n", - "ort_session = onnxruntime.InferenceSession(\"./data/resnet50-1.onnx\", providers=provider)" + "ort_session = onnxruntime.InferenceSession(\n", + " \"./data/resnet50-1.onnx\", providers=provider\n", + ")" ] }, { @@ -555,8 +569,12 @@ ], "source": [ "# Search for the nearest neighbors\n", - "search_uuids, scores = weaviate_api.search_similar_embeddings(first_emb, top_k=5)\n", - "payloads = weaviate_api.retrieve_payloads_by_ids(search_uuids, properties=[\"label\"])\n", + "search_uuids, scores = weaviate_api.search_similar_embeddings(\n", + " first_emb, top_k=5\n", + ")\n", + "payloads = weaviate_api.retrieve_payloads_by_ids(\n", + " search_uuids, properties=[\"label\"]\n", + ")\n", "\n", "# Print the search results\n", "for u, p, s in zip(search_uuids, payloads, scores):\n", @@ -743,7 +761,9 @@ "source": [ "# Setup Weaviate\n", "weaviate_api = WeaviateAPI(\"http://localhost:8080\")\n", - "weaviate_api.create_collection(collection_name=\"Mnist_LFS\", properties=[\"image_path\"])" + "weaviate_api.create_collection(\n", + " collection_name=\"Mnist_LFS\", properties=[\"image_path\"]\n", + ")" ] }, { @@ -784,7 +804,9 @@ ], "source": [ "# Search for the nearest neighbors\n", - "search_uuids, scores = weaviate_api.search_similar_embeddings(embeddings[0], top_k=5)\n", + "search_uuids, scores = weaviate_api.search_similar_embeddings(\n", + " embeddings[0], top_k=5\n", + ")\n", "\n", "# Print the search results\n", "for u, s in zip(search_uuids, scores):\n", diff --git a/examples/Embeddings_Processing_Example.ipynb b/examples/Embeddings_Processing_Example.ipynb index f227532e..5e5bda3b 100644 --- a/examples/Embeddings_Processing_Example.ipynb +++ 
b/examples/Embeddings_Processing_Example.ipynb @@ -103,7 +103,9 @@ "source": [ "desired_size = int(len(embeddings) * 0.05)\n", "# desired_size = 10\n", - "selected_image_indices = find_representative_kmedoids(similarity_matrix, desired_size)\n", + "selected_image_indices = find_representative_kmedoids(\n", + " similarity_matrix, desired_size\n", + ")\n", "# selected_image_indices = find_representative_greedy_qdrant(qdrant_client, desired_size, 0, \"mnist3\")" ] }, @@ -356,7 +358,9 @@ "metadata": {}, "outputs": [], "source": [ - "mis_img_paths = qdrant_api.retrieve_payloads_by_ids(missing_img_uuids, [\"image_path\"])\n", + "mis_img_paths = qdrant_api.retrieve_payloads_by_ids(\n", + " missing_img_uuids, [\"image_path\"]\n", + ")\n", "mis_img_paths = [x[\"image_path\"] for x in mis_img_paths]" ] }, @@ -462,10 +466,14 @@ "# find representative images\n", "desired_size = int(len(embeddings) * 0.05)\n", "similarity_matrix = calculate_similarity_matrix(embeddings)\n", - "selected_image_indices = find_representative_kmedoids(similarity_matrix, desired_size)\n", + "selected_image_indices = find_representative_kmedoids(\n", + " similarity_matrix, desired_size\n", + ")\n", "\n", "selcted_ids = np.array(ids)[selected_image_indices].tolist()\n", - "represent_img_paths = w_api.retrieve_payloads_by_ids(selcted_ids, [\"image_path\"])\n", + "represent_img_paths = w_api.retrieve_payloads_by_ids(\n", + " selcted_ids, [\"image_path\"]\n", + ")\n", "represent_img_paths = [x[\"image_path\"] for x in represent_img_paths]\n", "\n", "# plot\n", @@ -626,7 +634,9 @@ "mis_ix, new_y = find_mismatches_centroids(X, y)\n", "\n", "missing_img_uuids = np.array(ids)[mis_ix].tolist()\n", - "mis_img_paths = w_api.retrieve_payloads_by_ids(missing_img_uuids, [\"image_path\"])\n", + "mis_img_paths = w_api.retrieve_payloads_by_ids(\n", + " missing_img_uuids, [\"image_path\"]\n", + ")\n", "mis_img_paths = [x[\"image_path\"] for x in mis_img_paths]\n", "\n", "# plot\n", diff --git 
a/examples/utils/data_utils.py b/examples/utils/data_utils.py index 21be6c43..8931350f 100644 --- a/examples/utils/data_utils.py +++ b/examples/utils/data_utils.py @@ -27,6 +27,7 @@ Note: This loader is particularly useful when you want to use MNIST data with models that were pre-trained on datasets like ImageNet and expect 3-channel RGB input. """ + import torch import torchvision import torchvision.transforms as transforms @@ -35,7 +36,8 @@ def mnist_transformations() -> transforms.Compose: """Returns composed transformations for the MNIST dataset. - Transforms the images from 1 channel grayscale to 3 channels RGB and resizes them. + Transforms the images from 1 channel grayscale to 3 channels RGB and + resizes them. """ return transforms.Compose( [ @@ -43,7 +45,9 @@ def mnist_transformations() -> transforms.Compose: transforms.Lambda(lambda x: x.convert("RGB")), transforms.Resize((224, 224)), transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), ] ) @@ -70,7 +74,9 @@ def load_mnist_data( ) # If num_samples is set to -1, use the entire dataset - num_samples = min(num_samples, len(dataset)) if num_samples != -1 else len(dataset) + num_samples = ( + min(num_samples, len(dataset)) if num_samples != -1 else len(dataset) + ) # Create a subset of the dataset using Subset class subset = torch.utils.data.Subset(dataset, torch.arange(num_samples)) diff --git a/examples/utils/torch_utils.py b/examples/utils/torch_utils.py index cdfe5164..5685bb38 100644 --- a/examples/utils/torch_utils.py +++ b/examples/utils/torch_utils.py @@ -11,8 +11,8 @@ # PyTorch and ONNX model loading and exporting functions def load_model_resnet50(discard_last_layer: bool) -> nn.Module: - """Load a pre-trained ResNet-50 model with the last fully connected layer - removed.""" + """Load a pre-trained ResNet-50 model with the last fully connected + layer removed.""" model 
= models.resnet50(weights=resnet.ResNet50_Weights.IMAGENET1K_V1) if discard_last_layer: model = nn.Sequential( @@ -83,7 +83,9 @@ def save_embeddings( torch.save(labels, save_path + "labels.pth") -def load_embeddings(save_path: str = "./") -> Tuple[torch.Tensor, torch.Tensor]: +def load_embeddings( + save_path: str = "./", +) -> Tuple[torch.Tensor, torch.Tensor]: """Load embeddings and labels tensors from the specified path.""" embeddings = torch.load(save_path + "embeddings.pth") labels = torch.load(save_path + "labels.pth") @@ -99,18 +101,21 @@ def generate_new_embeddings( emb_batch_size: int = 64, transform: transforms.Compose = None, ): - """Generate embeddings for new images using a given ONNX runtime session. + """Generate embeddings for new images using a given ONNX runtime + session. @type img_paths: List[str] @param img_paths: List of image paths for new images. @type ort_session: L{InferenceSession} @param ort_session: ONNX runtime session. @type output_layer_name: str - @param output_layer_name: Name of the output layer in the ONNX model. + @param output_layer_name: Name of the output layer in the ONNX + model. @type emb_batch_size: int @param emb_batch_size: Batch size for generating embeddings. @type transform: torchvision.transforms - @param transform: Optional torchvision transform for preprocessing images. + @param transform: Optional torchvision transform for preprocessing + images. @rtype: List[List[float]] @return: List of embeddings for the new images. 
""" @@ -141,7 +146,9 @@ def generate_new_embeddings( batch_tensor = torch.stack(batch_tensors).cuda() # Run the ONNX model on the batch - ort_inputs = {ort_session.get_inputs()[0].name: batch_tensor.cpu().numpy()} + ort_inputs = { + ort_session.get_inputs()[0].name: batch_tensor.cpu().numpy() + } ort_outputs = ort_session.run([output_layer_name], ort_inputs) # Append the embeddings from the batch to the new_embeddings list diff --git a/luxonis_ml/data/__main__.py b/luxonis_ml/data/__main__.py index e22e0e97..d0e248a1 100644 --- a/luxonis_ml/data/__main__.py +++ b/luxonis_ml/data/__main__.py @@ -59,7 +59,9 @@ def get_dataset_info(name: str) -> Tuple[int, List[str], List[str]]: def print_info(name: str) -> None: dataset = LuxonisDataset(name) _, classes = dataset.get_classes() - table = Table(title="Classes", box=rich.box.ROUNDED, row_styles=["yellow", "cyan"]) + table = Table( + title="Classes", box=rich.box.ROUNDED, row_styles=["yellow", "cyan"] + ) table.add_column("Task", header_style="magenta i", max_width=30) table.add_column("Class Names", header_style="magenta i", max_width=50) for task, c in classes.items(): diff --git a/luxonis_ml/data/augmentations/batch_compose.py b/luxonis_ml/data/augmentations/batch_compose.py index 74596ad6..fbea3111 100644 --- a/luxonis_ml/data/augmentations/batch_compose.py +++ b/luxonis_ml/data/augmentations/batch_compose.py @@ -12,11 +12,19 @@ TransformsSeqType, get_always_apply, ) -from albumentations.core.keypoints_utils import KeypointParams, KeypointsProcessor +from albumentations.core.keypoints_utils import ( + KeypointParams, + KeypointsProcessor, +) from albumentations.core.utils import get_shape from .batch_processors import BboxBatchProcessor, KeypointsBatchProcessor -from .batch_utils import batch2list, concat_batches, list2batch, to_unbatched_name +from .batch_utils import ( + batch2list, + concat_batches, + list2batch, + to_unbatched_name, +) class Compose(BaseCompose): @@ -29,7 +37,8 @@ def __init__( p: float = 1.0, 
is_check_shapes: bool = True, ): - """Compose transforms and handle all transformations regarding bounding boxes. + """Compose transforms and handle all transformations regarding + bounding boxes. @param transforms: List of transformations to compose @type transforms: TransformsSeqType @@ -99,17 +108,23 @@ def _get_keypoints_processor(self, k_params, additional_targets): return KeypointsProcessor(k_params, additional_targets) @staticmethod - def _disable_check_args_for_transforms(transforms: TransformsSeqType) -> None: + def _disable_check_args_for_transforms( + transforms: TransformsSeqType, + ) -> None: for transform in transforms: if isinstance(transform, BaseCompose): - Compose._disable_check_args_for_transforms(transform.transforms) + Compose._disable_check_args_for_transforms( + transform.transforms + ) if isinstance(transform, Compose): transform._disable_check_args() def _disable_check_args(self) -> None: self.is_check_args = False - def __call__(self, *args, force_apply: bool = False, **data) -> Dict[str, Any]: + def __call__( + self, *args, force_apply: bool = False, **data + ) -> Dict[str, Any]: if args: raise KeyError( "You have to pass data to augmentations as named arguments, for example: aug(image=image)" @@ -123,7 +138,9 @@ def __call__(self, *args, force_apply: bool = False, **data) -> Dict[str, Any]: for p in self.processors.values(): p.ensure_data_valid(data) transforms = ( - self.transforms if need_to_run else get_always_apply(self.transforms) + self.transforms + if need_to_run + else get_always_apply(self.transforms) ) check_each_transform = any( @@ -148,7 +165,9 @@ def __call__(self, *args, force_apply: bool = False, **data) -> Dict[str, Any]: return data - def _check_data_post_transform(self, data: Dict[str, Any]) -> Dict[str, Any]: + def _check_data_post_transform( + self, data: Dict[str, Any] + ) -> Dict[str, Any]: rows, cols = get_shape(data["image"]) for p in self.processors.values(): @@ -206,7 +225,9 @@ def _check_args(self, 
additional_targets, **kwargs) -> None: internal_data_name = additional_targets.get(data_name, data_name) if internal_data_name in checked_single: if not isinstance(data, np.ndarray): - raise TypeError("{} must be numpy array type".format(data_name)) + raise TypeError( + "{} must be numpy array type".format(data_name) + ) shapes.append(data.shape[:2]) if internal_data_name in checked_multi: if data is not None: @@ -223,7 +244,11 @@ def _check_args(self, additional_targets, **kwargs) -> None: "bbox_params must be specified for bbox transformations" ) - if self.is_check_shapes and shapes and shapes.count(shapes[0]) != len(shapes): + if ( + self.is_check_shapes + and shapes + and shapes.count(shapes[0]) != len(shapes) + ): raise ValueError( "Height and Width of image, mask or masks should be equal. You can disable shapes check " "by setting a parameter is_check_shapes=False of Compose class (do it only if you are sure " @@ -249,10 +274,11 @@ def __init__( p: float = 1.0, is_check_shapes: bool = True, ): - """Compose designed to handle the multi-image transforms The contents can be a - subclass of `BatchBasedTransform` or other transforms enclosed by ForEach - container. All targets' names should have the suffix "_batch", ex - ("image_batch", "bboxes_batch"). Note this nameing rule is applied to the + """Compose designed to handle the multi-image transforms The + contents can be a subclass of `BatchBasedTransform` or other + transforms enclosed by ForEach container. All targets' names + should have the suffix "_batch", ex ("image_batch", + "bboxes_batch"). Note this nameing rule is applied to the `label_fields` of the `BboxParams` and the `KeypointsParams`. 
@param transforms: List of transformations to compose @@ -311,7 +337,9 @@ def _check_args(self, additional_targets, **kwargs) -> None: for data in datalist: super()._check_args(unbatched_targets, **data) - def _make_targets_contiguous(self, batched_data: Dict[str, Any]) -> Dict[str, Any]: + def _make_targets_contiguous( + self, batched_data: Dict[str, Any] + ) -> Dict[str, Any]: datalist = batch2list(batched_data) if len(datalist) == 0: return batched_data @@ -323,8 +351,8 @@ def _make_targets_contiguous(self, batched_data: Dict[str, Any]) -> Dict[str, An class ForEach(BaseCompose): - """Apply transforms for each batch element This expects batched input and can be - contained by the `BatchCompose`.""" + """Apply transforms for each batch element This expects batched + input and can be contained by the `BatchCompose`.""" def __init__(self, transforms: TransformsSeqType, p: float = 0.5): super().__init__(transforms, p) @@ -341,7 +369,9 @@ def __call__( batched_data = list2batch(processed) return batched_data - def add_targets(self, additional_targets: Optional[Dict[str, str]]) -> None: + def add_targets( + self, additional_targets: Optional[Dict[str, str]] + ) -> None: if additional_targets: unbatched_targets = { to_unbatched_name(k): to_unbatched_name(v) diff --git a/luxonis_ml/data/augmentations/batch_processors.py b/luxonis_ml/data/augmentations/batch_processors.py index 88ffa756..4e3faf80 100644 --- a/luxonis_ml/data/augmentations/batch_processors.py +++ b/luxonis_ml/data/augmentations/batch_processors.py @@ -2,7 +2,10 @@ from typing import Any, Dict, Optional, Sequence from albumentations.core.bbox_utils import BboxParams, BboxProcessor -from albumentations.core.keypoints_utils import KeypointParams, KeypointsProcessor +from albumentations.core.keypoints_utils import ( + KeypointParams, + KeypointsProcessor, +) from albumentations.core.utils import DataProcessor from .batch_utils import batch2list, list2batch, to_unbatched_name @@ -10,14 +13,16 @@ class 
BboxBatchProcessor(DataProcessor): def __init__( - self, params: BboxParams, additional_targets: Optional[Dict[str, str]] = None + self, + params: BboxParams, + additional_targets: Optional[Dict[str, str]] = None, ): """Data processor class to process bbox data in batches. @param params: Bbox parameters @type params: BboxParams - @param additional_targets: Additional targets of the transform. Defaults to - None. + @param additional_targets: Additional targets of the transform. + Defaults to None. @type additional_targets: Optional[Dict[str, str]] """ super().__init__(params, additional_targets) @@ -38,7 +43,9 @@ def ensure_data_valid(self, data: Dict[str, Any]) -> None: self.item_processor.ensure_data_valid(item) def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]: - processed = [self.item_processor.postprocess(item) for item in batch2list(data)] + processed = [ + self.item_processor.postprocess(item) for item in batch2list(data) + ] procesed_data = list2batch(processed) for k in data.keys(): data[k] = procesed_data[k] @@ -77,7 +84,9 @@ def convert_to_albumentations( def convert_from_albumentations( self, data: Sequence, rows: int, cols: int ) -> Sequence: - return self.item_processor.convert_from_albumentations(data, rows, cols) + return self.item_processor.convert_from_albumentations( + data, rows, cols + ) class KeypointsBatchProcessor(DataProcessor): @@ -90,8 +99,8 @@ def __init__( @param params: Keypoint parameters @type params: KeypointParams - @param additional_targets: Additional targets of the transform. Defaults to - None. + @param additional_targets: Additional targets of the transform. + Defaults to None. 
@type additional_targets: Optional[Dict[str, str]] """ super().__init__(params, additional_targets) @@ -101,7 +110,9 @@ def __init__( item_params.label_fields = [ to_unbatched_name(field) for field in label_fields ] - self.item_processor = KeypointsProcessor(item_params, additional_targets) + self.item_processor = KeypointsProcessor( + item_params, additional_targets + ) @property def default_data_name(self) -> str: @@ -112,7 +123,9 @@ def ensure_data_valid(self, data: Dict[str, Any]) -> None: self.item_processor.ensure_data_valid(item) def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]: - processed = [self.item_processor.postprocess(item) for item in batch2list(data)] + processed = [ + self.item_processor.postprocess(item) for item in batch2list(data) + ] procesed_data = list2batch(processed) for k in data.keys(): data[k] = procesed_data[k] @@ -151,4 +164,6 @@ def convert_to_albumentations( def convert_from_albumentations( self, data: Sequence, rows: int, cols: int ) -> Sequence: - return self.item_processor.convert_from_albumentations(data, rows, cols) + return self.item_processor.convert_from_albumentations( + data, rows, cols + ) diff --git a/luxonis_ml/data/augmentations/batch_transform.py b/luxonis_ml/data/augmentations/batch_transform.py index a5dd32ed..958f67f6 100644 --- a/luxonis_ml/data/augmentations/batch_transform.py +++ b/luxonis_ml/data/augmentations/batch_transform.py @@ -30,7 +30,9 @@ def targets(self) -> Dict[str, Callable]: "keypoints_batch": self.apply_to_keypoints_batch, } - def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: + def update_params( + self, params: Dict[str, Any], **kwargs + ) -> Dict[str, Any]: # This overwrites the `super().update_params(...)` return params diff --git a/luxonis_ml/data/augmentations/batch_utils.py b/luxonis_ml/data/augmentations/batch_utils.py index 126d1c28..894c08f5 100644 --- a/luxonis_ml/data/augmentations/batch_utils.py +++ b/luxonis_ml/data/augmentations/batch_utils.py 
@@ -2,9 +2,12 @@ def batch2list(data: Dict[str, List]) -> List[Dict[str, Any]]: - """Convert from a batched target dict to list of normal target dicts.""" + """Convert from a batched target dict to list of normal target + dicts.""" if "image_batch" not in data: - raise ValueError("Batch-based transform should have `image_batch` target") + raise ValueError( + "Batch-based transform should have `image_batch` target" + ) batch_size = len(data["image_batch"]) items = [] for i in range(batch_size): @@ -15,13 +18,16 @@ def batch2list(data: Dict[str, List]) -> List[Dict[str, Any]]: item_k = to_unbatched_name(k) item[item_k] = v[i] else: - raise ValueError(f"All key must have '_batch' suffix, got `{k}`") + raise ValueError( + f"All key must have '_batch' suffix, got `{k}`" + ) items.append(item) return items def list2batch(data: List[Dict[str, Any]]) -> Dict[str, List]: - """Convert from a list of normal target dicts to a batched target dict.""" + """Convert from a list of normal target dicts to a batched target + dict.""" if len(data) == 0: raise ValueError("The input should have at least one item.") @@ -37,8 +43,8 @@ def list2batch(data: List[Dict[str, Any]]) -> Dict[str, List]: def to_unbatched_name(batched_name: str) -> str: - """Get a normal target name from a batched target name If the given name does not - have "_batched" suffix, ValueError will be raised.""" + """Get a normal target name from a batched target name If the given + name does not have "_batched" suffix, ValueError will be raised.""" if not batched_name.endswith("_batch"): raise ValueError( f"Batched target name must have '_batch' suffix, got `{batched_name}`" @@ -47,8 +53,9 @@ def to_unbatched_name(batched_name: str) -> str: def to_batched_name(name: str) -> str: - """Get a unbatched target name from a normal target name If the given name already - has had "_batched" suffix, ValueError will be raised.""" + """Get a unbatched target name from a normal target name If the + given name already has had 
"_batched" suffix, ValueError will be + raised.""" if name.endswith("_batch"): raise ValueError( diff --git a/luxonis_ml/data/augmentations/custom/letterbox_resize.py b/luxonis_ml/data/augmentations/custom/letterbox_resize.py index ccdad265..95884bc8 100644 --- a/luxonis_ml/data/augmentations/custom/letterbox_resize.py +++ b/luxonis_ml/data/augmentations/custom/letterbox_resize.py @@ -20,23 +20,25 @@ def __init__( always_apply: bool = False, p: float = 1.0, ): - """Augmentation to apply letterbox resizing to images. Also transforms masks, - bboxes and keypoints to correct shape. + """Augmentation to apply letterbox resizing to images. Also + transforms masks, bboxes and keypoints to correct shape. @param height: Desired height of the output. @type height: int @param width: Desired width of the output. @type width: int - @param interpolation: Cv2 flag to specify interpolation used when resizing. - Defaults to cv2.INTER_LINEAR. + @param interpolation: Cv2 flag to specify interpolation used + when resizing. Defaults to cv2.INTER_LINEAR. @type interpolation: int, optional @param border_value: Padding value for images. Defaults to 0. @type border_value: int, optional @param mask_value: Padding value for masks. Defaults to 0. @type mask_value: int, optional - @param always_apply: Whether to always apply the transform. Defaults to False. + @param always_apply: Whether to always apply the transform. + Defaults to False. @type always_apply: bool, optional - @param p: Probability of applying the transform. Defaults to 1.0. + @param p: Probability of applying the transform. Defaults to + 1.0. @type p: float, optional """ @@ -54,12 +56,15 @@ def __init__( self.border_value = border_value self.mask_value = mask_value - def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: + def update_params( + self, params: Dict[str, Any], **kwargs + ) -> Dict[str, Any]: """Updates augmentation parameters with the necessary metadata. 
@param params: The existing augmentation parameters dictionary. @type params: Dict[str, Any] - @param kwargs: Additional keyword arguments to add the parameters. + @param kwargs: Additional keyword arguments to add the + parameters. @type kwargs: Any @return: Updated dictionary containing the merged parameters. @rtype: Dict[str, Any] @@ -121,7 +126,10 @@ def apply( resized_img = cv2.resize( img, - (self.width - pad_left - pad_right, self.height - pad_top - pad_bottom), + ( + self.width - pad_left - pad_right, + self.height - pad_top - pad_bottom, + ), interpolation=self.interpolation, ) img_out = cv2.copyMakeBorder( @@ -165,7 +173,10 @@ def apply_to_mask( resized_img = cv2.resize( img, - (self.width - pad_left - pad_right, self.height - pad_top - pad_bottom), + ( + self.width - pad_left - pad_right, + self.height - pad_top - pad_bottom, + ), interpolation=cv2.INTER_NEAREST, ) img_out = cv2.copyMakeBorder( @@ -208,10 +219,17 @@ def apply_to_bbox( """ x_min, y_min, x_max, y_max = denormalize_bbox( - bbox, self.height - pad_top - pad_bottom, self.width - pad_left - pad_right + bbox, + self.height - pad_top - pad_bottom, + self.width - pad_left - pad_right, )[:4] bbox = np.array( - [x_min + pad_left, y_min + pad_top, x_max + pad_left, y_max + pad_top] + [ + x_min + pad_left, + y_min + pad_top, + x_max + pad_left, + y_max + pad_top, + ] ) # clip bbox to image, ignoring padding bbox = bbox.clip( @@ -272,9 +290,17 @@ def get_transform_init_args_names(self) -> Tuple[str, ...]: @rtype: Tuple[str, ...] """ - return ("height", "width", "interpolation", "border_value", "mask_value") + return ( + "height", + "width", + "interpolation", + "border_value", + "mask_value", + ) - def _out_of_bounds(self, value: float, min_limit: float, max_limit: float) -> bool: + def _out_of_bounds( + self, value: float, min_limit: float, max_limit: float + ) -> bool: """ "Check if the given value is outside the specified limits. @param value: The value to be checked. 
@@ -283,7 +309,8 @@ def _out_of_bounds(self, value: float, min_limit: float, max_limit: float) -> bo @type min_limit: float @param max_limit: Maximum limit. @type max_limit: float - @return: True if the value is outside the specified limits, False otherwise. + @return: True if the value is outside the specified limits, + False otherwise. @rtype: bool """ return value < min_limit or value > max_limit diff --git a/luxonis_ml/data/augmentations/custom/mixup.py b/luxonis_ml/data/augmentations/custom/mixup.py index b788c705..17dce2b7 100644 --- a/luxonis_ml/data/augmentations/custom/mixup.py +++ b/luxonis_ml/data/augmentations/custom/mixup.py @@ -19,18 +19,21 @@ def __init__( always_apply: bool = False, p: float = 0.5, ): - """MixUp augmentation that merges two images and their annotations into one. If - images are not of same size then second one is first resized to match the first - one. + """MixUp augmentation that merges two images and their + annotations into one. If images are not of same size then second + one is first resized to match the first one. - @param alpha: Mixing coefficient, either a single float or a tuple representing - the range. Defaults to 0.5. + @param alpha: Mixing coefficient, either a single float or a + tuple representing the range. Defaults to 0.5. @type alpha: Union[float, Tuple[float, float]], optional - @param out_batch_size: Number of output images in the batch. Defaults to 1. + @param out_batch_size: Number of output images in the batch. + Defaults to 1. @type out_batch_size: int, optional - @param always_apply: Whether to always apply the transform. Defaults to False. + @param always_apply: Whether to always apply the transform. + Defaults to False. @type always_apply: bool, optional - @param p: Probability of applying the transform. Defaults to 0.5. + @param p: Probability of applying the transform. Defaults to + 0.5. 
@type p: float, optional """ super().__init__(batch_size=2, always_apply=always_apply, p=p) @@ -63,8 +66,8 @@ def apply_to_image_batch( ) -> List[np.ndarray]: """Applies the transformation to a batch of images. - @param image_batch: Batch of input images to which the transformation is - applied. + @param image_batch: Batch of input images to which the + transformation is applied. @type image_batch: List[np.ndarray] @param image_shapes: Shapes of the input images in the batch. @type image_shapes: List[Tuple[int, int]] @@ -75,13 +78,19 @@ def apply_to_image_batch( """ image1 = image_batch[0] # resize second image to size of the first one - image2 = cv2.resize(image_batch[1], (image_shapes[0][1], image_shapes[0][0])) + image2 = cv2.resize( + image_batch[1], (image_shapes[0][1], image_shapes[0][0]) + ) if isinstance(self.alpha, float): curr_alpha = np.clip(self.alpha, 0, 1) else: - curr_alpha = random.uniform(max(self.alpha[0], 0), min(self.alpha[1], 1)) - img_out = cv2.addWeighted(image1, curr_alpha, image2, 1 - curr_alpha, 0.0) + curr_alpha = random.uniform( + max(self.alpha[0], 0), min(self.alpha[1], 1) + ) + img_out = cv2.addWeighted( + image1, curr_alpha, image2, 1 - curr_alpha, 0.0 + ) return [img_out] def apply_to_mask_batch( @@ -92,7 +101,8 @@ def apply_to_mask_batch( ) -> List[np.ndarray]: """Applies the transformation to a batch of masks. - @param image_batch: Batch of input masks to which the transformation is applied. + @param image_batch: Batch of input masks to which the + transformation is applied. @type image_batch: List[np.ndarray] @param image_shapes: Shapes of the input images in the batch. 
@type image_shapes: List[Tuple[int, int]] @@ -114,12 +124,15 @@ def apply_to_mask_batch( return [out_mask] def apply_to_bboxes_batch( - self, bboxes_batch: List[BoxType], image_shapes: List[Tuple[int, int]], **params + self, + bboxes_batch: List[BoxType], + image_shapes: List[Tuple[int, int]], + **params, ) -> List[BoxType]: """Applies the transformation to a batch of bboxes. - @param image_batch: Batch of input bboxes to which the transformation is - applied. + @param image_batch: Batch of input bboxes to which the + transformation is applied. @type image_batch: List[BoxType] @param image_shapes: Shapes of the input images in the batch. @type image_shapes: List[Tuple[int, int]] @@ -138,8 +151,8 @@ def apply_to_keypoints_batch( ) -> List[KeypointType]: """Applies the transformation to a batch of keypoints. - @param image_batch: Batch of input keypoints to which the transformation is - applied. + @param image_batch: Batch of input keypoints to which the + transformation is applied. @type image_batch: List[BoxType] @param image_shapes: Shapes of the input images in the batch. @type image_shapes: List[Tuple[int, int]] @@ -158,12 +171,15 @@ def apply_to_keypoints_batch( scaled_kpts2.append(new_kpt + kpt[4:]) return [keypoints_batch[0] + scaled_kpts2] - def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]: + def get_params_dependent_on_targets( + self, params: Dict[str, Any] + ) -> Dict[str, Any]: """Get parameters dependent on the targets. @param params: Dictionary containing parameters. @type params: Dict[str, Any] - @return: Dictionary containing parameters dependent on the targets. + @return: Dictionary containing parameters dependent on the + targets. 
@rtype: Dict[str, Any] """ image_batch = params["image_batch"] diff --git a/luxonis_ml/data/augmentations/custom/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py index 5bc4de17..34be15f9 100644 --- a/luxonis_ml/data/augmentations/custom/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -28,13 +28,14 @@ def __init__( always_apply: bool = False, p: float = 0.5, ): - """Mosaic augmentation arranges selected four images into single image in a 2x2 - grid layout. This is done in deterministic way meaning first image in the batch - will always be in top left. The input images should have the same number of - channels but can have different widths and heights. The output is cropped around - the intersection point of the four images with the size (out_with x out_height). - If the mosaic image is smaller than width x height, the gap is filled by the - fill_value. + """Mosaic augmentation arranges selected four images into single + image in a 2x2 grid layout. This is done in deterministic way + meaning first image in the batch will always be in top left. The + input images should have the same number of channels but can + have different widths and heights. The output is cropped around + the intersection point of the four images with the size + (out_with x out_height). If the mosaic image is smaller than + width x height, the gap is filled by the fill_value. @param out_height: Output image height. The mosaic image is cropped by this height around the mosaic center. If the size of the mosaic image is smaller than this value the gap is filled by the `value`. 
@@ -63,9 +64,13 @@ def __init__( super().__init__(batch_size=4, always_apply=always_apply, p=p) if out_height <= 0: - raise ValueError(f"out_height should be larger than 0, got {out_height}") + raise ValueError( + f"out_height should be larger than 0, got {out_height}" + ) if out_width <= 0: - raise ValueError(f"out_width should be larger than 0, got {out_width}") + raise ValueError( + f"out_width should be larger than 0, got {out_width}" + ) if out_batch_size <= 0: raise ValueError( f"out_batch_size should be larger than 0, got {out_batch_size}" @@ -107,8 +112,8 @@ def apply_to_image_batch( ) -> List[np.ndarray]: """Applies the transformation to a batch of images. - @param image_batch: Batch of input images to which the transformation is - applied. + @param image_batch: Batch of input images to which the + transformation is applied. @type image_batch: List[np.ndarray] @param indices: Indices of images in the batch. @type indices: List[Tuple[int, int]] @@ -119,9 +124,13 @@ def apply_to_image_batch( """ output_batch = [] for i_batch in range(self.out_batch_size): - idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + idx_chunk = indices[ + self.n_tiles * i_batch : self.n_tiles * (i_batch + 1) + ] image_chunk = [image_batch[i] for i in idx_chunk] - mosaiced = mosaic4(image_chunk, self.out_height, self.out_width, self.value) + mosaiced = mosaic4( + image_chunk, self.out_height, self.out_width, self.value + ) output_batch.append(mosaiced) return output_batch @@ -130,7 +139,8 @@ def apply_to_mask_batch( ) -> List[np.ndarray]: """Applies the transformation to a batch of masks. - @param mask_batch: Batch of input masks to which the transformation is applied. + @param mask_batch: Batch of input masks to which the + transformation is applied. @type mask_batch: List[np.ndarray] @param indices: Indices of images in the batch. 
@type indices: List[Tuple[int, int]] @@ -141,7 +151,9 @@ def apply_to_mask_batch( """ output_batch = [] for i_batch in range(self.out_batch_size): - idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + idx_chunk = indices[ + self.n_tiles * i_batch : self.n_tiles * (i_batch + 1) + ] mask_chunk = [mask_batch[i] for i in idx_chunk] mosaiced = mosaic4( mask_chunk, self.out_height, self.out_width, self.mask_value @@ -158,8 +170,8 @@ def apply_to_bboxes_batch( ) -> List[BoxType]: """Applies the transformation to a batch of bboxes. - @param bboxes_batch: Batch of input bboxes to which the transformation is - applied. + @param bboxes_batch: Batch of input bboxes to which the + transformation is applied. @type bboxes_batch: List[BboxType] @param indices: Indices of images in the batch. @type indices: List[Tuple[int, int]] @@ -172,7 +184,9 @@ def apply_to_bboxes_batch( """ output_batch = [] for i_batch in range(self.out_batch_size): - idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + idx_chunk = indices[ + self.n_tiles * i_batch : self.n_tiles * (i_batch + 1) + ] bboxes_chunk = [bboxes_batch[i] for i in idx_chunk] shape_chunk = [image_shapes[i] for i in idx_chunk] new_bboxes = [] @@ -181,7 +195,12 @@ def apply_to_bboxes_batch( rows, cols = shape_chunk[i] for bbox in bboxes: new_bbox = bbox_mosaic4( - bbox[:4], rows, cols, i, self.out_height, self.out_width + bbox[:4], + rows, + cols, + i, + self.out_height, + self.out_width, ) new_bboxes.append(tuple(new_bbox) + tuple(bbox[4:])) output_batch.append(new_bboxes) @@ -196,8 +215,8 @@ def apply_to_keypoints_batch( ) -> List[KeypointType]: """Applies the transformation to a batch of keypoints. - @param keypoints_batch: Batch of input keypoints to which the transformation is - applied. + @param keypoints_batch: Batch of input keypoints to which the + transformation is applied. @type keypoints_batch: List[KeypointType] @param indices: Indices of images in the batch. 
@type indices: List[Tuple[int, int]] @@ -210,7 +229,9 @@ def apply_to_keypoints_batch( """ output_batch = [] for i_batch in range(self.out_batch_size): - idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + idx_chunk = indices[ + self.n_tiles * i_batch : self.n_tiles * (i_batch + 1) + ] keypoints_chunk = [keyboints_batch[i] for i in idx_chunk] shape_chunk = [image_shapes[i] for i in idx_chunk] new_keypoints = [] @@ -219,18 +240,26 @@ def apply_to_keypoints_batch( rows, cols = shape_chunk[i] for keypoint in keypoints: new_keypoint = keypoint_mosaic4( - keypoint[:4], rows, cols, i, self.out_height, self.out_width + keypoint[:4], + rows, + cols, + i, + self.out_height, + self.out_width, ) new_keypoints.append(new_keypoint + tuple(keypoint[4:])) output_batch.append(new_keypoints) return output_batch - def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]: + def get_params_dependent_on_targets( + self, params: Dict[str, Any] + ) -> Dict[str, Any]: """Get parameters dependent on the targets. @param params: Dictionary containing parameters. @type params: Dict[str, Any] - @return: Dictionary containing parameters dependent on the targets. + @return: Dictionary containing parameters dependent on the + targets. @rtype: Dict[str, Any] """ image_batch = params["image_batch"] @@ -276,7 +305,9 @@ def mosaic4( """ N_TILES = 4 if len(image_batch) != N_TILES: - raise ValueError(f"Length of image_batch should be 4. Got {len(image_batch)}") + raise ValueError( + f"Length of image_batch should be 4. 
Got {len(image_batch)}" + ) for i in range(N_TILES - 1): if image_batch[0].shape[2:] != image_batch[i + 1].shape[2:]: @@ -357,7 +388,9 @@ def mosaic4( ) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + img4[y1a:y2a, x1a:x2a] = img[ + y1b:y2b, x1b:x2b + ] # img4[ymin:ymax, xmin:xmax] return img4 @@ -374,12 +407,14 @@ def bbox_mosaic4( @param bbox: A bounding box `(x_min, y_min, x_max, y_max)`. @type bbox: BoxInternalType - @param rows: Height of input image that corresponds to one of the mosaic cells + @param rows: Height of input image that corresponds to one of the + mosaic cells @type rows: int - @param cols: Width of input image that corresponds to one of the mosaic cells + @param cols: Width of input image that corresponds to one of the + mosaic cells @type cols: int - @param position_index: Index of the mosaic cell. 0: top left, 1: top right, 2: - bottom left, 3: bottom right + @param position_index: Index of the mosaic cell. 0: top left, 1: top + right, 2: bottom left, 3: bottom right @type position_index: int @param height: Height of output mosaic image @type height: int @@ -425,12 +460,14 @@ def keypoint_mosaic4( @param keypoint: A keypoint `(x, y, angle, scale)`. @type bbox: KeypointInternalType - @param rows: Height of input image that corresponds to one of the mosaic cells + @param rows: Height of input image that corresponds to one of the + mosaic cells @type rows: int - @param cols: Width of input image that corresponds to one of the mosaic cells + @param cols: Width of input image that corresponds to one of the + mosaic cells @type cols: int - @param position_index: Index of the mosaic cell. 0: top left, 1: top right, 2: - bottom left, 3: bottom right + @param position_index: Index of the mosaic cell. 
0: top left, 1: top + right, 2: bottom left, 3: bottom right @type position_index: int @param height: Height of output mosaic image @type height: int diff --git a/luxonis_ml/data/augmentations/utils.py b/luxonis_ml/data/augmentations/utils.py index fac2feca..cc616997 100644 --- a/luxonis_ml/data/augmentations/utils.py +++ b/luxonis_ml/data/augmentations/utils.py @@ -42,7 +42,9 @@ def __init__( self.resize_transform, ) = self._parse_cfg( image_size=image_size, - augmentations=[a for a in augmentations if a["name"] == "Normalize"] + augmentations=[ + a for a in augmentations if a["name"] == "Normalize" + ] if only_normalize else augmentations, keep_aspect_ratio=keep_aspect_ratio, @@ -54,18 +56,20 @@ def _parse_cfg( augmentations: List[Dict[str, Any]], keep_aspect_ratio: bool = True, ) -> Tuple[BatchCompose, A.Compose, A.Compose, A.Compose]: - """Parses provided config and returns Albumentations BatchedCompose object and - Compose object for default transforms. + """Parses provided config and returns Albumentations + BatchedCompose object and Compose object for default transforms. @type image_size: List[int] @param image_size: Desired image size [H,W] @type augmentations: List[Dict[str, Any]] - @param augmentations: List of augmentations to use and their params + @param augmentations: List of augmentations to use and their + params @type keep_aspect_ratio: bool - @param keep_aspect_ratio: Whether should use resize that keeps aspect ratio of - original image. + @param keep_aspect_ratio: Whether should use resize that keeps + aspect ratio of original image. 
@rtype: Tuple[BatchCompose, A.Compose, A.Compose, A.Compose] - @return: Objects for batched, spatial, pixel and resize transforms + @return: Objects for batched, spatial, pixel and resize + transforms """ # NOTE: Always perform Resize @@ -81,14 +85,18 @@ def _parse_cfg( batched_augs = [] if augmentations: for aug in augmentations: - curr_aug = AUGMENTATIONS.get(aug["name"])(**aug.get("params", {})) + curr_aug = AUGMENTATIONS.get(aug["name"])( + **aug.get("params", {}) + ) if isinstance(curr_aug, A.ImageOnlyTransform): pixel_augs.append(curr_aug) elif isinstance(curr_aug, A.DualTransform): spatial_augs.append(curr_aug) elif isinstance(curr_aug, BatchBasedTransform): self.is_batched = True - self.aug_batch_size = max(self.aug_batch_size, curr_aug.batch_size) + self.aug_batch_size = max( + self.aug_batch_size, curr_aug.batch_size + ) batched_augs.append(curr_aug) batch_transform = BatchCompose( @@ -97,11 +105,17 @@ def _parse_cfg( ], bbox_params=A.BboxParams( format="coco", - label_fields=["bboxes_classes_batch", "bboxes_visibility_batch"], + label_fields=[ + "bboxes_classes_batch", + "bboxes_visibility_batch", + ], ), keypoint_params=A.KeypointParams( format="xy", - label_fields=["keypoints_visibility_batch", "keypoints_classes_batch"], + label_fields=[ + "keypoints_visibility_batch", + "keypoints_classes_batch", + ], remove_invisible=False, ), ) @@ -109,7 +123,8 @@ def _parse_cfg( spatial_transform = A.Compose( spatial_augs, bbox_params=A.BboxParams( - format="coco", label_fields=["bboxes_classes", "bboxes_visibility"] + format="coco", + label_fields=["bboxes_classes", "bboxes_visibility"], ), keypoint_params=A.KeypointParams( format="xy", @@ -121,7 +136,8 @@ def _parse_cfg( pixel_transform = A.Compose( pixel_augs, bbox_params=A.BboxParams( - format="coco", label_fields=["bboxes_classes", "bboxes_visibility"] + format="coco", + label_fields=["bboxes_classes", "bboxes_visibility"], ), keypoint_params=A.KeypointParams( format="xy", @@ -133,7 +149,8 @@ def _parse_cfg( 
resize_transform = A.Compose( [resize], bbox_params=A.BboxParams( - format="coco", label_fields=["bboxes_classes", "bboxes_visibility"] + format="coco", + label_fields=["bboxes_classes", "bboxes_visibility"], ), keypoint_params=A.KeypointParams( format="xy", @@ -142,7 +159,12 @@ def _parse_cfg( ), ) - return batch_transform, spatial_transform, pixel_transform, resize_transform + return ( + batch_transform, + spatial_transform, + pixel_transform, + resize_transform, + ) def _apply_transform( self, @@ -157,7 +179,8 @@ def _apply_transform( @type transformed: Dict[str, np.ndarray] @param transformed: Transformed data @type arg_names: List[str] - @param arg_names: Names of arguments to pass to transform function + @param arg_names: Names of arguments to pass to transform + function @type transform_func: Callable @param transform_func: Transform function to apply @type return_mask: bool @@ -169,7 +192,8 @@ def _apply_transform( """ transform_args = { - arg_name: transformed[f"{arg_name}{arg_suffix}"] for arg_name in arg_names + arg_name: transformed[f"{arg_name}{arg_suffix}"] + for arg_name in arg_names } if return_mask: transform_args["mask"] = transformed[f"mask{arg_suffix}"] @@ -187,7 +211,8 @@ def __call__( """Performs augmentations on provided data. 
@type data: List[Tuple[np.ndarray, Dict[LabelType, np.ndarray]]] - @param data: Data with list of input images and their annotations + @param data: Data with list of input images and their + annotations @type nc: int @param nc: Number of classes @type ns: int @@ -254,7 +279,9 @@ def __call__( transform_args["mask_batch"] = mask_batch transformed = self.batch_transform(force_apply=False, **transform_args) - transformed = {key: np.array(value[0]) for key, value in transformed.items()} + transformed = { + key: np.array(value[0]) for key, value in transformed.items() + } arg_names = [ "image", @@ -268,7 +295,11 @@ def __call__( # Apply spatial transform transformed = self._apply_transform( - transformed, arg_names, self.spatial_transform, return_mask, "_batch" + transformed, + arg_names, + self.spatial_transform, + return_mask, + "_batch", ) # Resize if necessary @@ -282,13 +313,17 @@ def __call__( transformed, arg_names, self.pixel_transform, return_mask ) - out_image, out_mask, out_bboxes, out_keypoints = self.post_transform_process( - transformed, - ns=ns, - nk=nk, - filter_kpts_by_bbox=(LabelType.BOUNDINGBOX in present_annotations) - and (LabelType.KEYPOINTS in present_annotations), - return_mask=return_mask, + out_image, out_mask, out_bboxes, out_keypoints = ( + self.post_transform_process( + transformed, + ns=ns, + nk=nk, + filter_kpts_by_bbox=( + LabelType.BOUNDINGBOX in present_annotations + ) + and (LabelType.KEYPOINTS in present_annotations), + return_mask=return_mask, + ) ) out_annotations = {} @@ -330,8 +365,8 @@ def prepare_img_annotations( @param iw: Input image width @type return_mask: bool @param return_mask: Whether to compute and return mask - @rtype: Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, np.ndarray, - np.ndarray, np.ndarray, np.ndarray] + @rtype: Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, + np.ndarray, np.ndarray, np.ndarray, np.ndarray] @return: Annotations in albumentations format """ @@ -339,7 +374,9 @@ def 
prepare_img_annotations( mask = None if return_mask: - seg = annotations.get(LabelType.SEGMENTATION, np.zeros((1, ih, iw))) + seg = annotations.get( + LabelType.SEGMENTATION, np.zeros((1, ih, iw)) + ) mask = np.argmax(seg, axis=0) + 1 mask[np.sum(seg, axis=0) == 0] = 0 # only background has value 0 @@ -352,7 +389,9 @@ def prepare_img_annotations( bboxes_classes = bboxes[:, 0] # albumentations expects list of keypoints e.g. [(x,y),(x,y),(x,y),(x,y)] - keypoints = annotations.get(LabelType.KEYPOINTS, np.zeros((1, nk * 3 + 1))) + keypoints = annotations.get( + LabelType.KEYPOINTS, np.zeros((1, nk * 3 + 1)) + ) keypoints_unflat = np.reshape(keypoints[:, 1:], (-1, 3)) keypoints_points = keypoints_unflat[:, :2] keypoints_points[:, 0] *= iw @@ -380,7 +419,8 @@ def post_transform_process( filter_kpts_by_bbox: bool, return_mask: bool = True, ) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, np.ndarray]: - """Postprocessing of albumentations output to LuxonisLoader format. + """Postprocessing of albumentations output to LuxonisLoader + format. @type transformed_data: Dict[str, np.ndarray] @param transformed_data: Output data from albumentations @@ -389,9 +429,10 @@ def post_transform_process( @type nk: int @param nk: Number of keypoints per instance @type filter_kpts_by_bbox: bool - @param filter_kpts_by_bbox: If True removes keypoint instances if its bounding - box was removed. - @rtype: Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, np.ndarray] + @param filter_kpts_by_bbox: If True removes keypoint instances + if its bounding box was removed. 
+ @rtype: Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, + np.ndarray] @return: Postprocessed annotations """ @@ -415,7 +456,8 @@ def post_transform_process( transformed_data["bboxes_classes"], axis=-1 ) out_bboxes = np.concatenate( - (transformed_bboxes_classes, transformed_data["bboxes"]), axis=1 + (transformed_bboxes_classes, transformed_data["bboxes"]), + axis=1, ) else: # if no bboxes after transform out_bboxes = np.zeros((0, 5)) @@ -430,7 +472,8 @@ def post_transform_process( nk = 1 # done for easier postprocessing if transformed_data["keypoints"]: out_keypoints = np.concatenate( - (transformed_data["keypoints"], transformed_keypoints_vis), axis=1 + (transformed_data["keypoints"], transformed_keypoints_vis), + axis=1, ) else: out_keypoints = np.zeros((0, nk * 3 + 1)) @@ -442,7 +485,9 @@ def post_transform_process( keypoints_classes = transformed_data["keypoints_classes"] keypoints_classes = keypoints_classes[0::nk] keypoints_classes = np.expand_dims(keypoints_classes, axis=-1) - out_keypoints = np.concatenate((keypoints_classes, out_keypoints), axis=1) + out_keypoints = np.concatenate( + (keypoints_classes, out_keypoints), axis=1 + ) if filter_kpts_by_bbox: out_keypoints = out_keypoints[ transformed_data["bboxes_visibility"] @@ -451,12 +496,14 @@ def post_transform_process( return out_image, out_mask, out_bboxes, out_keypoints def check_bboxes(self, bboxes: np.ndarray) -> np.ndarray: - """Check bbox annotations and correct those with width or height 0. + """Check bbox annotations and correct those with width or height + 0. @type bboxes: np.ndarray @param bboxes: A numpy array representing bounding boxes. @rtype: np.ndarray - @return: The same bounding boxes with any out-of-bounds coordinate corrections. + @return: The same bounding boxes with any out-of-bounds + coordinate corrections. """ for i in range(bboxes.shape[0]): @@ -478,8 +525,8 @@ def mark_invisible_keypoints( @type iw: int @param iw: The image width. 
@rtype: np.ndarray - @return: The same keypoints with corrections to mark keypoints out-of-bounds as - invisible. + @return: The same keypoints with corrections to mark keypoints + out-of-bounds as invisible. """ for kp in keypoints: if not (0 <= kp[0] < iw and 0 <= kp[1] < ih): diff --git a/luxonis_ml/data/datasets/annotation.py b/luxonis_ml/data/datasets/annotation.py index 5de1574e..eb139c6b 100644 --- a/luxonis_ml/data/datasets/annotation.py +++ b/luxonis_ml/data/datasets/annotation.py @@ -1,8 +1,18 @@ import json import logging from abc import ABC, abstractmethod -from datetime import datetime -from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple, TypedDict, Union +from datetime import datetime, timezone +from typing import ( + Any, + ClassVar, + Dict, + List, + Literal, + Optional, + Tuple, + TypedDict, + Union, +) import numpy as np import pycocotools.mask as mask_util @@ -25,7 +35,8 @@ KeypointVisibility: TypeAlias = Literal[0, 1, 2] NormalizedFloat: TypeAlias = Annotated[float, Field(ge=0, le=1)] -"""C{NormalizedFloat} is a float that is restricted to the range [0, 1].""" +"""C{NormalizedFloat} is a float that is restricted to the range [0, +1].""" ParquetDict = TypedDict( "ParquetDict", @@ -58,13 +69,14 @@ class Annotation(ABC, BaseModelExtraForbid): """Base class for an annotation. @type task: str - @ivar task: The task name. By default it is the string representation of the - L{LabelType}. + @ivar task: The task name. By default it is the string + representation of the L{LabelType}. @type class_: str @ivar class_: The class name for the annotation. @type instance_id: int - @ivar instance_id: The instance id of the annotation. This determines the order in - which individual instances are loaded in L{LuxonisLoader}. + @ivar instance_id: The instance id of the annotation. This + determines the order in which individual instances are loaded in + L{LuxonisLoader}. 
@type _label_type: ClassVar[L{LabelType}] @ivar _label_type: The label type of the annotation. """ @@ -83,9 +95,11 @@ def validate_task(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values def get_value(self) -> Dict[str, Any]: - """Converts the annotation to a dictionary that can be saved to a parquet - file.""" - return self.dict(exclude={"class_", "class_id", "instance_id", "task", "type_"}) + """Converts the annotation to a dictionary that can be saved to + a parquet file.""" + return self.dict( + exclude={"class_", "class_id", "instance_id", "task", "type_"} + ) @staticmethod @abstractmethod @@ -95,7 +109,8 @@ def combine_to_numpy( height: int, width: int, ) -> np.ndarray: - """Combines multiple instance annotations into a single numpy array.""" + """Combines multiple instance annotations into a single numpy + array.""" pass @@ -122,9 +137,11 @@ class BBoxAnnotation(Annotation): Values are normalized based on the image size. @type x: float - @ivar x: The center x-coordinate of the bounding box. Normalized to [0, 1]. + @ivar x: The top-left x coordinate of the bounding box. Normalized + to [0, 1]. @type y: float - @ivar y: The center y-coordinate of the bounding box. Normalized to [0, 1]. + @ivar y: The top-left y coordinate of the bounding box. Normalized + to [0, 1]. @type w: float @ivar w: The width of the bounding box. Normalized to [0, 1]. 
@type h: float @@ -188,7 +205,9 @@ class KeypointAnnotation(Annotation): type_: Literal["keypoints"] = Field("keypoints", alias="type") - keypoints: List[Tuple[NormalizedFloat, NormalizedFloat, KeypointVisibility]] + keypoints: List[ + Tuple[NormalizedFloat, NormalizedFloat, KeypointVisibility] + ] _label_type = LabelType.KEYPOINTS @@ -226,9 +245,13 @@ def to_numpy(self, class_mapping: Dict[str, int]) -> np.ndarray: @staticmethod def combine_to_numpy( - annotations: List["KeypointAnnotation"], class_mapping: Dict[str, int], **_ + annotations: List["KeypointAnnotation"], + class_mapping: Dict[str, int], + **_, ) -> np.ndarray: - keypoints = np.zeros((len(annotations), len(annotations[0].keypoints) * 3 + 1)) + keypoints = np.zeros( + (len(annotations), len(annotations[0].keypoints) * 3 + 1) + ) for i, ann in enumerate(annotations): keypoints[i] = ann.to_numpy(class_mapping) return keypoints @@ -305,7 +328,9 @@ def get_value(self) -> Dict[str, Any]: "counts": rle["counts"].decode("utf-8"), } - def to_numpy(self, _: Dict[str, int], width: int, height: int) -> np.ndarray: + def to_numpy( + self, _: Dict[str, int], width: int, height: int + ) -> np.ndarray: assert isinstance(self.counts, bytes) return mask_util.decode( {"counts": self.counts, "size": [height, width]} @@ -316,8 +341,8 @@ class MaskSegmentationAnnotation(SegmentationAnnotation): """Pixel-wise binary segmentation mask. @type mask: npt.NDArray[np.bool_] - @ivar mask: The segmentation mask as a numpy array. The mask must be 2D and must be - castable to a boolean array. + @ivar mask: The segmentation mask as a numpy array. The mask must be + 2D and must be castable to a boolean array. 
""" type_: Literal["mask"] = Field("mask", alias="type") @@ -331,7 +356,11 @@ def _convert_rle(cls, values: Dict[str, Any]) -> Dict[str, Any]: if "mask" in values: return values - if "width" not in values or "height" not in values or "counts" not in values: + if ( + "width" not in values + or "height" not in values + or "counts" not in values + ): raise ValueError( "MaskSegmentationAnnotation must have either " "'mask' or 'width', 'height', and 'counts'" @@ -381,8 +410,9 @@ class PolylineSegmentationAnnotation(SegmentationAnnotation): """Polyline segmentation mask. @type points: List[Tuple[float, float]] - @ivar points: List of points that define the polyline. Each point is a tuple of (x, - y). x and y are normalized to [0, 1] based on the image size. + @ivar points: List of points that define the polyline. Each point is + a tuple of (x, y). x and y are normalized to [0, 1] based on the + image size. """ type_: Literal["polyline"] = Field("polyline", alias="type") @@ -394,7 +424,9 @@ class PolylineSegmentationAnnotation(SegmentationAnnotation): def validate_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: warn = False for i, point in enumerate(values["points"]): - if (point[0] < -2 or point[0] > 2) or (point[1] < -2 or point[1] > 2): + if (point[0] < -2 or point[0] > 2) or ( + point[1] < -2 or point[1] > 2 + ): raise ValueError( "Polyline annotation has value outside of automatic clipping range ([-2, 2]). " "Values should be normalized based on image size to range [0, 1]." 
@@ -414,8 +446,12 @@ def validate_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: ) return values - def to_numpy(self, _: Dict[str, int], width: int, height: int) -> np.ndarray: - polyline = [(round(x * width), round(y * height)) for x, y in self.points] + def to_numpy( + self, _: Dict[str, int], width: int, height: int + ) -> np.ndarray: + polyline = [ + (round(x * width), round(y * height)) for x, y in self.points + ] mask = Image.new("L", (width, height), 0) draw = ImageDraw.Draw(mask) draw.polygon(polyline, fill=1, outline=1) @@ -439,10 +475,16 @@ class ArrayAnnotation(Annotation): @staticmethod def combine_to_numpy( - annotations: List["ArrayAnnotation"], class_mapping: Dict[str, int], **_ + annotations: List["ArrayAnnotation"], + class_mapping: Dict[str, int], + **_, ) -> np.ndarray: out_arr = np.zeros( - (len(annotations), len(class_mapping), *np.load(annotations[0].path).shape) + ( + len(annotations), + len(class_mapping), + *np.load(annotations[0].path).shape, + ) ) for i, ann in enumerate(annotations): class_ = class_mapping.get(ann.class_, 0) @@ -469,7 +511,9 @@ class LabelAnnotation(Annotation): @staticmethod def combine_to_numpy( - annotations: List["LabelAnnotation"], class_mapping: Dict[str, int], **_ + annotations: List["LabelAnnotation"], + class_mapping: Dict[str, int], + **_, ) -> np.ndarray: out_arr = np.zeros((len(annotations), len(class_mapping))).astype( type(annotations[0].value) @@ -504,24 +548,33 @@ class DatasetRecord(BaseModelExtraForbid): ] = Field(None, discriminator="type_") def to_parquet_dict(self) -> ParquetDict: - """Converts an annotation to a dictionary for writing to a parquet file. + """Converts an annotation to a dictionary for writing to a + parquet file. @rtype: L{ParquetDict} @return: A dictionary of annotation data. 
""" - value = self.annotation.get_value() if self.annotation is not None else {} + value = ( + self.annotation.get_value() if self.annotation is not None else {} + ) json_value = json.dumps(value) return { "file": self.file.name, "type": self.annotation.__class__.__name__, - "created_at": datetime.utcnow(), + "created_at": datetime.now(timezone.utc), "class": ( - self.annotation.class_ or "" if self.annotation is not None else "" + self.annotation.class_ or "" + if self.annotation is not None + else "" ), "instance_id": ( - self.annotation.instance_id or -1 if self.annotation is not None else -1 + self.annotation.instance_id or -1 + if self.annotation is not None + else -1 ), - "task": self.annotation.task if self.annotation is not None else "", + "task": self.annotation.task + if self.annotation is not None + else "", "annotation": json_value, } diff --git a/luxonis_ml/data/datasets/base_dataset.py b/luxonis_ml/data/datasets/base_dataset.py index 0b30f234..57f6a832 100644 --- a/luxonis_ml/data/datasets/base_dataset.py +++ b/luxonis_ml/data/datasets/base_dataset.py @@ -18,8 +18,8 @@ class BaseDataset( ABC, metaclass=AutoRegisterMeta, registry=DATASETS_REGISTRY, register=False ): - """Base abstract dataset class for managing datasets in the Luxonis MLOps - ecosystem.""" + """Base abstract dataset class for managing datasets in the Luxonis + MLOps ecosystem.""" @property @abstractmethod @@ -38,7 +38,8 @@ def get_tasks(self) -> List[str]: @abstractmethod def update_source(self, source: LuxonisSource) -> None: - """Updates underlying source of the dataset with a new LuxonisSource. + """Updates underlying source of the dataset with a new + LuxonisSource. @type source: L{LuxonisSource} @param source: The new L{LuxonisSource} to replace the old one. @@ -46,28 +47,31 @@ def update_source(self, source: LuxonisSource) -> None: pass @abstractmethod - def set_classes(self, classes: List[str], task: Optional[str] = None) -> None: - """Sets the names of classes for the dataset. 
This can be across all CV tasks or - certain tasks. + def set_classes( + self, classes: List[str], task: Optional[str] = None + ) -> None: + """Sets the names of classes for the dataset. This can be across + all CV tasks or certain tasks. @type classes: List[str] @param classes: List of class names to set. @type task: Optional[str] - @param task: Optionally specify the task where these classes apply. + @param task: Optionally specify the task where these classes + apply. """ pass @abstractmethod def get_classes(self) -> Tuple[List[str], Dict[str, List[str]]]: - """Gets overall classes in the dataset and classes according to computer vision - task. + """Gets overall classes in the dataset and classes according to + computer vision task. @type sync_mode: bool - @param sync_mode: If C{True}, reads classes from remote storage. If C{False}, - classes are read locally. + @param sync_mode: If C{True}, reads classes from remote storage. + If C{False}, classes are read locally. @rtype: Tuple[List[str], Dict] - @return: A combined list of classes for all tasks and a dictionary mapping tasks - to the classes used in each task. + @return: A combined list of classes for all tasks and a + dictionary mapping tasks to the classes used in each task. """ pass @@ -78,8 +82,8 @@ def set_skeletons( edges: Optional[List[Tuple[int, int]]] = None, task: Optional[str] = None, ) -> None: - """Sets the semantic structure of keypoint skeletons for the classes that use - keypoints. + """Sets the semantic structure of keypoint skeletons for the + classes that use keypoints. Example:: @@ -99,26 +103,32 @@ def set_skeletons( pass @abstractmethod - def get_skeletons(self) -> Dict[str, Tuple[List[str], List[Tuple[int, int]]]]: - """Returns the dictionary defining the semantic skeleton for each class using - keypoints. + def get_skeletons( + self, + ) -> Dict[str, Tuple[List[str], List[Tuple[int, int]]]]: + """Returns the dictionary defining the semantic skeleton for + each class using keypoints. 
@rtype: Dict[str, Tuple[List[str], List[Tuple[int, int]]]] - @return: For each task, a tuple containing a list of keypoint names and a list - of edges between the keypoints. + @return: For each task, a tuple containing a list of keypoint + names and a list of edges between the keypoints. """ pass @abstractmethod - def add(self, generator: DatasetIterator, batch_size: int = 1_000_000) -> None: + def add( + self, generator: DatasetIterator, batch_size: int = 1_000_000 + ) -> None: """Write annotations to parquet files. @type generator: L{DatasetGenerator} - @param generator: A Python iterator that yields either instances of - L{DatasetRecord} or a dictionary that can be converted to L{DatasetRecord}. + @param generator: A Python iterator that yields either instances + of L{DatasetRecord} or a dictionary that can be converted to + L{DatasetRecord}. @type batch_size: int - @param batch_size: The number of annotations generated before processing. This - can be set to a lower value to reduce memory usage. + @param batch_size: The number of annotations generated before + processing. This can be set to a lower value to reduce + memory usage. 
""" pass @@ -133,7 +143,9 @@ def make_splits( ] ] = None, *, - ratios: Optional[Union[Dict[str, float], Tuple[float, float, float]]] = None, + ratios: Optional[ + Union[Dict[str, float], Tuple[float, float, float]] + ] = None, definitions: Optional[Dict[str, List[PathType]]] = None, replace_old_splits: bool = False, ) -> None: diff --git a/luxonis_ml/data/datasets/luxonis_dataset.py b/luxonis_ml/data/datasets/luxonis_dataset.py index ecf5514a..20ce8cb0 100644 --- a/luxonis_ml/data/datasets/luxonis_dataset.py +++ b/luxonis_ml/data/datasets/luxonis_dataset.py @@ -26,7 +26,12 @@ from typing_extensions import Self import luxonis_ml.data.utils.data_utils as data_utils -from luxonis_ml.utils import LuxonisFileSystem, deprecated, environ, make_progress_bar +from luxonis_ml.utils import ( + LuxonisFileSystem, + deprecated, + environ, + make_progress_bar, +) from luxonis_ml.utils.filesystem import PathType from ..utils.constants import LDF_VERSION @@ -49,8 +54,8 @@ def __init__( delete_existing: bool = False, delete_remote: bool = False, ) -> None: - """Luxonis Dataset Format (LDF) is used to define datasets in the Luxonis MLOps - ecosystem. + """Luxonis Dataset Format (LDF) is used to define datasets in + the Luxonis MLOps ecosystem. 
@type dataset_name: str @param dataset_name: Name of the dataset @@ -59,12 +64,14 @@ def __init__( @type bucket_type: BucketType @param bucket_type: Whether to use external cloud buckets @type bucket_storage: BucketStorage - @param bucket_storage: Underlying bucket storage from local, S3, or GCS + @param bucket_storage: Underlying bucket storage from local, S3, + or GCS @type delete_existing: bool - @param delete_existing: Whether to delete a dataset with the same name if it - exists + @param delete_existing: Whether to delete a dataset with the + same name if it exists @type delete_remote: bool - @param delete_remote: Whether to delete the dataset from the cloud as well + @param delete_remote: Whether to delete the dataset from the + cloud as well """ self.base_path = environ.LUXONISML_BASE_PATH @@ -125,7 +132,8 @@ def __len__(self) -> int: return len(df.select("uuid").unique()) if df is not None else 0 def _get_credential(self, key: str) -> str: - """Gets secret credentials from credentials file or ENV variables.""" + """Gets secret credentials from credentials file or ENV + variables.""" if key in self._credentials.keys(): return self._credentials[key] @@ -138,30 +146,43 @@ def _init_paths(self) -> None: """Configures local path or bucket directory.""" self.local_path = ( - self.base_path / "data" / self.team_id / "datasets" / self.dataset_name + self.base_path + / "data" + / self.team_id + / "datasets" + / self.dataset_name ) self.media_path = self.local_path / "media" self.annotations_path = self.local_path / "annotations" self.metadata_path = self.local_path / "metadata" self.arrays_path = self.local_path / "arrays" - for path in [self.media_path, self.annotations_path, self.metadata_path]: + for path in [ + self.media_path, + self.annotations_path, + self.metadata_path, + ]: path.mkdir(exist_ok=True, parents=True) if not self.is_remote: self.path = str(self.local_path) else: self.path = self._construct_url( - self.bucket_storage, self.bucket, self.team_id, 
self.dataset_name + self.bucket_storage, + self.bucket, + self.team_id, + self.dataset_name, ) @overload - def _load_df_offline(self, lazy: Literal[False] = ...) -> Optional[pl.DataFrame]: - ... + def _load_df_offline( + self, lazy: Literal[False] = ... + ) -> Optional[pl.DataFrame]: ... @overload - def _load_df_offline(self, lazy: Literal[True] = ...) -> Optional[pl.LazyFrame]: - ... + def _load_df_offline( + self, lazy: Literal[True] = ... + ) -> Optional[pl.LazyFrame]: ... def _load_df_offline( self, lazy: bool = False @@ -179,9 +200,13 @@ def _load_df_offline( return pl.concat(dfs) if dfs else None def _get_file_index(self) -> Optional[pl.DataFrame]: - path = get_file(self.fs, "metadata/file_index.parquet", self.media_path) + path = get_file( + self.fs, "metadata/file_index.parquet", self.media_path + ) if path is not None and path.exists(): - return pl.read_parquet(path).select(pl.all().exclude("^__index_level_.*$")) + return pl.read_parquet(path).select( + pl.all().exclude("^__index_level_.*$") + ) return None def _write_index( @@ -204,7 +229,10 @@ def _write_metadata(self) -> None: @staticmethod def _construct_url( - bucket_storage: BucketStorage, bucket: str, team_id: str, dataset_name: str + bucket_storage: BucketStorage, + bucket: str, + team_id: str, + dataset_name: str, ) -> str: """Constructs a URL for a remote dataset.""" return f"{bucket_storage.value}://{bucket}/{team_id}/datasets/{dataset_name}" @@ -236,7 +264,8 @@ def is_remote(self) -> bool: return self.bucket_storage != BucketStorage.LOCAL def update_source(self, source: LuxonisSource) -> None: - """Updates underlying source of the dataset with a new L{LuxonisSource}. + """Updates underlying source of the dataset with a new + L{LuxonisSource}. @type source: L{LuxonisSource} @param source: The new L{LuxonisSource} to replace the old one. 
@@ -245,7 +274,9 @@ def update_source(self, source: LuxonisSource) -> None: self.metadata["source"] = source.to_document() self._write_metadata() - def set_classes(self, classes: List[str], task: Optional[str] = None) -> None: + def set_classes( + self, classes: List[str], task: Optional[str] = None + ) -> None: if task is not None: self.metadata["classes"][task] = classes else: @@ -257,7 +288,11 @@ def set_classes(self, classes: List[str], task: Optional[str] = None) -> None: def get_classes(self) -> Tuple[List[str], Dict[str, List[str]]]: all_classes = list( - {c for classes in self.metadata["classes"].values() for c in classes} + { + c + for classes in self.metadata["classes"].values() + for c in classes + } ) return sorted(all_classes), self.metadata["classes"] @@ -281,7 +316,9 @@ def set_skeletons( } self._write_metadata() - def get_skeletons(self) -> Dict[str, Tuple[List[str], List[Tuple[int, int]]]]: + def get_skeletons( + self, + ) -> Dict[str, Tuple[List[str], List[Tuple[int, int]]]]: return { task: (skel["labels"], skel["edges"]) for task, skel in self.metadata["skeletons"].items() @@ -308,17 +345,21 @@ def sync_from_cloud(self, force: bool = False) -> None: self.logger.warning("Already synced. Use force=True to resync") def delete_dataset(self, *, delete_remote: bool = False) -> None: - """Deletes the dataset from local storage and optionally from the cloud. + """Deletes the dataset from local storage and optionally from + the cloud. @type delete_remote: bool - @param delete_remote: Whether to delete the dataset from the cloud. + @param delete_remote: Whether to delete the dataset from the + cloud. 
""" if not self.is_remote: shutil.rmtree(self.path) self.logger.info(f"Deleted dataset {self.dataset_name}") if self.is_remote and delete_remote: - self.logger.info(f"Deleting dataset {self.dataset_name} from cloud") + self.logger.info( + f"Deleting dataset {self.dataset_name} from cloud" + ) assert self.path assert self.dataset_name assert self.local_path @@ -328,11 +369,17 @@ def delete_dataset(self, *, delete_remote: bool = False) -> None: def _infer_task(self, ann: Annotation) -> str: if not hasattr(LuxonisDataset._infer_task, "_logged_infered_classes"): - LuxonisDataset._infer_task._logged_infered_classes = defaultdict(bool) + LuxonisDataset._infer_task._logged_infered_classes = defaultdict( + bool + ) def _log_once(cls_: str, task: str, message: str, level: str = "info"): - if not LuxonisDataset._infer_task._logged_infered_classes[(cls_, task)]: - LuxonisDataset._infer_task._logged_infered_classes[(cls_, task)] = True + if not LuxonisDataset._infer_task._logged_infered_classes[ + (cls_, task) + ]: + LuxonisDataset._infer_task._logged_infered_classes[ + (cls_, task) + ] = True getattr(self.logger, level)(message, extra={"markup": True}) cls_ = ann.class_ @@ -420,7 +467,9 @@ def _add_process_batch( self.logger.info("Uploading media...") # TODO: support from bucket (likely with a self.fs.copy_dir) - self.fs.put_dir(local_paths=paths, remote_dir="media", uuid_dict=uuid_dict) + self.fs.put_dir( + local_paths=paths, remote_dir="media", uuid_dict=uuid_dict + ) self.logger.info("Media uploaded") self._process_arrays(batch_data) @@ -446,14 +495,18 @@ def _add_process_batch( elif uuid not in processed_uuids: new_index["uuid"].append(uuid) new_index["file"].append(file) - new_index["original_filepath"].append(str(filepath.absolute())) + new_index["original_filepath"].append( + str(filepath.absolute()) + ) processed_uuids.add(uuid) pfm.write({"uuid": uuid, **ann.to_parquet_dict()}) self.progress.update(task, advance=1) self.progress.remove_task(task) - def add(self, 
generator: DatasetIterator, batch_size: int = 1_000_000) -> Self: + def add( + self, generator: DatasetIterator, batch_size: int = 1_000_000 + ) -> Self: generator = add_generator_wrapper(generator) index = self._get_file_index() new_index = {"uuid": [], "file": [], "original_filepath": []} @@ -465,14 +518,19 @@ def add(self, generator: DatasetIterator, batch_size: int = 1_000_000) -> Self: num_kpts_per_task: Dict[str, int] = {} annotations_path = get_dir( - self.fs, "annotations", self.local_path, default=self.annotations_path + self.fs, + "annotations", + self.local_path, + default=self.annotations_path, ) assert annotations_path is not None with ParquetFileManager(annotations_path) as pfm: for i, data in enumerate(generator, start=1): record = ( - data if isinstance(data, DatasetRecord) else DatasetRecord(**data) + data + if isinstance(data, DatasetRecord) + else DatasetRecord(**data) ) ann = record.annotation if ann is not None: @@ -493,7 +551,9 @@ def add(self, generator: DatasetIterator, batch_size: int = 1_000_000) -> Self: ) batch_data = [] - self._add_process_batch(batch_data, pfm, index, new_index, processed_uuids) + self._add_process_batch( + batch_data, pfm, index, new_index, processed_uuids + ) with suppress(shutil.SameFileError): self.fs.put_dir(annotations_path, "") @@ -503,7 +563,9 @@ def add(self, generator: DatasetIterator, batch_size: int = 1_000_000) -> Self: old_classes = set(curr_classes.get(task, [])) new_classes = list(classes - old_classes) if new_classes: - self.logger.info(f"Detected new classes for task {task}: {new_classes}") + self.logger.info( + f"Detected new classes for task {task}: {new_classes}" + ) self.set_classes(list(classes | old_classes), task) for task, num_kpts in num_kpts_per_task.items(): self.set_skeletons( @@ -538,7 +600,9 @@ def _warn_on_duplicates(self) -> None: ) duplicates_paired_df = duplicates_paired.collect() for uuid, files in duplicates_paired_df.iter_rows(): - self.logger.warning(f"UUID: {uuid} has multiple 
file names: {files}") + self.logger.warning( + f"UUID: {uuid} has multiple file names: {files}" + ) # Warn on duplicate annotations duplicate_annotation = ( @@ -579,7 +643,9 @@ def make_splits( ] ] = None, *, - ratios: Optional[Union[Dict[str, float], Tuple[float, float, float]]] = None, + ratios: Optional[ + Union[Dict[str, float], Tuple[float, float, float]] + ] = None, definitions: Optional[Dict[str, List[PathType]]] = None, replace_old_splits: bool = False, ) -> None: @@ -591,7 +657,9 @@ def make_splits( if splits is not None: if ratios is not None or definitions is not None: - raise ValueError("Cannot provide both splits and ratios/definitions") + raise ValueError( + "Cannot provide both splits and ratios/definitions" + ) if isinstance(splits, tuple): ratios = splits elif isinstance(splits, dict): @@ -607,7 +675,11 @@ def make_splits( raise ValueError( "Ratios must be a tuple of 3 floats for train, val, and test splits" ) - ratios = {"train": ratios[0], "val": ratios[1], "test": ratios[2]} + ratios = { + "train": ratios[0], + "val": ratios[1], + "test": ratios[2], + } sum_ = sum(ratios.values()) if not math.isclose(sum_, 1.0): raise ValueError(f"Ratios must sum to 1.0, got {sum_:0.4f}") @@ -634,7 +706,9 @@ def make_splits( with open(splits_path, "r") as file: old_splits = defaultdict(list, json.load(file)) - defined_uuids = set(uuid for uuids in old_splits.values() for uuid in uuids) + defined_uuids = set( + uuid for uuids in old_splits.values() for uuid in uuids + ) if definitions is None: ratios = ratios or {"train": 0.8, "val": 0.1, "test": 0.1} @@ -654,7 +728,9 @@ def make_splits( "If you want to generate new splits, set `replace_old_splits=True`" ) else: - ids = df.select("uuid").unique().get_column("uuid").to_list() + ids = ( + df.select("uuid").unique().get_column("uuid").to_list() + ) old_splits = defaultdict(list) np.random.shuffle(ids) @@ -673,7 +749,9 @@ def make_splits( for split, filepaths in definitions.items(): splits_to_update.append(split) if 
not isinstance(filepaths, list): - raise ValueError("Must provide splits as a list of filepaths") + raise ValueError( + "Must provide splits as a list of filepaths" + ) ids = [ find_filepath_uuid(filepath, index, raise_on_missing=True) for filepath in filepaths @@ -700,7 +778,8 @@ def exists( @type dataset_name: str @param dataset_name: Name of the dataset to check @type remote: bool - @param remote: Whether to check if the dataset exists in the cloud + @param remote: Whether to check if the dataset exists in the + cloud """ return dataset_name in LuxonisDataset.list_datasets( team_id, bucket_storage, bucket diff --git a/luxonis_ml/data/datasets/source.py b/luxonis_ml/data/datasets/source.py index 96811d48..57174abe 100644 --- a/luxonis_ml/data/datasets/source.py +++ b/luxonis_ml/data/datasets/source.py @@ -6,8 +6,8 @@ @dataclass class LuxonisComponent: - """Abstraction for a piece of media within a source. Most commonly, this abstracts - an image sensor. + """Abstraction for a piece of media within a source. Most commonly, + this abstracts an image sensor. @type name: str @param name: A recognizable name for the component. 
@@ -35,7 +35,9 @@ def to_document(self) -> LuxonisComponentDocument: } @classmethod - def from_document(cls, document: LuxonisComponentDocument) -> "LuxonisComponent": + def from_document( + cls, document: LuxonisComponentDocument + ) -> "LuxonisComponent": if document["image_type"] is not None: return cls( name=document["name"], @@ -44,12 +46,14 @@ def from_document(cls, document: LuxonisComponentDocument) -> "LuxonisComponent" ) else: return cls( - name=document["name"], media_type=MediaType(document["media_type"]) + name=document["name"], + media_type=MediaType(document["media_type"]), ) class LuxonisSource: - """Abstracts the structure of a dataset and which components/media are included.""" + """Abstracts the structure of a dataset and which components/media + are included.""" class LuxonisSourceDocument(TypedDict): name: str @@ -62,7 +66,8 @@ def __init__( components: Optional[List[LuxonisComponent]] = None, main_component: Optional[str] = None, ) -> None: - """Abstracts the structure of a dataset by grouping together components. + """Abstracts the structure of a dataset by grouping together + components. 
For example, with an U{OAK-D}, you can have a source with 4 image @@ -86,7 +91,9 @@ def __init__( LuxonisComponent(name) ] # basic source includes a single color image - self.components = {component.name: component for component in components} + self.components = { + component.name: component for component in components + } self.main_component = main_component or next(iter(self.components)) def to_document(self) -> LuxonisSourceDocument: @@ -94,7 +101,8 @@ def to_document(self) -> LuxonisSourceDocument: "name": self.name, "main_component": self.main_component, "components": [ - component.to_document() for component in self.components.values() + component.to_document() + for component in self.components.values() ], } diff --git a/luxonis_ml/data/datasets/utils.py b/luxonis_ml/data/datasets/utils.py index 29fc23a0..6df42d47 100644 --- a/luxonis_ml/data/datasets/utils.py +++ b/luxonis_ml/data/datasets/utils.py @@ -137,7 +137,13 @@ def get_dir( def _rescale_mask( - mask: np.ndarray, mask_w: int, mask_h: int, x: float, y: float, w: float, h: float + mask: np.ndarray, + mask_w: int, + mask_h: int, + x: float, + y: float, + w: float, + h: float, ) -> np.ndarray: return mask[ int(y * mask_h) : int((y + h) * mask_h), @@ -182,7 +188,8 @@ def rescale_values( np.ndarray, ] ]: - """Rescale annotation values based on the bounding box coordinates.""" + """Rescale annotation values based on the bounding box + coordinates.""" x, y, w, h = bbox["x"], bbox["y"], bbox["w"], bbox["h"] if sub_ann_key == "keypoints": @@ -198,7 +205,9 @@ def rescale_values( if sub_ann_key == "segmentation": assert isinstance(ann, dict) if "polylines" in ann: - return [(poly[0] * w + x, poly[1] * h + y) for poly in ann["polylines"]] + return [ + (poly[0] * w + x, poly[1] * h + y) for poly in ann["polylines"] + ] if "rle" in ann: return _rescale_rle(ann["rle"], x, y, w, h) @@ -216,8 +225,8 @@ def rescale_values( def add_generator_wrapper(generator: DatasetIterator) -> DatasetIterator: - """Generator wrapper 
to rescale and reformat annotations for each record in the - input generator.""" + """Generator wrapper to rescale and reformat annotations for each + record in the input generator.""" def create_new_record( record: Dict[str, Union[str, Dict]], diff --git a/luxonis_ml/data/loaders/__init__.py b/luxonis_ml/data/loaders/__init__.py index 1463eb47..b8bdc3a8 100644 --- a/luxonis_ml/data/loaders/__init__.py +++ b/luxonis_ml/data/loaders/__init__.py @@ -1,4 +1,9 @@ -from .base_loader import LOADERS_REGISTRY, BaseLoader, Labels, LuxonisLoaderOutput +from .base_loader import ( + LOADERS_REGISTRY, + BaseLoader, + Labels, + LuxonisLoaderOutput, +) from .luxonis_loader import LuxonisLoader __all__ = [ diff --git a/luxonis_ml/data/loaders/base_loader.py b/luxonis_ml/data/loaders/base_loader.py index 08dd90b0..ea33b355 100644 --- a/luxonis_ml/data/loaders/base_loader.py +++ b/luxonis_ml/data/loaders/base_loader.py @@ -9,13 +9,14 @@ from ..utils.enums import LabelType Labels: TypeAlias = Dict[str, Tuple[np.ndarray, LabelType]] -"""C{Labels} is a dictionary mappping task names to their L{LabelType} and annotations -as L{numpy arrays}.""" +"""C{Labels} is a dictionary mappping task names to their L{LabelType} +and annotations as L{numpy arrays}.""" LuxonisLoaderOutput: TypeAlias = Tuple[np.ndarray, Labels] -"""C{LuxonisLoaderOutput} is a tuple of an image as a L{numpy array} and a -dictionary of task group names and their annotations as L{Annotations}.""" +"""C{LuxonisLoaderOutput} is a tuple of an image as a L{numpy +array} and a dictionary of task group names and their +annotations as L{Annotations}.""" LOADERS_REGISTRY: Registry[Type["BaseLoader"]] = Registry(name="loaders") diff --git a/luxonis_ml/data/loaders/luxonis_loader.py b/luxonis_ml/data/loaders/luxonis_loader.py index a5ad2c27..46476ffe 100644 --- a/luxonis_ml/data/loaders/luxonis_loader.py +++ b/luxonis_ml/data/loaders/luxonis_loader.py @@ -30,15 +30,16 @@ def __init__( @type dataset: LuxonisDataset @param dataset: 
LuxonisDataset to use @type view: Union[str, List[str]] - @param view: What splits to use. Can be either a single split or a list of - splits. Defaults to "train". + @param view: What splits to use. Can be either a single split or + a list of splits. Defaults to "train". @type stream: bool @param stream: Flag for data streaming. Defaults to C{False}. @type augmentations: Optional[luxonis_ml.loader.Augmentations] - @param augmentations: Augmentation class that performs augmentations. Defaults - to C{None}. + @param augmentations: Augmentation class that performs + augmentations. Defaults to C{None}. @type force_resync: bool - @param force_resync: Flag to force resync from cloud. Defaults to C{False}. + @param force_resync: Flag to force resync from cloud. Defaults + to C{False}. """ self.logger = logging.getLogger(__name__) @@ -111,14 +112,15 @@ def __len__(self) -> int: return len(self.instances) def __getitem__(self, idx: int) -> LuxonisLoaderOutput: - """Function to load a sample consisting of an image and its annotations. + """Function to load a sample consisting of an image and its + annotations. @type idx: int - @param idx: The (often random) integer index to retrieve a sample from the - dataset. + @param idx: The (often random) integer index to retrieve a + sample from the dataset. @rtype: LuxonisLoaderOutput - @return: The loader ouput consisting of the image and a dictionary defining its - annotations. + @return: The loader ouput consisting of the image and a + dictionary defining its annotations. 
""" if self.augmentations is None: @@ -159,7 +161,9 @@ def __getitem__(self, idx: int) -> LuxonisLoaderOutput: if label_type == LabelType.KEYPOINTS: if ( LabelType.BOUNDINGBOX - in map(itemgetter(1), list(annotations.values())) + in map( + itemgetter(1), list(annotations.values()) + ) and LabelType.BOUNDINGBOX not in label_dict # type: ignore ): continue @@ -167,7 +171,9 @@ def __getitem__(self, idx: int) -> LuxonisLoaderOutput: if ( LabelType.BOUNDINGBOX in label_dict # type: ignore and LabelType.BOUNDINGBOX - in map(itemgetter(1), list(annotations.values())) + in map( + itemgetter(1), list(annotations.values()) + ) ): bbox_task = task_dict[LabelType.BOUNDINGBOX] *_, bbox_suffix = bbox_task.split("-", 1) @@ -191,20 +197,24 @@ def __getitem__(self, idx: int) -> LuxonisLoaderOutput: random.setstate(random_state) np.random.set_state(np_random_state) - img, aug_annotations = self.augmentations(aug_input_data, nk=nk, ns=ns) + img, aug_annotations = self.augmentations( + aug_input_data, nk=nk, ns=ns + ) for label_type, array in aug_annotations.items(): out_dict[label_to_task[label_type]] = (array, label_type) return img, out_dict # type: ignore - def _load_image_with_annotations(self, idx: int) -> Tuple[np.ndarray, Labels]: + def _load_image_with_annotations( + self, idx: int + ) -> Tuple[np.ndarray, Labels]: """Loads image and its annotations based on index. 
@type idx: int @param idx: Index of the image @rtype: Tuple[L{np.ndarray}, dict] - @return: Image as L{np.ndarray} in RGB format and a dictionary with all the - present annotations + @return: Image as L{np.ndarray} in RGB format and a dictionary + with all the present annotations """ ann_indices = self.idx_to_df_row[idx] @@ -227,7 +237,9 @@ def _load_image_with_annotations(self, idx: int) -> Tuple[np.ndarray, Labels]: labels_by_task = defaultdict(list) instance_counters = defaultdict(int) for annotation_data in ann_rows: - _, _, type_, _, class_, instance_id, task, ann_str, _ = annotation_data + _, _, type_, _, class_, instance_id, task, ann_str, _ = ( + annotation_data + ) if instance_id < 0: instance_counters[task] += 1 instance_id = instance_counters[task] diff --git a/luxonis_ml/data/parsers/base_parser.py b/luxonis_ml/data/parsers/base_parser.py index f913c243..0d5c2593 100644 --- a/luxonis_ml/data/parsers/base_parser.py +++ b/luxonis_ml/data/parsers/base_parser.py @@ -11,8 +11,8 @@ ParserOutput = Tuple[DatasetIterator, List[str], Dict[str, Dict], List[str]] """Type alias for parser output. -Contains a function to create the annotation generator, list of classes names, skeleton -dictionary for keypoints and list of added images. +Contains a function to create the annotation generator, list of classes +names, skeleton dictionary for keypoints and list of added images. """ @@ -24,14 +24,14 @@ class BaseParser(ABC): @staticmethod @abstractmethod def validate_split(split_path: Path) -> Optional[Dict[str, Any]]: - """Validates if a split subdirectory is in an expected format. If so, returns - kwargs to pass to L{from_split} method. + """Validates if a split subdirectory is in an expected format. + If so, returns kwargs to pass to L{from_split} method. @type split_path: Path @param split_path: Path to split directory. 
@rtype: Optional[Dict[str, Any]] - @return: Dictionary with kwargs to pass to L{from_split} method or C{None} if - the split is not in the expected format. + @return: Dictionary with kwargs to pass to L{from_split} method + or C{None} if the split is not in the expected format. """ pass @@ -56,7 +56,8 @@ def from_dir( @type dataset_dir: str @param dataset_dir: Path to source dataset directory. @type parser_kwargs: Dict[str, Any] - @param parser_kwargs: Additional kwargs for specific parser implementation. + @param parser_kwargs: Additional kwargs for specific parser + implementation. @rtype: Tuple[List[str], List[str], List[str]] @return: Tuple with added images for train, val and test splits. """ @@ -64,7 +65,8 @@ def from_dir( @abstractmethod def from_split(self, **kwargs) -> ParserOutput: - """Parses a data in a split subdirectory to L{LuxonisDataset} format. + """Parses a data in a split subdirectory to L{LuxonisDataset} + format. @type kwargs: Dict[str, Any] @param kwargs: Additional kwargs for specific parser implementation. @@ -82,7 +84,8 @@ def _parse_split(self, **kwargs) -> List[str]: """Parses data in a split subdirectory. @type kwargs: Dict[str, Any] - @param kwargs: Additional kwargs for specific parser implementation. + @param kwargs: Additional kwargs for specific parser + implementation. @rtype: List[str] @return: List of added images. """ @@ -103,19 +106,21 @@ def parse_split( split_ratios: Optional[Dict[str, float]] = None, **kwargs, ) -> BaseDataset: - """Parses data in a split subdirectory to L{LuxonisDataset} format. + """Parses data in a split subdirectory to L{LuxonisDataset} + format. @type split: Optional[str] - @param split: As what split the data will be added to LDF. If set, - C{split_ratios} and C{random_split} are ignored. + @param split: As what split the data will be added to LDF. If + set, C{split_ratios} and C{random_split} are ignored. @type random_split: bool - @param random_split: If random splits should be made. 
If C{True}, - C{split_ratios} are used. + @param random_split: If random splits should be made. If + C{True}, C{split_ratios} are used. @type split_ratios: Optional[Tuple[float, float, float]] - @param split_ratios: Ratios for random splits. Only used if C{random_split} is - C{True}. Defaults to C{(0.8, 0.1, 0.1)}. + @param split_ratios: Ratios for random splits. Only used if + C{random_split} is C{True}. Defaults to C{(0.8, 0.1, 0.1)}. @type kwargs: Dict[str, Any] - @param kwargs: Additional C{kwargs} for specific parser implementation. + @param kwargs: Additional C{kwargs} for specific parser + implementation. @rtype: LuxonisDataset @return: C{LDF} with all the images and annotations parsed. """ @@ -132,7 +137,8 @@ def parse_dir(self, dataset_dir: Path, **kwargs) -> BaseDataset: @type dataset_dir: str @param dataset_dir: Path to source dataset directory. @type kwargs: Dict[str, Any] - @param kwargs: Additional C{kwargs} for specific parser implementation. + @param kwargs: Additional C{kwargs} for specific parser + implementation. @rtype: LuxonisDataset @return: C{LDF} with all the images and annotations parsed. """ @@ -148,7 +154,8 @@ def parse_dir(self, dataset_dir: Path, **kwargs) -> BaseDataset: return self.dataset def task_wrapper(self, generator: DatasetIterator) -> DatasetIterator: - """Wraps the generator with a function that adds custom task information. + """Wraps the generator with a function that adds custom task + information. @type generator: DatasetIterator @param generator: Generator function @@ -167,7 +174,8 @@ def task_wrapper(self, generator: DatasetIterator) -> DatasetIterator: @staticmethod def _get_added_images(generator: DatasetIterator) -> List[PathType]: - """Returns list of unique images added by the generator function. + """Returns list of unique images added by the generator + function. 
@type generator: L{DatasetGenerator} @param generator: Generator function @@ -182,7 +190,9 @@ def _get_added_images(generator: DatasetIterator) -> List[PathType]: ) @staticmethod - def _compare_stem_files(list1: Iterable[Path], list2: Iterable[Path]) -> bool: + def _compare_stem_files( + list1: Iterable[Path], list2: Iterable[Path] + ) -> bool: """Compares sets of files by their stem. Example: @@ -207,7 +217,8 @@ def _compare_stem_files(list1: Iterable[Path], list2: Iterable[Path]) -> bool: @staticmethod def _list_images(image_dir: Path) -> List[Path]: - """Returns list of all images in the directory supported by opencv. + """Returns list of all images in the directory supported by + opencv. @type image_dir: Path @param image_dir: Path to directory with images diff --git a/luxonis_ml/data/parsers/classification_directory_parser.py b/luxonis_ml/data/parsers/classification_directory_parser.py index 06d85ac8..0beb1cf7 100644 --- a/luxonis_ml/data/parsers/classification_directory_parser.py +++ b/luxonis_ml/data/parsers/classification_directory_parser.py @@ -46,25 +46,30 @@ def validate_split(split_path: Path) -> Optional[Dict[str, Any]]: def validate(dataset_dir: Path) -> bool: for split in ["train", "valid", "test"]: split_path = dataset_dir / split - if ClassificationDirectoryParser.validate_split(split_path) is None: + if ( + ClassificationDirectoryParser.validate_split(split_path) + is None + ): return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split(class_dir=dataset_dir / "train") added_val_imgs = self._parse_split(class_dir=dataset_dir / "valid") added_test_imgs = self._parse_split(class_dir=dataset_dir / "test") return added_train_imgs, added_val_imgs, added_test_imgs def from_split(self, class_dir: Path) -> ParserOutput: - """Parses annotations from classification directory format to 
LDF. Annotations - include classification. + """Parses annotations from classification directory format to + LDF. Annotations include classification. @type class_dir: Path @param class_dir: Path to top level directory @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. """ class_names = [d.name for d in class_dir.iterdir() if d.is_dir()] diff --git a/luxonis_ml/data/parsers/coco_parser.py b/luxonis_ml/data/parsers/coco_parser.py index 66ebfc59..ffe8e15d 100644 --- a/luxonis_ml/data/parsers/coco_parser.py +++ b/luxonis_ml/data/parsers/coco_parser.py @@ -58,7 +58,8 @@ class COCOParser(BaseParser): def _detect_dataset_dir_format( dataset_dir: Path, ) -> Tuple[Optional[Format], List[str]]: - """Checks if dataset directory structure is in FiftyOne or Roboflow format.""" + """Checks if dataset directory structure is in FiftyOne or + Roboflow format.""" fiftyone_splits = ["train", "validation", "test"] roboflow_splits = ["train", "valid", "test"] if all((dataset_dir / split).exists() for split in fiftyone_splits): @@ -130,7 +131,9 @@ def from_dir( train_ann_path = ( dataset_dir / keypoint_ann_paths["train"] - if keypoint_ann_paths and use_keypoint_ann and dir_format == Format.FIFTYONE + if keypoint_ann_paths + and use_keypoint_ann + and dir_format == Format.FIFTYONE else train_paths["annotation_path"] ) added_train_imgs = self._parse_split( @@ -144,7 +147,9 @@ def from_dir( val_ann_path = ( dataset_dir / keypoint_ann_paths["val"] - if keypoint_ann_paths and use_keypoint_ann and dir_format == Format.FIFTYONE + if keypoint_ann_paths + and use_keypoint_ann + and dir_format == Format.FIFTYONE else val_paths["annotation_path"] ) _added_val_imgs = self._parse_split( @@ -179,17 +184,20 @@ def from_dir( return added_train_imgs, added_val_imgs, added_test_imgs - def 
from_split(self, image_dir: Path, annotation_path: Path) -> ParserOutput: - """Parses annotations from COCO format to LDF. Annotations include - classification, segmentation, object detection and keypoints if present. + def from_split( + self, image_dir: Path, annotation_path: Path + ) -> ParserOutput: + """Parses annotations from COCO format to LDF. Annotations + include classification, segmentation, object detection and + keypoints if present. @type image_dir: Path @param image_dir: Path to directory with images @type annotation_path: Path @param annotation_path: Path to annotation json file @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. """ with open(annotation_path) as f: annotation_data = json.load(f) @@ -292,7 +300,9 @@ def generator() -> DatasetIterator: keypoints = [] for kp in kpts: - keypoints.append((kp[0] / img_w, kp[1] / img_h, int(kp[2]))) + keypoints.append( + (kp[0] / img_w, kp[1] / img_h, int(kp[2])) + ) yield { "file": path, "annotation": { diff --git a/luxonis_ml/data/parsers/create_ml_parser.py b/luxonis_ml/data/parsers/create_ml_parser.py index 6f6c8b0d..0677250a 100644 --- a/luxonis_ml/data/parsers/create_ml_parser.py +++ b/luxonis_ml/data/parsers/create_ml_parser.py @@ -48,33 +48,43 @@ def validate(dataset_dir: Path) -> bool: return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split( image_dir=dataset_dir / "train", - annotation_path=dataset_dir / "train" / "_annotations.createml.json", + annotation_path=dataset_dir + / "train" + / "_annotations.createml.json", ) added_val_imgs = self._parse_split( image_dir=dataset_dir / "valid", - annotation_path=dataset_dir 
/ "valid" / "_annotations.createml.json", + annotation_path=dataset_dir + / "valid" + / "_annotations.createml.json", ) added_test_imgs = self._parse_split( image_dir=dataset_dir / "test", - annotation_path=dataset_dir / "test" / "_annotations.createml.json", + annotation_path=dataset_dir + / "test" + / "_annotations.createml.json", ) return added_train_imgs, added_val_imgs, added_test_imgs - def from_split(self, image_dir: Path, annotation_path: Path) -> ParserOutput: - """Parses annotations from CreateML format to LDF. Annotations include - classification and object detection. + def from_split( + self, image_dir: Path, annotation_path: Path + ) -> ParserOutput: + """Parses annotations from CreateML format to LDF. Annotations + include classification and object detection. @type image_dir: Path @param image_dir: Path to directory with images @type annotation_path: Path @param annotation_path: Path to annotation json file @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. 
""" with open(annotation_path) as f: annotations_data = json.load(f) diff --git a/luxonis_ml/data/parsers/darknet_parser.py b/luxonis_ml/data/parsers/darknet_parser.py index 7b67ac8d..bc26a58b 100644 --- a/luxonis_ml/data/parsers/darknet_parser.py +++ b/luxonis_ml/data/parsers/darknet_parser.py @@ -34,7 +34,10 @@ def validate_split(split_path: Path) -> Optional[Dict[str, Any]]: labels = split_path.glob("*.txt") if not BaseParser._compare_stem_files(images, labels): return None - return {"image_dir": split_path, "classes_path": split_path / "_darknet.labels"} + return { + "image_dir": split_path, + "classes_path": split_path / "_darknet.labels", + } @staticmethod def validate(dataset_dir: Path) -> bool: @@ -44,7 +47,9 @@ def validate(dataset_dir: Path) -> bool: return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split( image_dir=dataset_dir / "train", classes_path=dataset_dir / "train" / "_darknet.labels", @@ -60,19 +65,21 @@ def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: return added_train_imgs, added_val_imgs, added_test_imgs def from_split(self, image_dir: Path, classes_path: Path) -> ParserOutput: - """Parses annotations from Darknet format to LDF. Annotations include - classification and object detection. + """Parses annotations from Darknet format to LDF. Annotations + include classification and object detection. @type image_dir: Path @param image_dir: Path to directory with images @type classes_path: Path @param classes_path: Path to file with class names @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. 
""" with open(classes_path) as f: - class_names = {i: line.rstrip() for i, line in enumerate(f.readlines())} + class_names = { + i: line.rstrip() for i, line in enumerate(f.readlines()) + } def generator() -> DatasetIterator: for img_path in self._list_images(image_dir): diff --git a/luxonis_ml/data/parsers/luxonis_parser.py b/luxonis_ml/data/parsers/luxonis_parser.py index 4ca29073..1a08e0fc 100644 --- a/luxonis_ml/data/parsers/luxonis_parser.py +++ b/luxonis_ml/data/parsers/luxonis_parser.py @@ -2,7 +2,16 @@ import zipfile from enum import Enum from pathlib import Path -from typing import Dict, Generic, Optional, Tuple, Type, TypeVar, Union, overload +from typing import ( + Dict, + Generic, + Optional, + Tuple, + Type, + TypeVar, + Union, + overload, +) from luxonis_ml.data import DATASETS_REGISTRY, BaseDataset, LuxonisDataset from luxonis_ml.data.utils.enums import LabelType @@ -59,29 +68,33 @@ def __init__( ): """High-level abstraction over various parsers. - Automatically recognizes the dataset format and uses the appropriate parser. + Automatically recognizes the dataset format and uses the + appropriate parser. @type dataset_dir: str - @param dataset_dir: Path to the dataset directory or zip file. Can also be a - remote URL supported by L{LuxonisFileSystem}. + @param dataset_dir: Path to the dataset directory or zip file. + Can also be a remote URL supported by L{LuxonisFileSystem}. @type dataset_name: Optional[str] - @param dataset_name: Name of the dataset. If C{None}, the name is derived from - the name of the dataset directory. + @param dataset_name: Name of the dataset. If C{None}, the name + is derived from the name of the dataset directory. @type save_dir: Optional[Union[Path, str]] - @param save_dir: If a remote URL is provided in C{dataset_dir}, the dataset will - be downloaded to this directory. If C{None}, the dataset will be downloaded - to the current working directory. 
+ @param save_dir: If a remote URL is provided in C{dataset_dir}, + the dataset will be downloaded to this directory. If + C{None}, the dataset will be downloaded to the current + working directory. @type dataset_plugin: Optional[str] - @param dataset_plugin: Name of the dataset plugin to use. If C{None}, - C{LuxonisDataset} is used. + @param dataset_plugin: Name of the dataset plugin to use. If + C{None}, C{LuxonisDataset} is used. @type dataset_type: Optional[DatasetType] - @param dataset_type: If provided, the parser will use this dataset type instead - of trying to recognize it automatically. + @param dataset_type: If provided, the parser will use this + dataset type instead of trying to recognize it + automatically. @type kwargs: Dict[str, Any] - @param kwargs: Additional C{kwargs} to be passed to the constructor of specific - L{BaseDataset} implementation. + @param kwargs: Additional C{kwargs} to be passed to the + constructor of specific L{BaseDataset} implementation. @type task_mapping: Optional[Dict[LabelType, str]] - @param task_mapping: Dictionary mapping label types to task names. + @param task_mapping: Dictionary mapping label types to task + names. 
""" save_dir = Path(save_dir) if save_dir else None name = Path(dataset_dir).name @@ -90,7 +103,9 @@ def __init__( if self.dataset_dir.suffix == ".zip": with zipfile.ZipFile(self.dataset_dir, "r") as zip_ref: unzip_dir = self.dataset_dir.parent / self.dataset_dir.stem - logger.info(f"Extracting '{self.dataset_dir.name}' to '{unzip_dir}'") + logger.info( + f"Extracting '{self.dataset_dir.name}' to '{unzip_dir}'" + ) zip_ref.extractall(unzip_dir) self.dataset_dir = unzip_dir @@ -111,8 +126,12 @@ def __init__( dataset_name = dataset_name or name.replace(" ", "_").split(".")[0] - self.dataset = self.dataset_constructor(dataset_name=dataset_name, **kwargs) - self.parser = self.parsers[self.dataset_type](self.dataset, task_mapping or {}) + self.dataset = self.dataset_constructor( + dataset_name=dataset_name, **kwargs + ) + self.parser = self.parsers[self.dataset_type]( + self.dataset, task_mapping or {} + ) @overload def parse(self: "LuxonisParser[str]", **kwargs) -> BaseDataset: @@ -125,11 +144,12 @@ def parse(self: "LuxonisParser[None]", **kwargs) -> LuxonisDataset: def parse(self, **kwargs) -> BaseDataset: """Parses the dataset and returns it in LuxonisDataset format. - If the dataset already exists, parsing will be skipped and the existing dataset - will be returned instead. + If the dataset already exists, parsing will be skipped and the + existing dataset will be returned instead. @type kwargs: Dict[str, Any] - @param kwargs: Additional C{kwargs} for specific parser implementation. + @param kwargs: Additional C{kwargs} for specific parser + implementation. @rtype: LuxonisDataset @return: Parsed dataset in L{LuxonisDataset} format. """ @@ -173,7 +193,8 @@ def _parse_dir(self, **kwargs) -> BaseDataset: Check under each parser for the expected directory structure. @type kwargs: Dict[str, Any] - @param kwargs: Additional C{kwargs} for specific parser function. + @param kwargs: Additional C{kwargs} for specific parser + function. 
@rtype: LuxonisDataset @return: C{LDF} with all the images and annotations parsed. """ @@ -189,20 +210,22 @@ def _parse_split( ) -> BaseDataset: """Parses data from a subdirectory representing a single split. - Should be used if adding/changing only specific split. Check under each parser - for expected directory structure. + Should be used if adding/changing only specific split. Check + under each parser for expected directory structure. @type split: Optional[Literal["train", "val", "test"]] - @param split: As what split the data will be added to LDF. If set, - C{split_ratios} and C{random_split} are ignored. + @param split: As what split the data will be added to LDF. If + set, C{split_ratios} and C{random_split} are ignored. @type random_split: bool - @param random_split: If random splits should be made. If C{True}, - C{split_ratios} are used. + @param random_split: If random splits should be made. If + C{True}, C{split_ratios} are used. @type split_ratios: Optional[Dict[str, float]] - @param split_ratios: Ratios for random splits. Only used if C{random_split} is - C{True}. Defaults to C{{"train": 0.8, "val": 0.1, "test": 0.1}}. + @param split_ratios: Ratios for random splits. Only used if + C{random_split} is C{True}. Defaults to C{{"train": 0.8, + "val": 0.1, "test": 0.1}}. @type kwargs: Dict[str, Any] - @param kwargs: Additional kwargs for specific parser implementation. + @param kwargs: Additional kwargs for specific parser + implementation. @rtype: LuxonisDataset @return: C{LDF} with all the images and annotations parsed. 
""" diff --git a/luxonis_ml/data/parsers/segmentation_mask_directory_parser.py b/luxonis_ml/data/parsers/segmentation_mask_directory_parser.py index b54a0283..8bac4ad7 100644 --- a/luxonis_ml/data/parsers/segmentation_mask_directory_parser.py +++ b/luxonis_ml/data/parsers/segmentation_mask_directory_parser.py @@ -51,11 +51,16 @@ def validate_split(split_path: Path) -> Optional[Dict[str, Any]]: def validate(dataset_dir: Path) -> bool: for split in ["train", "valid", "test"]: split_path = dataset_dir / split - if SegmentationMaskDirectoryParser.validate_split(split_path) is None: + if ( + SegmentationMaskDirectoryParser.validate_split(split_path) + is None + ): return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split( image_dir=dataset_dir / "train", seg_dir=dataset_dir / "train", @@ -87,8 +92,8 @@ def from_split( @type classes_path: Path @param classes_path: Path to CSV file with class names @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images """ idx_class = " Class" # NOTE: space prefix included diff --git a/luxonis_ml/data/parsers/solo_parser.py b/luxonis_ml/data/parsers/solo_parser.py index 134d1c9e..6c6a9ad3 100644 --- a/luxonis_ml/data/parsers/solo_parser.py +++ b/luxonis_ml/data/parsers/solo_parser.py @@ -40,8 +40,8 @@ def validate_split(split_path: Path) -> Optional[Dict[str, Any]]: @type split_path: Path @param split_path: Path to split directory. @rtype: Optional[Dict[str, Any]] - @return: Dictionary with kwargs to pass to L{from_split} method or C{None} if - the split is not in the expected format. 
+ @return: Dictionary with kwargs to pass to L{from_split} method + or C{None} if the split is not in the expected format. """ if not split_path.exists(): return None @@ -94,7 +94,8 @@ def from_dir( @type dataset_dir: str @param dataset_dir: Path to source dataset directory. @rtype: Tuple[List[str], List[str], List[str]] - @return: Tuple with added images for train, valid and test splits. + @return: Tuple with added images for train, valid and test + splits. """ added_train_imgs = self._parse_split(split_path=dataset_dir / "train") @@ -107,14 +108,15 @@ def from_split( self, split_path: Path, ) -> ParserOutput: - """Parses data in a split subdirectory from SOLO format to L{LuxonisDataset} - format. + """Parses data in a split subdirectory from SOLO format to + L{LuxonisDataset} format. @type split_path: Path - @param split_path: Path to directory with sequences of images and annotations. + @param split_path: Path to directory with sequences of images + and annotations. @rtype: L{ParserOutput} - @return: C{LuxonisDataset} generator, list of class names, skeleton dictionary - for keypoints and list of added images. + @return: C{LuxonisDataset} generator, list of class names, + skeleton dictionary for keypoints and list of added images. """ if not os.path.exists(split_path): @@ -127,20 +129,29 @@ def from_split( with open(annotation_definitions_path) as json_file: annotation_definitions_dict = json.load(json_file) else: - raise Exception(f"{annotation_definitions_path} path non-existent.") + raise Exception( + f"{annotation_definitions_path} path non-existent." 
+ ) - annotation_types = self._get_solo_annotation_types(annotation_definitions_dict) + annotation_types = self._get_solo_annotation_types( + annotation_definitions_dict + ) - class_names = self._get_solo_bbox_class_names(annotation_definitions_dict) + class_names = self._get_solo_bbox_class_names( + annotation_definitions_dict + ) # TODO: We make an assumption here that bbox class_names are also valid for all other annotation types in the dataset. Is this OK? # TODO: Can we imagine a case where classes between annotation types are different? Which class names to return in this case? if class_names == []: raise Exception("No class_names identified. ") - keypoint_labels = self._get_solo_keypoint_names(annotation_definitions_dict) + keypoint_labels = self._get_solo_keypoint_names( + annotation_definitions_dict + ) skeletons = { - class_name: {"labels": keypoint_labels} for class_name in class_names + class_name: {"labels": keypoint_labels} + for class_name in class_names } # TODO: setting skeletons by assigning all keypoint names to each class_name. Is this OK? # if NOT, set them manually with LuxonisDataset.set_skeletons() as SOLO format does not @@ -158,7 +169,9 @@ def generator() -> DatasetIterator: os.path.join(sequence_path, "*.frame_data.json") ): # single sequence can have multiple steps if not os.path.exists(frame_path): - raise FileNotFoundError(f"{frame_path} not existent.") + raise FileNotFoundError( + f"{frame_path} not existent." + ) with open(frame_path) as f: frame = json.load(f) @@ -168,7 +181,9 @@ def generator() -> DatasetIterator: annotations = capture["annotations"] img_path = os.path.join(sequence_path, img_fname) if not os.path.exists(img_path): - raise FileNotFoundError(f"{img_path} not existent.") + raise FileNotFoundError( + f"{img_path} not existent." 
+ ) if "BoundingBox2DAnnotation" in annotation_types: for anno in annotations: @@ -197,7 +212,10 @@ def generator() -> DatasetIterator: }, } - if "SemanticSegmentationAnnotation" in annotation_types: + if ( + "SemanticSegmentationAnnotation" + in annotation_types + ): for anno in annotations: if anno["@type"].endswith( "SemanticSegmentationAnnotation" @@ -205,15 +223,21 @@ def generator() -> DatasetIterator: sseg_annotations = anno mask_fname = sseg_annotations["filename"] - mask_path = os.path.join(sequence_path, mask_fname) + mask_path = os.path.join( + sequence_path, mask_fname + ) mask = cv2.imread(mask_path) for instance in sseg_annotations["instances"]: class_name = instance["labelName"] r, g, b, _ = instance["pixelValue"] curr_mask = np.zeros_like(mask) - curr_mask[np.all(mask == [b, g, r], axis=2)] = 1 - curr_mask = np.max(curr_mask, axis=2) # 3D->2D + curr_mask[ + np.all(mask == [b, g, r], axis=2) + ] = 1 + curr_mask = np.max( + curr_mask, axis=2 + ) # 3D->2D yield { "file": img_path, @@ -226,13 +250,19 @@ def generator() -> DatasetIterator: if "KeypointAnnotation" in annotation_types: for anno in annotations: - if anno["@type"].endswith("KeypointAnnotation"): + if anno["@type"].endswith( + "KeypointAnnotation" + ): keypoint_annotations = anno["values"] - for keypoints_annotation in keypoint_annotations: + for ( + keypoints_annotation + ) in keypoint_annotations: label_id = keypoints_annotation["labelId"] keypoints = [] - for keypoint in keypoints_annotation["keypoints"]: + for keypoint in keypoints_annotation[ + "keypoints" + ]: x, y = keypoint["location"] visibility = keypoint["state"] keypoints.append( @@ -259,7 +289,9 @@ def generator() -> DatasetIterator: added_images, ) - def _get_solo_annotation_types(self, annotation_definitions_dict: dict) -> list: + def _get_solo_annotation_types( + self, annotation_definitions_dict: dict + ) -> list: """List all annotation types present in the dataset. 
@type annotation_definitions_dict: dict diff --git a/luxonis_ml/data/parsers/tensorflow_csv_parser.py b/luxonis_ml/data/parsers/tensorflow_csv_parser.py index 94e7c1a7..99943e53 100644 --- a/luxonis_ml/data/parsers/tensorflow_csv_parser.py +++ b/luxonis_ml/data/parsers/tensorflow_csv_parser.py @@ -48,7 +48,9 @@ def validate(dataset_dir: Path) -> bool: return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split( image_dir=dataset_dir / "train", annotation_path=dataset_dir / "train" / "_annotations.csv", @@ -63,18 +65,23 @@ def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: ) return added_train_imgs, added_val_imgs, added_test_imgs - def from_split(self, image_dir: Path, annotation_path: Path) -> ParserOutput: - """Parses annotations from TensorflowCSV format to LDF. Annotations include - classification and object detection. + def from_split( + self, image_dir: Path, annotation_path: Path + ) -> ParserOutput: + """Parses annotations from TensorflowCSV format to LDF. + Annotations include classification and object detection. 
@type image_dir: Path @param image_dir: Path to directory with images @type annotation_path: Path @param annotation_path: Path to annotation CSV file @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for + @return: Annotation generator, list of classes names, skeleton + dictionary for """ - df = pl.read_csv(annotation_path).filter(pl.col("filename").is_not_null()) + df = pl.read_csv(annotation_path).filter( + pl.col("filename").is_not_null() + ) images_annotations = {} class_names = set(df["class"]) @@ -95,7 +102,9 @@ def from_split(self, image_dir: Path, annotation_path: Path) -> ParserOutput: ymin = row["ymin"] xmax = row["xmax"] ymax = row["ymax"] - bbox_xywh = np.array([xmin, ymin, xmax - xmin, ymax - ymin], dtype=float) + bbox_xywh = np.array( + [xmin, ymin, xmax - xmin, ymax - ymin], dtype=float + ) bbox_xywh[::2] /= width bbox_xywh[1::2] /= height bbox_xywh = bbox_xywh.tolist() diff --git a/luxonis_ml/data/parsers/voc_parser.py b/luxonis_ml/data/parsers/voc_parser.py index f060375c..19c6ed77 100644 --- a/luxonis_ml/data/parsers/voc_parser.py +++ b/luxonis_ml/data/parsers/voc_parser.py @@ -45,7 +45,9 @@ def validate(dataset_dir: Path) -> bool: return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split( image_dir=dataset_dir / "train", annotation_dir=dataset_dir / "train", @@ -65,16 +67,17 @@ def from_split( image_dir: Path, annotation_dir: Path, ) -> ParserOutput: - """Parses annotations from VOC format to LDF. Annotations include classification - and object detection. + """Parses annotations from VOC format to LDF. Annotations + include classification and object detection. 
@type image_dir: Path @param image_dir: Path to directory with images @type annotation_dir: Path - @param annotation_dir: Path to directory with C{.xml} annotations + @param annotation_dir: Path to directory with C{.xml} + annotations @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. """ class_names = set() diff --git a/luxonis_ml/data/parsers/yolov4_parser.py b/luxonis_ml/data/parsers/yolov4_parser.py index ad56875d..c295900f 100644 --- a/luxonis_ml/data/parsers/yolov4_parser.py +++ b/luxonis_ml/data/parsers/yolov4_parser.py @@ -49,7 +49,9 @@ def validate(dataset_dir: Path) -> bool: return False return True - def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: + def from_dir( + self, dataset_dir: Path + ) -> Tuple[List[str], List[str], List[str]]: added_train_imgs = self._parse_split( image_dir=dataset_dir / "train", annotation_path=dataset_dir / "train" / "_annotations.txt", @@ -70,8 +72,8 @@ def from_dir(self, dataset_dir: Path) -> Tuple[List[str], List[str], List[str]]: def from_split( self, image_dir: Path, annotation_path: Path, classes_path: Path ) -> ParserOutput: - """Parses annotations from YoloV4 format to LDF. Annotations include - classification and object detection. + """Parses annotations from YoloV4 format to LDF. Annotations + include classification and object detection. @type image_dir: Path @param image_dir: Path to directory with images @@ -80,11 +82,13 @@ def from_split( @type classes_path: Path @param classes_path: Path to file with class names @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. 
""" with open(classes_path) as f: - class_names = {i: line.rstrip() for i, line in enumerate(f.readlines())} + class_names = { + i: line.rstrip() for i, line in enumerate(f.readlines()) + } def generator() -> DatasetIterator: with open(annotation_path) as f: diff --git a/luxonis_ml/data/parsers/yolov6_parser.py b/luxonis_ml/data/parsers/yolov6_parser.py index cfdbdc74..efefc9d9 100644 --- a/luxonis_ml/data/parsers/yolov6_parser.py +++ b/luxonis_ml/data/parsers/yolov6_parser.py @@ -91,8 +91,8 @@ def from_dir( def from_split( self, image_dir: Path, annotation_dir: Path, classes_path: Path ) -> ParserOutput: - """Parses annotations from YoloV6 format to LDF. Annotations include - classification and object detection. + """Parses annotations from YoloV6 format to LDF. Annotations + include classification and object detection. @type image_dir: Path @param image_dir: Path to directory with images @@ -101,8 +101,8 @@ def from_split( @type classes_path: Path @param classes_path: Path to yaml file with classes names @rtype: L{ParserOutput} - @return: Annotation generator, list of classes names, skeleton dictionary for - keypoints and list of added images. + @return: Annotation generator, list of classes names, skeleton + dictionary for keypoints and list of added images. """ with open(classes_path) as f: classes_data = yaml.safe_load(f) diff --git a/luxonis_ml/data/requirements.txt b/luxonis_ml/data/requirements.txt index 1a46e70c..5e787cfc 100644 --- a/luxonis_ml/data/requirements.txt +++ b/luxonis_ml/data/requirements.txt @@ -10,5 +10,5 @@ pandas>=2.0.0 pyarrow>=13.0.0 pycocotools>=2.0.7 typeguard>=4.1.0 -polars>=0.20.31 +polars[timezone]>=0.20.31 ordered-set>=4.0.0 diff --git a/luxonis_ml/data/utils/data_utils.py b/luxonis_ml/data/utils/data_utils.py index c05540bd..f4a9abfe 100644 --- a/luxonis_ml/data/utils/data_utils.py +++ b/luxonis_ml/data/utils/data_utils.py @@ -5,8 +5,8 @@ def check_array(path: Path) -> None: - """Checks whether a path to a numpy array is valid. 
This checks that th file exists - and is readable by numpy. + """Checks whether a path to a numpy array is valid. This checks that + th file exists and is readable by numpy. @type values: Path @param values: A path to a numpy array. @@ -23,9 +23,13 @@ def _check_valid_array(path: Path) -> bool: return False if not isinstance(path, Path) or not path.suffix == ".npy": - raise ValueError(f"Array path {path} must be a path to a numpy array (.npy)") + raise ValueError( + f"Array path {path} must be a path to a numpy array (.npy)" + ) if not _check_valid_array(path): - raise ValueError(f"Array at path {path} is not a valid numpy array (.npy)") + raise ValueError( + f"Array at path {path} is not a valid numpy array (.npy)" + ) def rgb_to_bool_masks( @@ -33,8 +37,8 @@ def rgb_to_bool_masks( class_colors: Dict[str, Tuple[int, int, int]], add_background_class: bool = False, ) -> Iterator[Tuple[str, np.ndarray]]: - """Helper function to convert an RGB segmentation mask to boolean masks for each - class. + """Helper function to convert an RGB segmentation mask to boolean + masks for each class. Example:: >>> segmentation_mask = np.array([[[0, 0, 0], [255, 0, 0], [0, 255, 0]], @@ -63,7 +67,9 @@ def rgb_to_bool_masks( @return: An iterator of tuples where the first element is the class name and the second element is a boolean mask for that class. """ - color_to_id = {tuple(color): i for i, color in enumerate(class_colors.values())} + color_to_id = { + tuple(color): i for i, color in enumerate(class_colors.values()) + } lookup_table = np.zeros((256, 256, 256), dtype=np.uint8) for color, id in color_to_id.items(): diff --git a/luxonis_ml/data/utils/enums.py b/luxonis_ml/data/utils/enums.py index b0312811..49b84bcd 100644 --- a/luxonis_ml/data/utils/enums.py +++ b/luxonis_ml/data/utils/enums.py @@ -15,13 +15,11 @@ class LabelType(str, Enum): class DataLabelType(Enum): """Supported computer vision label types. 
- Annotation types can be nested (I{e.g.} a BOX has 2 LABELS, a BOX has a POLYLINE - instance segmentation, I{etc.}) + Annotation types can be nested (I{e.g.} a BOX has 2 LABELS, a BOX + has a POLYLINE instance segmentation, I{etc.}) """ - CLASSIFICATION = ( - "classification" # used for single, multi-class, or multi-label classification - ) + CLASSIFICATION = "classification" # used for single, multi-class, or multi-label classification BOX = "box" # bounding box POLYLINE = "polyline" # polyline to represent segmentation mask instances SEGMENTATION = "segmentation" # RLE encoding of a binary segmentation mask diff --git a/luxonis_ml/data/utils/parquet.py b/luxonis_ml/data/utils/parquet.py index c28d7620..870602dd 100644 --- a/luxonis_ml/data/utils/parquet.py +++ b/luxonis_ml/data/utils/parquet.py @@ -11,10 +11,11 @@ def __init__(self, directory: PathType, num_rows: int = 100_000) -> None: """Manages the insertion of data into parquet files. @type directory: str - @param directory: The local directory in which parquet files are stored. + @param directory: The local directory in which parquet files are + stored. @type num_rows: int - @param num_rows: The maximum number of rows permitted in a parquet file before - another file is created. + @param num_rows: The maximum number of rows permitted in a + parquet file before another file is created. """ self.dir = Path(directory) @@ -49,8 +50,8 @@ def write(self, add_data: Dict[str, Any]) -> None: """Writes a row to the current working parquet file. @type add_data: Dict - @param add_data: A dictionary representing annotations, mapping annotation types - to values. + @param add_data: A dictionary representing annotations, mapping + annotation types to values. 
""" if not self.buffer: diff --git a/luxonis_ml/data/utils/visualizations.py b/luxonis_ml/data/utils/visualizations.py index b3f7e904..3fbba3de 100644 --- a/luxonis_ml/data/utils/visualizations.py +++ b/luxonis_ml/data/utils/visualizations.py @@ -37,7 +37,8 @@ def create_text_image( @type font_size: float @param font_size: The font size of the text. Default is 0.7. @type bg_color: Tuple[int, int, int] - @param bg_color: The background color of the image. Default is white. + @param bg_color: The background color of the image. Default is + white. @type text_color: Tuple[int, int, int] @param text_color: The color of the text. Default is black. """ @@ -51,14 +52,23 @@ def create_text_image( text_y = (height + text_size[1]) // 2 cv2.putText( - img, text, (text_x, text_y), font, font_size, text_color, 1, cv2.LINE_AA + img, + text, + (text_x, text_y), + font, + font_size, + text_color, + 1, + cv2.LINE_AA, ) return img def concat_images( - image_dict: Dict[str, np.ndarray], padding: int = 10, label_height: int = 30 + image_dict: Dict[str, np.ndarray], + padding: int = 10, + label_height: int = 30, ): """Concatenates images into a single image with labels. @@ -95,7 +105,9 @@ def concat_images( x_start = j * cell_width label = create_text_image(name, cell_width, label_height) - output[y_start : y_start + label_height, x_start : x_start + cell_width] = label + output[ + y_start : y_start + label_height, x_start : x_start + cell_width + ] = label h, w = img.shape[:2] y_img = y_start + label_height + padding @@ -123,7 +135,8 @@ def visualize( @type labels: Labels @param labels: The labels to visualize. @type class_names: Dict[str, List[str]] - @param class_names: A dictionary mapping task names to a list of class names. + @param class_names: A dictionary mapping task names to a list of + class names. @rtype: np.ndarray @return: The visualized image. 
""" diff --git a/luxonis_ml/embeddings/methods/OOD.py b/luxonis_ml/embeddings/methods/OOD.py index 88d4c33f..53f6be05 100644 --- a/luxonis_ml/embeddings/methods/OOD.py +++ b/luxonis_ml/embeddings/methods/OOD.py @@ -23,6 +23,7 @@ - numpy - scikit-learn """ + from typing import Optional, Union import numpy as np @@ -41,11 +42,11 @@ def isolation_forest_OOD( @type X: np.array @param X: The embeddings to use. @type contamination: Union[float, str] - @param contamination: The contamination parameter for Isolation Forests. Default is - 'auto'. + @param contamination: The contamination parameter for Isolation + Forests. Default is 'auto'. @type n_jobs: int - @param n_jobs: The number of jobs to use. Default is -1, which means all available - CPUs. + @param n_jobs: The number of jobs to use. Default is -1, which means + all available CPUs. @type verbose: int @param verbose: The verbosity level. Default is 1. @type random_state: Optional[int] @@ -74,13 +75,14 @@ def isolation_forest_OOD( def leverage_OOD(X: np.array, std_threshold: int = 3) -> np.array: - """Out-of-distribution detection using leverage and linear regression. + """Out-of-distribution detection using leverage and linear + regression. @type X: np.array @param X: The embeddings to use. @type std_threshold: int - @param std_threshold: The number of standard deviations to use for the leverage - threshold. Default is 3. + @param std_threshold: The number of standard deviations to use for + the leverage threshold. Default is 3. @rtype: np.array @return: The indices of the embeddings that are out-of-distribution. 
""" diff --git a/luxonis_ml/embeddings/methods/duplicate.py b/luxonis_ml/embeddings/methods/duplicate.py index bec3dd77..c4dda8d1 100644 --- a/luxonis_ml/embeddings/methods/duplicate.py +++ b/luxonis_ml/embeddings/methods/duplicate.py @@ -87,19 +87,24 @@ def _plot_kde( def kde_peaks( - data: np.ndarray, bandwidth: Union[str, float] = "scott", plot: bool = False + data: np.ndarray, + bandwidth: Union[str, float] = "scott", + plot: bool = False, ) -> Tuple[np.ndarray, np.ndarray, int, float]: - """Find peaks in a KDE distribution using scipy's argrelextrema function. + """Find peaks in a KDE distribution using scipy's argrelextrema + function. @type data: np.ndarray @param data: The data to fit the KDE. @type bandwidth: Union[str, float] - @param bandwidth: The bandwidth to use for the KDE. Default is 'scott'. + @param bandwidth: The bandwidth to use for the KDE. Default is + 'scott'. @type plot: bool @param plot: Whether to plot the KDE. @rtype: Tuple[np.ndarray, np.ndarray, int, float] - @return: The indices of the KDE maxima, the indices of the KDE minima, the index of - the global maxima, and the standard deviation of the data. + @return: The indices of the KDE maxima, the indices of the KDE + minima, the index of the global maxima, and the standard + deviation of the data. """ # fit density kde = FFTKDE(kernel="gaussian", bw=bandwidth) @@ -136,28 +141,33 @@ def find_similar( ) -> np.ndarray: """Find the most similar embeddings to the reference embeddings. - @type reference_embeddings: Union[str, List[str], List[List[float]], np.ndarray] - @param reference_embeddings: The embeddings to compare against. Or a list of of - embedding instance_ids that reside in VectorDB. + @type reference_embeddings: Union[str, List[str], List[List[float]], + np.ndarray] + @param reference_embeddings: The embeddings to compare against. Or a + list of of embedding instance_ids that reside in VectorDB. 
@type vectordb_api: VectorDBAPI @param vectordb_api: The VectorDBAPI instance to use. @type k: int @param k: The number of embeddings to return. Default is 100. @type n: int - @param n: The number of embeddings to compare against. Default is 1000. (This is the - number of embeddings that are returned by the VectorDB search. It matters for - the KDE, as it can be slow for large n. Your choice of n depends on the amount - of duplicates in your dataset, the more duplicates, the larger n should be. If - you have 2-10 duplicates per image, n=100 should be ok. If you have 50-300 - duplicates per image, n=1000 should work good enough. + @param n: The number of embeddings to compare against. Default is + 1000. (This is the number of embeddings that are returned by the + VectorDB search. It matters for the KDE, as it can be slow for + large n. Your choice of n depends on the amount of duplicates in + your dataset, the more duplicates, the larger n should be. If + you have 2-10 duplicates per image, n=100 should be ok. If you + have 50-300 duplicates per image, n=1000 should work good + enough. @type method: str - @param method: The method to use to find the most similar embeddings. If 'first' use - the first of the reference embeddings. If 'average', use the average of the - reference embeddings. + @param method: The method to use to find the most similar + embeddings. If 'first' use the first of the reference + embeddings. If 'average', use the average of the reference + embeddings. @type k_method: str - @param k_method: The method to select the best k. If None, use k as is. If - 'kde_basic', use the minimum of the KDE. If 'kde_peaks', use the minimum of the - KDE peaks, according to a specific hardcoded hevristics/thresholds. + @param k_method: The method to select the best k. If None, use k as + is. If 'kde_basic', use the minimum of the KDE. If 'kde_peaks', + use the minimum of the KDE peaks, according to a specific + hardcoded hevristics/thresholds. 
@type kde_bw: Union[str, float] @param kde_bw: The bandwidth to use for the KDE. Default is 'scott'. @type plot: bool @@ -229,7 +239,9 @@ def find_similar( else: # get maxima and minima of the KDE on the distances - _, minima, _, _ = kde_peaks(similarities, bandwidth=kde_bw, plot=plot) + _, minima, _, _ = kde_peaks( + similarities, bandwidth=kde_bw, plot=plot + ) if len(minima) > 0: minima = minima[-1] if minima < 0.94: diff --git a/luxonis_ml/embeddings/methods/mistakes.py b/luxonis_ml/embeddings/methods/mistakes.py index 27ad2538..f76eedc8 100644 --- a/luxonis_ml/embeddings/methods/mistakes.py +++ b/luxonis_ml/embeddings/methods/mistakes.py @@ -32,9 +32,11 @@ from sklearn.neighbors import KNeighborsClassifier -def find_mismatches_centroids(X: np.array, y: np.array) -> Tuple[np.array, np.array]: - """Find mismatches in the dataset. A mismatch is defined as a sample that is closer - to another centroid than to its own centroid. +def find_mismatches_centroids( + X: np.array, y: np.array +) -> Tuple[np.array, np.array]: + """Find mismatches in the dataset. A mismatch is defined as a sample + that is closer to another centroid than to its own centroid. @type X: np.array @param X: The embeddings to use. 
@@ -45,7 +47,9 @@ def find_mismatches_centroids(X: np.array, y: np.array) -> Tuple[np.array, np.ar """ unique_labels = np.unique(y) # Create a mapping from string labels to integer indices - label_to_index = {label: index for index, label in enumerate(unique_labels)} + label_to_index = { + label: index for index, label in enumerate(unique_labels) + } # calculate centroids of each class centroids = [] diff --git a/luxonis_ml/embeddings/methods/representative.py b/luxonis_ml/embeddings/methods/representative.py index dfee0867..c8f785bb 100644 --- a/luxonis_ml/embeddings/methods/representative.py +++ b/luxonis_ml/embeddings/methods/representative.py @@ -58,22 +58,25 @@ def calculate_similarity_matrix(embeddings: np.ndarray) -> np.ndarray: def find_representative_greedy( distance_matrix: np.ndarray, desired_size: int = 1000, seed: int = 0 ) -> List[int]: - """Find the most representative images using a greedy algorithm. Gready search of - maximally unique embeddings. + """Find the most representative images using a greedy algorithm. + Gready search of maximally unique embeddings. @type distance_matrix: np.array @param distance_matrix: The distance matrix to use. @type desired_size: int - @param desired_size: The desired size of the representative set. Default is 1000. + @param desired_size: The desired size of the representative set. + Default is 1000. @type seed: int - @param seed: The index of the seed image. Default is 0. Must be in the range [0, - num_images-1]. + @param seed: The index of the seed image. Default is 0. Must be in + the range [0, num_images-1]. @rtype: List[int] @return: The indices of the representative images. """ num_images = distance_matrix.shape[0] selected_images = set() - selected_images.add(seed) # If seed==0: start with the first image as a seed. + selected_images.add( + seed + ) # If seed==0: start with the first image as a seed. 
while len(selected_images) < desired_size: max_distance = -1 @@ -82,7 +85,9 @@ def find_representative_greedy( for i in range(num_images): if i not in selected_images: # Calculate the minimum similarity to all previously selected images - min_distance = min([distance_matrix[i, j] for j in selected_images]) + min_distance = min( + [distance_matrix[i, j] for j in selected_images] + ) if min_distance > max_distance: max_distance = min_distance best_image = i @@ -96,17 +101,21 @@ def find_representative_greedy( def find_representative_greedy_vectordb( vectordb_api: VectorDBAPI, desired_size: int = 1000, seed: int = None ) -> List[int]: - """Find the most representative embeddings using a greedy algorithm with VectorDB. + """Find the most representative embeddings using a greedy algorithm + with VectorDB. @note: Due to many requests, this function is very slow. Use - vectordb_api.retrieve_all_embeddings() and find_representative_greedy() instead. + vectordb_api.retrieve_all_embeddings() and + find_representative_greedy() instead. @type vectordb_api: VectorDBAPI - @param vectordb_api: The Vector database client instance to use for searches. + @param vectordb_api: The Vector database client instance to use for + searches. @type desired_size: int - @param desired_size: The desired size of the representative set. Default is 1000. + @param desired_size: The desired size of the representative set. + Default is 1000. @type seed: int - @param seed: The ID of the seed embedding. Default is None, which means a random - seed is chosen. + @param seed: The ID of the seed embedding. Default is None, which + means a random seed is chosen. @rtype: List[int] @return: The IDs of the representative embeddings. """ @@ -150,15 +159,17 @@ def find_representative_kmedoids( max_iter: int = 100, seed: int = None, ) -> List[int]: - """Find the most representative images using k-medoids. K-medoids clustering of - embeddings. + """Find the most representative images using k-medoids. 
K-medoids + clustering of embeddings. @type similarity_matrix: np.array @param similarity_matrix: The similarity matrix to use. @type desired_size: int - @param desired_size: The desired size of the representative set. Default is 1000. + @param desired_size: The desired size of the representative set. + Default is 1000. @type max_iter: int - @param max_iter: The maximum number of iterations to use. Default is 100. + @param max_iter: The maximum number of iterations to use. Default is + 100. @type seed: int @param seed: The random seed to use. Default is None. @rtype: list diff --git a/luxonis_ml/embeddings/utils/embedding.py b/luxonis_ml/embeddings/utils/embedding.py index 22aa36d4..3f866ffa 100644 --- a/luxonis_ml/embeddings/utils/embedding.py +++ b/luxonis_ml/embeddings/utils/embedding.py @@ -31,6 +31,7 @@ - Ensure the output_layer_name in C{extract_embeddings} matches the appropriate output layer in the ONNX model. - This module specifically focuses on ONNX models and reading images from Luxonis Filesystem. """ + from typing import Callable, List, Tuple import cv2 diff --git a/luxonis_ml/embeddings/utils/ldf.py b/luxonis_ml/embeddings/utils/ldf.py index 7410eb8b..71d49859 100644 --- a/luxonis_ml/embeddings/utils/ldf.py +++ b/luxonis_ml/embeddings/utils/ldf.py @@ -1,5 +1,5 @@ -"""Utilities for generating image embeddings and inserting them into a VectorDB -database. +"""Utilities for generating image embeddings and inserting them into a +VectorDB database. This script provides functions for: @@ -25,6 +25,7 @@ Ensure that a VectorDB server is running and accessible before using these utilities. """ + from typing import Any, Callable, Dict, List import numpy as np @@ -36,8 +37,8 @@ def _get_sample_payloads_LDF(dataset: LuxonisDataset) -> List[Dict[str, Any]]: - """Extract payloads from the LuxonisDataset. Currently supports classification - datasets. + """Extract payloads from the LuxonisDataset. Currently supports + classification datasets. 
@type dataset: LuxonisDataset @param dataset: An instance of LuxonisDataset. @@ -71,8 +72,8 @@ def _get_sample_payloads_LDF(dataset: LuxonisDataset) -> List[Dict[str, Any]]: def _filter_new_samples_by_id( vectordb_api: VectorDBAPI, all_payloads: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: - """Filter out samples that are already in the Vector database based on their - instance ID. + """Filter out samples that are already in the Vector database based + on their instance ID. @type vectordb_api: L{VectorDBAPI} @param vectordb_api: Vector database API instance. @@ -143,7 +144,8 @@ def generate_embeddings( emb_batch_size: int = 64, vectordb_batch_size: int = 64, ) -> Dict[str, List[float]]: - """Generate embeddings for a given dataset and insert them into a VectorDB. + """Generate embeddings for a given dataset and insert them into a + VectorDB. @type luxonis_dataset: L{LuxonisDataset} @param luxonis_dataset: The dataset object. @@ -152,14 +154,16 @@ def generate_embeddings( @type vectordb_api: L{VectorDBAPI} @param vectordb_api: VectorDBAPI instance. @type output_layer_name: str - @param output_layer_name: Name of the output layer in the ONNX model. + @param output_layer_name: Name of the output layer in the ONNX + model. @type transform: Callable[[np.ndarray], np.ndarray] - @param transform: Preprocessing function for images. If None, default preprocessing - is used. + @param transform: Preprocessing function for images. If None, + default preprocessing is used. @type emb_batch_size: int @param emb_batch_size: Batch size for generating embeddings. @type vectordb_batch_size: int - @param vectordb_batch_size: Batch size for inserting into a vector DB. + @param vectordb_batch_size: Batch size for inserting into a vector + DB. @type: Dict[str, List[float]] @return: Dictionary of instance ID to embedding. 
""" @@ -180,7 +184,9 @@ def generate_embeddings( ) new_payloads = [new_payloads[ix] for ix in succ_ix] - _batch_upsert(vectordb_api, new_embeddings, new_payloads, vectordb_batch_size) + _batch_upsert( + vectordb_api, new_embeddings, new_payloads, vectordb_batch_size + ) # make a instance_id : embedding dictionary instance_id_to_embedding = { diff --git a/luxonis_ml/embeddings/utils/model.py b/luxonis_ml/embeddings/utils/model.py index 31a5f0a5..4097c68f 100644 --- a/luxonis_ml/embeddings/utils/model.py +++ b/luxonis_ml/embeddings/utils/model.py @@ -38,13 +38,14 @@ def extend_output_onnx( intermediate_tensor_name: str = "/Flatten_output_0", overwrite: bool = False, ) -> onnx.ModelProto: - """Set an intermediate output layer as output of the provided ONNX model. + """Set an intermediate output layer as output of the provided ONNX + model. - If C{overwrite} is set to True, the second to last layer output will be set as - output layer and renamed. + If C{overwrite} is set to True, the second to last layer output will + be set as output layer and renamed. 
- (You need to know the name of the intermediate layer, which you can find by - inspecting the ONNX model with Netron.app) + (You need to know the name of the intermediate layer, which you can + find by inspecting the ONNX model with Netron.app) """ if overwrite: onnx.checker.check_model(onnx_model) diff --git a/luxonis_ml/embeddings/utils/qdrant.py b/luxonis_ml/embeddings/utils/qdrant.py index 57c71088..cb04e21a 100644 --- a/luxonis_ml/embeddings/utils/qdrant.py +++ b/luxonis_ml/embeddings/utils/qdrant.py @@ -36,10 +36,12 @@ class QdrantManager: - """Class to manage Qdrant Docker container and perform various operations related to - embeddings.""" + """Class to manage Qdrant Docker container and perform various + operations related to embeddings.""" - def __init__(self, image_name="qdrant/qdrant", container_name="qdrant_container"): + def __init__( + self, image_name="qdrant/qdrant", container_name="qdrant_container" + ): """Initialize the QdrantManager.""" self.image_name = image_name self.container_name = container_name @@ -142,20 +144,27 @@ def stop_docker_qdrant(self): class QdrantAPI(VectorDBAPI): - """Class to perform various Qdrant operations related to embeddings.""" + """Class to perform various Qdrant operations related to + embeddings.""" def __init__(self, host: str = "localhost", port: int = 6333) -> None: - """Initialize the QdrantAPI without setting a specific collection. + """Initialize the QdrantAPI without setting a specific + collection. @type host: str - @param host: The host address of the Qdrant server. Default is "localhost". + @param host: The host address of the Qdrant server. Default is + "localhost". @type port: int - @param port: The port number of the Qdrant server. Default is 6333. + @param port: The port number of the Qdrant server. Default is + 6333. 
""" self.client = QdrantClient(host=host, port=port) def create_collection( - self, collection_name: str, properties: List[str], vector_size: int = 512 + self, + collection_name: str, + properties: List[str], + vector_size: int = 512, ) -> None: """Create a collection in Qdrant with specified properties. @@ -164,7 +173,8 @@ def create_collection( @type properties: List[str] @param properties: The list of properties for the collection. @type vector_size: int - @param vector_size: The size of the embedding vectors. Default is 512. + @param vector_size: The size of the embedding vectors. Default + is 512. """ self.collection_name = collection_name self.properties = properties @@ -174,7 +184,9 @@ def create_collection( except Exception: self.client.recreate_collection( collection_name=self.collection_name, - vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE), + vectors_config=VectorParams( + size=vector_size, distance=Distance.COSINE + ), ) print("Created new collection") @@ -193,23 +205,28 @@ def insert_embeddings( payloads: List[Dict[str, Any]], batch_size: int = 50, ) -> None: - """Batch insert embeddings with IDs and additional metadata into a collection. + """Batch insert embeddings with IDs and additional metadata into + a collection. @type ids: List[str] @param ids: The list of instance_ids for the embeddings. @type embeddings: List[List[float]] @param embeddings: The list of embedding vectors. @type payloads: List[Dict[str, Any]] - @param payloads: The list of additional metadata for the embeddings. + @param payloads: The list of additional metadata for the + embeddings. @type batch_size: int - @param batch_size: The batch size for inserting embeddings. Default is 50. + @param batch_size: The batch size for inserting embeddings. + Default is 50. 
""" total_len = len(embeddings) # check if payloads key values subset of the self.properties for payload in payloads: if not set(payload.keys()).issubset(set(self.properties)): - raise ValueError("Payload keys should be subset of the properties") + raise ValueError( + "Payload keys should be subset of the properties" + ) for i in range(0, total_len, batch_size): batch_ids = ids[i : i + batch_size] @@ -221,7 +238,9 @@ def insert_embeddings( ) # Upsert the batch of points to the Qdrant collection - self.client.upsert(collection_name=self.collection_name, points=batch) + self.client.upsert( + collection_name=self.collection_name, points=batch + ) def search_similar_embeddings( self, embedding: List[float], top_k: int = 5 @@ -231,10 +250,11 @@ def search_similar_embeddings( @type embedding: List[float] @param embedding: The query embedding vector. @type top_k: int - @param top_k: The number of similar embeddings to retrieve. Default is 5. + @param top_k: The number of similar embeddings to retrieve. + Default is 5. @rtype: Tuple[List[str], List[float]] - @return: The list of instance_ids of the similar embeddings and the list of - similarity scores. + @return: The list of instance_ids of the similar embeddings and + the list of similarity scores. """ search_results = self.client.search( collection_name=self.collection_name, @@ -250,22 +270,25 @@ def search_similar_embeddings( return ids, scores def get_similarity_scores( - self, reference_id: str, other_ids: List[str], sort_distances: bool = True + self, + reference_id: str, + other_ids: List[str], + sort_distances: bool = True, ) -> Tuple[List[str], List[float]]: - """Get a list of similarity scores between the reference embedding and other - embeddings. + """Get a list of similarity scores between the reference + embedding and other embeddings. @type reference_id: int @param reference_id: The instance_id of the reference embedding. 
@type other_ids: List[int] - @param other_ids: The list of instance_ids of other embeddings to compare with - the reference. + @param other_ids: The list of instance_ids of other embeddings + to compare with the reference. @type sort_distances: bool - @param sort_distances: Whether to sort the results by distance or keep the - original order. + @param sort_distances: Whether to sort the results by distance + or keep the original order. @rtype: Tuple[List[int], List[float] - @return: The list of instance_ids of the other embeddings and the list of - similarity scores. + @return: The list of instance_ids of the other embeddings and + the list of similarity scores. """ # Retrieve the embedding vector for the reference_id reference_embedding = self.get_embeddings_from_ids([reference_id])[0] @@ -300,12 +323,15 @@ def get_similarity_scores( return ids, scores def compute_similarity_matrix(self) -> List[List[float]]: - """Compute a full similarity matrix for all embeddings in a Qdrant collection. + """Compute a full similarity matrix for all embeddings in a + Qdrant collection. @rtype: Tuple[List[str], List[List[float]]] - @return: The list of instance_ids of the embeddings and the similarity matrix. - @note: This method is not recommended for large collections. It is better to use - the L{get_all_embeddings} method and compute the similarity matrix yourself. + @return: The list of instance_ids of the embeddings and the + similarity matrix. + @note: This method is not recommended for large collections. It + is better to use the L{get_all_embeddings} method and + compute the similarity matrix yourself. 
""" # Get all embeddings ids, embeddings = self.retrieve_all_embeddings() @@ -314,7 +340,10 @@ def compute_similarity_matrix(self) -> List[List[float]]: # Create a list of search requests search_queries = [ SearchRequest( - vector=emb, with_payload=False, with_vector=False, limit=len(embeddings) + vector=emb, + with_payload=False, + with_vector=False, + limit=len(embeddings), ) for emb in embeddings ] @@ -328,7 +357,9 @@ def compute_similarity_matrix(self) -> List[List[float]]: requests=search_queries[patch : patch + 100], ) batch_search_results.extend(batch_search_results_i) - print("Completed search for batch {}-{}".format(patch, patch + 100)) + print( + "Completed search for batch {}-{}".format(patch, patch + 100) + ) # Create a dictionary for O(1) lookup of ids id_to_index = {id: index for index, id in enumerate(ids)} @@ -346,11 +377,12 @@ def compute_similarity_matrix(self) -> List[List[float]]: return sim_matrix def retrieve_embeddings_by_ids(self, ids: List[str]) -> List[List[float]]: - """Retrieve embeddings associated with a list of IDs from a Qdrant collection. - The order of the embeddings IS preserved. + """Retrieve embeddings associated with a list of IDs from a + Qdrant collection. The order of the embeddings IS preserved. @type ids: List[str] - @param ids: The list of instance_ids of the embeddings to retrieve. + @param ids: The list of instance_ids of the embeddings to + retrieve. @rtype: List[List[float]] @return: The list of embedding vectors. """ @@ -394,11 +426,12 @@ def retrieve_all_ids(self) -> List[str]: return ids def retrieve_all_embeddings(self) -> Tuple[List[str], List[List[float]]]: - """Retrieve all embeddings and their IDs from a Qdrant collection. + """Retrieve all embeddings and their IDs from a Qdrant + collection. @rtype: Tuple[List[str], List[List[float]]] - @return: The list of instance_ids of the embeddings and the list of embedding - vectors. 
+ @return: The list of instance_ids of the embeddings and the list + of embedding vectors. """ # Get the number of points in the collection collection_info = self.client.get_collection( @@ -423,13 +456,15 @@ def retrieve_all_embeddings(self) -> Tuple[List[str], List[List[float]]]: def retrieve_payloads_by_ids( self, ids: List[str], properties: Optional[List[str]] = None ) -> List[Dict[str, Any]]: - """Retrieve specified payload properties for a list of IDs from a collection. - The order of the labels IS preserved. + """Retrieve specified payload properties for a list of IDs from + a collection. The order of the labels IS preserved. @type ids: List[str] - @param ids: The list of instance_ids of the embeddings to retrieve. + @param ids: The list of instance_ids of the embeddings to + retrieve. @type properties: Optional[List[str]] - @param properties: The list of payload properties to retrieve. Default is None. + @param properties: The list of payload properties to retrieve. + Default is None. @rtype: List[Dict[str, Any]] @return: The list of payload dictionaries. """ diff --git a/luxonis_ml/embeddings/utils/vectordb.py b/luxonis_ml/embeddings/utils/vectordb.py index a6aaadcf..cbeddc7d 100644 --- a/luxonis_ml/embeddings/utils/vectordb.py +++ b/luxonis_ml/embeddings/utils/vectordb.py @@ -5,12 +5,14 @@ class VectorDBAPI(ABC): """Abstract class for Vector Database APIs. - This class defines a common interface for vector database operations for different - implementations like Qdrant and Weaviate. + This class defines a common interface for vector database operations + for different implementations like Qdrant and Weaviate. 
""" @abstractmethod - def create_collection(self, collection_name: str, properties: List[str]) -> None: + def create_collection( + self, collection_name: str, properties: List[str] + ) -> None: """Create a collection in the vector database.""" pass @@ -39,14 +41,19 @@ def search_similar_embeddings( @abstractmethod def get_similarity_scores( - self, reference_id: str, other_ids: List[str], sort_distances: bool = True + self, + reference_id: str, + other_ids: List[str], + sort_distances: bool = True, ) -> Tuple[List[str], List[float]]: - """Get similarity scores between a reference embedding and other embeddings.""" + """Get similarity scores between a reference embedding and other + embeddings.""" pass @abstractmethod def compute_similarity_matrix(self) -> List[List[float]]: - """Compute a similarity matrix for all the embeddings in the collection.""" + """Compute a similarity matrix for all the embeddings in the + collection.""" pass @abstractmethod diff --git a/luxonis_ml/embeddings/utils/weaviate.py b/luxonis_ml/embeddings/utils/weaviate.py index 70430a86..6b2f3e48 100644 --- a/luxonis_ml/embeddings/utils/weaviate.py +++ b/luxonis_ml/embeddings/utils/weaviate.py @@ -8,9 +8,9 @@ class WeaviateAPI(VectorDBAPI): - """Provides a Python interface for interacting with Weaviate, facilitating - operations such as creating collections, managing embeddings, and querying for - similar embeddings. + """Provides a Python interface for interacting with Weaviate, + facilitating operations such as creating collections, managing + embeddings, and querying for similar embeddings. It only supports cosine similarity for now. """ @@ -24,12 +24,14 @@ def __init__( """Initializes the Weaviate API client with connection details. @type url: str - @param url: URL of the Weaviate instance, defaults to U{localhost:8080}. + @param url: URL of the Weaviate instance, defaults to + U{localhost:8080}. 
@type grpc_url: str @param grpc_url: URL of the gRPC Weaviate instance, defaults to U{localhost:50051}. @type auth_api_key: str - @param auth_api_key: API key for authentication. Defaults to C{None}. + @param auth_api_key: API key for authentication. Defaults to + C{None}. """ if auth_api_key is not None: auth_api_key = weaviate.AuthApiKey(auth_api_key) @@ -56,7 +58,8 @@ def create_collection( @type collection_name: str @param collection_name: Name of the collection to create. @type properties: List[str] - @param properties: List of properties for the collection. Defaults to None. + @param properties: List of properties for the collection. + Defaults to None. """ self.collection_name = collection_name self.properties = properties @@ -68,7 +71,9 @@ def create_collection( for prop in self.properties: properties.append( wvc.Property( - name=prop, data_type=wvc.DataType.TEXT, skip_vectorization=True + name=prop, + data_type=wvc.DataType.TEXT, + skip_vectorization=True, ) ) @@ -99,7 +104,8 @@ def insert_embeddings( payloads: List[Dict[str, Any]], batch_size: int = 100, ) -> None: - """Inserts embeddings with associated payloads into a collection. + """Inserts embeddings with associated payloads into a + collection. @type uuids: List[str] @param uuids: List of UUIDs for the embeddings. @@ -113,7 +119,9 @@ def insert_embeddings( data = [] for i, embedding in enumerate(embeddings): data.append( - wvc.DataObject(properties=payloads[i], uuid=uuids[i], vector=embedding) + wvc.DataObject( + properties=payloads[i], uuid=uuids[i], vector=embedding + ) ) if len(data) == batch_size: @@ -152,18 +160,21 @@ def search_similar_embeddings( return uuids, scores def get_similarity_scores( - self, reference_id: str, other_ids: List[str], sort_distances: bool = False + self, + reference_id: str, + other_ids: List[str], + sort_distances: bool = False, ) -> Tuple[List[str], List[float]]: - """Calculates the similarity score between the reference embedding and the - specified embeddings. 
+ """Calculates the similarity score between the reference + embedding and the specified embeddings. @type reference_id: str @param reference_id: UUID of the reference embedding. @type other_ids: List[str] @param other_ids: List of UUIDs of the embeddings to compare to. @type sort_distances: bool - @param sort_distances: Whether to sort the results by distance or keep order of - the UUIDs. Defaults to False. + @param sort_distances: Whether to sort the results by distance + or keep order of the UUIDs. Defaults to False. @rtype ids: List[str] @return ids: List of UUIDs of the embeddings. @rtype scores: List[float] @@ -186,30 +197,39 @@ def get_similarity_scores( ids = other_ids scores = [0] * len(other_ids) for result in response.objects: - scores[other_ids.index(str(result.uuid))] = 1 - result.metadata.distance + scores[other_ids.index(str(result.uuid))] = ( + 1 - result.metadata.distance + ) return ids, scores def compute_similarity_matrix(self) -> List[List[float]]: - """Calculates the similarity matrix for all the embeddings in the collection. - @note: This is a very inefficient implementation. For large numbers of - embeddings, calculate the similarity matrix by hand + """Calculates the similarity matrix for all the embeddings in + the collection. @note: This is a very inefficient + implementation. For large numbers of embeddings, calculate the + similarity matrix by hand (sklearn.metrics.pairwise.cosine_similarity). @rtype sim_matrix: List[List[float]] - @return sim_matrix: Similarity matrix for all the embeddings in the collection. + @return sim_matrix: Similarity matrix for all the embeddings in + the collection. 
""" uuids = self.retrieve_all_ids() sim_matrix = [] for uuid in uuids: - ids, scores = self.get_similarity_scores(uuid, uuids, sort_distances=False) + ids, scores = self.get_similarity_scores( + uuid, uuids, sort_distances=False + ) sim_matrix.append(scores) return sim_matrix - def retrieve_embeddings_by_ids(self, uuids: List[str]) -> List[List[float]]: - """Gets the embeddings for the specified UUIDs, up to a maximum of 10000. + def retrieve_embeddings_by_ids( + self, uuids: List[str] + ) -> List[List[float]]: + """Gets the embeddings for the specified UUIDs, up to a maximum + of 10000. @type uuids: List[str] @param uuids: List of UUIDs of the embeddings to get. @@ -230,7 +250,9 @@ def retrieve_embeddings_by_ids(self, uuids: List[str]) -> List[List[float]]: } # Retrieve embeddings in the order of the provided UUIDs embeddings = [ - uuid_embedding_map[uuid] for uuid in uuids if uuid in uuid_embedding_map + uuid_embedding_map[uuid] + for uuid in uuids + if uuid in uuid_embedding_map ] return embeddings @@ -238,7 +260,8 @@ def retrieve_embeddings_by_ids(self, uuids: List[str]) -> List[List[float]]: def retrieve_payloads_by_ids( self, uuids: List[str], properties: Optional[List[str]] = None ) -> List[Dict[str, Any]]: - """Gets the payloads for the specified UUIDs, up to a maximum of 10000. + """Gets the payloads for the specified UUIDs, up to a maximum of + 10000. @type uuids: List[str] @param uuids: List of UUIDs of the embeddings to get. 
@@ -261,7 +284,9 @@ def retrieve_payloads_by_ids( } # Retrieve payloads in the order of the provided UUIDs payloads = [ - uuid_payload_map[uuid] for uuid in uuids if uuid in uuid_payload_map + uuid_payload_map[uuid] + for uuid in uuids + if uuid in uuid_payload_map ] return payloads diff --git a/luxonis_ml/nn_archive/__init__.py b/luxonis_ml/nn_archive/__init__.py index cd62b9a8..94a06f2f 100644 --- a/luxonis_ml/nn_archive/__init__.py +++ b/luxonis_ml/nn_archive/__init__.py @@ -3,4 +3,10 @@ from .model import Model from .utils import infer_layout, is_nn_archive -__all__ = ["ArchiveGenerator", "Model", "Config", "is_nn_archive", "infer_layout"] +__all__ = [ + "ArchiveGenerator", + "Model", + "Config", + "is_nn_archive", + "infer_layout", +] diff --git a/luxonis_ml/nn_archive/__main__.py b/luxonis_ml/nn_archive/__main__.py index 88b9c0ec..1dc4ee1c 100644 --- a/luxonis_ml/nn_archive/__main__.py +++ b/luxonis_ml/nn_archive/__main__.py @@ -53,7 +53,9 @@ def inspect( inputs = metadata = outputs = heads = True if metadata: - print(Panel.fit(Pretty(archive_config.model.metadata), title="Metadata")) + print( + Panel.fit(Pretty(archive_config.model.metadata), title="Metadata") + ) if heads: print(Panel.fit(Pretty(archive_config.model.heads), title="Heads")) if inputs: @@ -67,13 +69,15 @@ def extract( path: PathArgument, destination: Annotated[ str, - typer.Option("-d", "--dest", help="Path where to extract the Archive."), + typer.Option( + "-d", "--dest", help="Path where to extract the Archive." + ), ] = ".", ): """Extracts NN Archive. - Extracts the NN Archive to the destination path. By default, the Archive is - extracted to the current working directory. + Extracts the NN Archive to the destination path. By default, the + Archive is extracted to the current working directory. 
""" extract_path = Path(destination) / (Path(path).name.split(".")[0]) diff --git a/luxonis_ml/nn_archive/archive_generator.py b/luxonis_ml/nn_archive/archive_generator.py index 7aec3fdd..32fe668a 100644 --- a/luxonis_ml/nn_archive/archive_generator.py +++ b/luxonis_ml/nn_archive/archive_generator.py @@ -8,8 +8,8 @@ class ArchiveGenerator: - """Generator of abstracted NN archive (.tar) files containing config and model files - (executables). + """Generator of abstracted NN archive (.tar) files containing config + and model files (executables). @type archive_name: str @ivar archive_name: Desired archive file name. @@ -20,8 +20,8 @@ class ArchiveGenerator: @type executables_paths: list @ivar executables_paths: Paths to relevant model executables. @type compression: str - @ivar compression: Type of archive file compression ("xz" for LZMA, "gz" for gzip, - or "bz2" for bzip2 compression). + @ivar compression: Type of archive file compression ("xz" for LZMA, + "gz" for gzip, or "bz2" for bzip2 compression). """ def __init__( @@ -62,7 +62,9 @@ def make_archive(self) -> str: with tarfile.open(archive_path, f"w:{self.compression}") as tar: # add executables for executable_path in self.executables_paths: - tar.add(executable_path, arcname=os.path.basename(executable_path)) + tar.add( + executable_path, arcname=os.path.basename(executable_path) + ) # add config JSON tarinfo = tarfile.TarInfo(name="config.json") tarinfo.size = len(json_data) diff --git a/luxonis_ml/nn_archive/config.py b/luxonis_ml/nn_archive/config.py index 390b22a7..7362dea6 100644 --- a/luxonis_ml/nn_archive/config.py +++ b/luxonis_ml/nn_archive/config.py @@ -10,13 +10,16 @@ class Config(BaseModelExtraForbid): - """The main class of the multi/single-stage model config scheme (multi- stage models - consists of interconnected single-stage models). + """The main class of the multi/single-stage model config scheme + (multi- stage models consists of interconnected single-stage + models). 
@type config_version: str - @ivar config_version: Static variable representing the version of the config scheme. + @ivar config_version: Static variable representing the version of + the config scheme. @type model: Model - @ivar model: A Model object representing the neural network used in the archive. + @ivar model: A Model object representing the neural network used in + the archive. """ config_version: CONFIG_VERSION = Field( diff --git a/luxonis_ml/nn_archive/config_building_blocks/base_models/head.py b/luxonis_ml/nn_archive/config_building_blocks/base_models/head.py index 55e406e8..a17df4b8 100644 --- a/luxonis_ml/nn_archive/config_building_blocks/base_models/head.py +++ b/luxonis_ml/nn_archive/config_building_blocks/base_models/head.py @@ -19,13 +19,15 @@ class Head(BaseModel, ABC): @type name: str | None @ivar name: Optional name of the head. @type parser: str - @ivar parser: Name of the parser responsible for processing the models output. + @ivar parser: Name of the parser responsible for processing the + models output. @type outputs: List[str] | None - @ivar outputs: Specify which outputs are fed into the parser. If None, all outputs - are fed. + @ivar outputs: Specify which outputs are fed into the parser. If + None, all outputs are fed. @type metadata: C{HeadMetadata} | C{HeadObjectDetectionMetadata} | - C{HeadClassificationMetadata} | C{HeadObjectDetectionSSDMetadata} | - C{HeadSegmentationMetadata} | C{HeadYOLOMetadata} + C{HeadClassificationMetadata} | + C{HeadObjectDetectionSSDMetadata} | C{HeadSegmentationMetadata} + | C{HeadYOLOMetadata} @ivar metadata: Metadata of the parser. 
""" diff --git a/luxonis_ml/nn_archive/config_building_blocks/base_models/head_metadata.py b/luxonis_ml/nn_archive/config_building_blocks/base_models/head_metadata.py index f06ac2b2..e511ef0f 100644 --- a/luxonis_ml/nn_archive/config_building_blocks/base_models/head_metadata.py +++ b/luxonis_ml/nn_archive/config_building_blocks/base_models/head_metadata.py @@ -6,7 +6,8 @@ class HeadMetadata(BaseModel): - """Metadata for the basic head. It allows you to specify additional fields. + """Metadata for the basic head. It allows you to specify additional + fields. @type postprocessor_path: str | None @ivar postprocessor_path: Path to the postprocessor. @@ -27,16 +28,18 @@ class HeadObjectDetectionMetadata(HeadMetadata): @type n_classes: int @ivar n_classes: Number of object classes detected by the model. @type iou_threshold: float - @ivar iou_threshold: Non-max supression threshold limiting boxes intersection. + @ivar iou_threshold: Non-max supression threshold limiting boxes + intersection. @type conf_threshold: float - @ivar conf_threshold: Confidence score threshold above which a detected object is - considered valid. + @ivar conf_threshold: Confidence score threshold above which a + detected object is considered valid. @type max_det: int @ivar max_det: Maximum detections per image. @type anchors: list - @ivar anchors: Predefined bounding boxes of different sizes and aspect ratios. The - innermost lists are length 2 tuples of box sizes. The middle lists are anchors - for each output. The outmost lists go from smallest to largest output. + @ivar anchors: Predefined bounding boxes of different sizes and + aspect ratios. The innermost lists are length 2 tuples of box + sizes. The middle lists are anchors for each output. The outmost + lists go from smallest to largest output. """ classes: List[str] = Field( @@ -62,11 +65,11 @@ class HeadObjectDetectionSSDMetadata(HeadObjectDetectionMetadata): """Metadata for the SSD object detection head. 
@type boxes_outputs: str - @ivar boxes_outputs: Output name corresponding to predicted bounding box - coordinates. + @ivar boxes_outputs: Output name corresponding to predicted bounding + box coordinates. @type scores_outputs: str - @ivar scores_outputs: Output name corresponding to predicted bounding box confidence - scores. + @ivar scores_outputs: Output name corresponding to predicted + bounding box confidence scores. """ boxes_outputs: str = Field( @@ -94,7 +97,9 @@ class HeadClassificationMetadata(HeadMetadata): n_classes: int = Field( description="Number of object classes recognized by the model." ) - is_softmax: bool = Field(description="True, if output is already softmaxed.") + is_softmax: bool = Field( + description="True, if output is already softmaxed." + ) class HeadSegmentationMetadata(HeadMetadata): @@ -114,15 +119,17 @@ class HeadSegmentationMetadata(HeadMetadata): n_classes: int = Field( description="Number of object classes recognized by the model." ) - is_softmax: bool = Field(description="True, if output is already softmaxed.") + is_softmax: bool = Field( + description="True, if output is already softmaxed." + ) class HeadYOLOMetadata(HeadObjectDetectionMetadata, HeadSegmentationMetadata): """Metadata for the YOLO head. @type yolo_outputs: list - @ivar yolo_outputs: A list of output names for each of the different YOLO grid - sizes. + @ivar yolo_outputs: A list of output names for each of the different + YOLO grid sizes. @type mask_outputs: list | None @ivar mask_outputs: A list of output names for each mask output. @type protos_outputs: str | None @@ -132,13 +139,17 @@ class HeadYOLOMetadata(HeadObjectDetectionMetadata, HeadSegmentationMetadata): @type angles_outputs: list | None @ivar angles_outputs: A list of output names for the angles. @type subtype: C{ObjectDetectionSubtypeYOLO} - @ivar subtype: YOLO family decoding subtype (e.g. yolov5, yolov6, yolov7 etc.) + @ivar subtype: YOLO family decoding subtype (e.g. yolov5, yolov6, + yolov7 etc.) 
@type n_prototypes: int | None - @ivar n_prototypes: Number of prototypes per bbox in YOLO instance segmnetation. + @ivar n_prototypes: Number of prototypes per bbox in YOLO instance + segmnetation. @type n_keypoints: int | None - @ivar n_keypoints: Number of keypoints per bbox in YOLO keypoint detection. + @ivar n_keypoints: Number of keypoints per bbox in YOLO keypoint + detection. @type is_softmax: bool | None - @ivar is_softmax: True, if output is already softmaxed in YOLO instance segmentation + @ivar is_softmax: True, if output is already softmaxed in YOLO + instance segmentation """ yolo_outputs: List[str] = Field( @@ -167,10 +178,12 @@ class HeadYOLOMetadata(HeadObjectDetectionMetadata, HeadSegmentationMetadata): description="YOLO family decoding subtype (e.g. yolov5, yolov6, yolov7 etc.)." ) n_prototypes: Optional[int] = Field( - None, description="Number of prototypes per bbox in YOLO instance segmnetation." + None, + description="Number of prototypes per bbox in YOLO instance segmnetation.", ) n_keypoints: Optional[int] = Field( - None, description="Number of keypoints per bbox in YOLO keypoint detection." 
+ None, + description="Number of keypoints per bbox in YOLO keypoint detection.", ) is_softmax: Optional[bool] = Field( None, @@ -276,7 +289,11 @@ def validate_task_specific_fields( defined_params = defined_params.difference(common_fields) supported_output_params = { - "instance_segmentation": ["yolo_outputs", "mask_outputs", "protos_outputs"], + "instance_segmentation": [ + "yolo_outputs", + "mask_outputs", + "protos_outputs", + ], "keypoint_detection": ["yolo_outputs", "keypoints_outputs"], "object_detection": ["yolo_outputs"], } @@ -295,7 +312,9 @@ def validate_task_specific_fields( for param in defined_params: if param == "angles_outputs" and "object_detection" in tasks: continue - if not any(param in supported_output_params[task] for task in tasks): + if not any( + param in supported_output_params[task] for task in tasks + ): raise ValueError( f"Invalid combination of output parameters. Field {param} is not supported for the tasks {tasks}." ) @@ -313,5 +332,7 @@ def validate_anchors(cls, values): or values["subtype"] == ObjectDetectionSubtypeYOLO.YOLOv8 ) ): - raise ValueError("YOLOv6, YOLOv6r2, and YOLOv8 do not support anchors.") + raise ValueError( + "YOLOv6, YOLOv6r2, and YOLOv8 do not support anchors." + ) return values diff --git a/luxonis_ml/nn_archive/config_building_blocks/base_models/input.py b/luxonis_ml/nn_archive/config_building_blocks/base_models/input.py index 1e5ec5c5..e0969967 100644 --- a/luxonis_ml/nn_archive/config_building_blocks/base_models/input.py +++ b/luxonis_ml/nn_archive/config_building_blocks/base_models/input.py @@ -14,18 +14,22 @@ class PreprocessingBlock(BaseModelExtraForbid): """Represents preprocessing operations applied to the input data. @type mean: list - @ivar mean: Mean values in channel order. Typically, this is BGR order. + @ivar mean: Mean values in channel order. Typically, this is BGR + order. @type scale: list - @ivar scale: Standardization values in channel order. Typically, this is BGR order. 
+ @ivar scale: Standardization values in channel order. Typically, + this is BGR order. @type reverse_channels: bool | None - @ivar reverse_channels: If True, color channels are reversed (e.g. BGR to RGB or - vice versa). + @ivar reverse_channels: If True, color channels are reversed (e.g. + BGR to RGB or vice versa). @type interleaved_to_planar: bool | None - @ivar interleaved_to_planar: If True, format is changed from interleaved to planar. + @ivar interleaved_to_planar: If True, format is changed from + interleaved to planar. """ mean: Optional[List[float]] = Field( - None, description="Mean values in channel order. Typically, this is BGR order." + None, + description="Mean values in channel order. Typically, this is BGR order.", ) scale: Optional[List[float]] = Field( None, @@ -36,7 +40,8 @@ class PreprocessingBlock(BaseModelExtraForbid): description="If True, color channels are reversed (e.g. BGR to RGB or vice versa).", ) interleaved_to_planar: Optional[bool] = Field( - None, description="If True, format is changed from interleaved to planar." + None, + description="If True, format is changed from interleaved to planar.", ) @@ -66,7 +71,9 @@ class Input(BaseModelExtraForbid): dtype: DataType = Field( description="Data type of the input data (e.g., 'float32')." ) - input_type: InputType = Field(description="Type of input data (e.g., 'image').") + input_type: InputType = Field( + description="Type of input data (e.g., 'image')." + ) shape: List[int] = Field( min_length=1, description="Shape of the input data as a list of integers (e.g. 
[H,W], [H,W,C], [N,H,W,C], ...).", diff --git a/luxonis_ml/nn_archive/config_building_blocks/enums/data_type.py b/luxonis_ml/nn_archive/config_building_blocks/enums/data_type.py index 29ce7df3..f81c271e 100644 --- a/luxonis_ml/nn_archive/config_building_blocks/enums/data_type.py +++ b/luxonis_ml/nn_archive/config_building_blocks/enums/data_type.py @@ -2,7 +2,8 @@ class DataType(Enum): - """Represents all existing data types used in i/o streams of the model.""" + """Represents all existing data types used in i/o streams of the + model.""" INT8 = "int8" INT32 = "int32" diff --git a/luxonis_ml/nn_archive/model.py b/luxonis_ml/nn_archive/model.py index 76fabed4..d641c8f5 100644 --- a/luxonis_ml/nn_archive/model.py +++ b/luxonis_ml/nn_archive/model.py @@ -17,8 +17,8 @@ class Model(BaseModelExtraForbid): @type outputs: list @ivar outputs: List of Output objects defining the model outputs. @type heads: list - @ivar heads: List of Head objects defining the model heads. If not defined, we - assume a raw output. + @ivar heads: List of Head objects defining the model heads. If not + defined, we assume a raw output. """ metadata: Metadata = Field( diff --git a/luxonis_ml/nn_archive/utils.py b/luxonis_ml/nn_archive/utils.py index 199e8250..e21c366f 100644 --- a/luxonis_ml/nn_archive/utils.py +++ b/luxonis_ml/nn_archive/utils.py @@ -11,7 +11,8 @@ def is_nn_archive(path: PathType) -> bool: @type path: PathType @param path: Path to the file to check. @rtype: bool - @return: True if the file is a valid NN archive file, False otherwise. + @return: True if the file is a valid NN archive file, False + otherwise. """ path = Path(path) diff --git a/luxonis_ml/tracker/tracker.py b/luxonis_ml/tracker/tracker.py index d3630589..eaad8031 100644 --- a/luxonis_ml/tracker/tracker.py +++ b/luxonis_ml/tracker/tracker.py @@ -30,8 +30,9 @@ def __init__( mlflow_tracking_uri: Optional[str] = None, rank: int = 0, ): - """Implementation of PytorchLightning Logger that wraps various logging - software. 
Supported loggers: TensorBoard, WandB and MLFlow. + """Implementation of PytorchLightning Logger that wraps various + logging software. Supported loggers: TensorBoard, WandB and + MLFlow. @type project_name: Optional[str] @param project_name: Name of the project used for WandB and MLFlow. @@ -90,11 +91,15 @@ def __init__( self.is_sweep = is_sweep self.rank = rank - self.run_id = run_id # if using MLFlow then it will continue previous run + self.run_id = ( + run_id # if using MLFlow then it will continue previous run + ) if is_wandb or is_mlflow: if self.project_name is None and self.project_id is None: - raise Exception("Either project_name or project_id must be specified!") + raise Exception( + "Either project_name or project_id must be specified!" + ) if self.is_wandb and wandb_entity is None: raise Exception("Must specify wandb_entity when using wandb!") @@ -102,7 +107,9 @@ def __init__( self.wandb_entity = wandb_entity if self.is_mlflow: if mlflow_tracking_uri is None: - raise Exception("Must specify mlflow_tracking_uri when using mlflow!") + raise Exception( + "Must specify mlflow_tracking_uri when using mlflow!" 
+ ) else: self.mlflow_tracking_uri = mlflow_tracking_uri @@ -126,7 +133,8 @@ def __init__( @staticmethod def rank_zero_only(fn: Callable) -> Callable: - """Function wrapper that lets only processes with rank=0 execute it.""" + """Function wrapper that lets only processes with rank=0 execute + it.""" @wraps(fn) def wrapped_fn(self, *args: Any, **kwargs: Any) -> Optional[Any]: @@ -154,8 +162,11 @@ def version(self) -> int: @property @rank_zero_only - def experiment(self) -> Dict[Literal["tensorboard", "wandb", "mlflow"], Any]: - """Creates new experiments or returns active ones if already created.""" + def experiment( + self, + ) -> Dict[Literal["tensorboard", "wandb", "mlflow"], Any]: + """Creates new experiments or returns active ones if already + created.""" if self._experiment is not None: return self._experiment @@ -202,21 +213,28 @@ def experiment(self) -> Dict[Literal["tensorboard", "wandb", "mlflow"], Any]: self._experiment["mlflow"] = mlflow - self.artifacts_dir = f"{self.save_directory}/{self.run_name}/artifacts" + self.artifacts_dir = ( + f"{self.save_directory}/{self.run_name}/artifacts" + ) Path(self.artifacts_dir).mkdir(parents=True, exist_ok=True) - self._experiment["mlflow"].set_tracking_uri(self.mlflow_tracking_uri) + self._experiment["mlflow"].set_tracking_uri( + self.mlflow_tracking_uri + ) if self.project_id is not None: self.project_name = None experiment = self._experiment["mlflow"].set_experiment( - experiment_name=self.project_name, experiment_id=self.project_id + experiment_name=self.project_name, + experiment_id=self.project_id, ) self.project_id = experiment.experiment_id # if self.run_id == None then create new run, else use alredy created one run = self._experiment["mlflow"].start_run( - run_id=self.run_id, run_name=self.run_name, nested=self.is_sweep + run_id=self.run_id, + run_name=self.run_name, + nested=self.is_sweep, ) self.run_id = run.info.run_id @@ -247,8 +265,8 @@ def log_hyperparams( def log_metric(self, name: str, value: float, 
step: int) -> None: """Logs metric value with name and step. - @note: step is ommited when logging with wandb to avoid problems with - inconsistent incrementation. + @note: step is ommited when logging with wandb to avoid problems + with inconsistent incrementation. @type name: str @param name: Metric name @type value: float @@ -285,8 +303,9 @@ def log_metrics(self, metrics: Dict[str, float], step: int) -> None: @rank_zero_only def log_image(self, name: str, img: np.ndarray, step: int) -> None: - """Logs image with name and step. Note: step is omitted when logging with wandb - is used to avoid problems with inconsistent incrementation. + """Logs image with name and step. Note: step is omitted when + logging with wandb is used to avoid problems with inconsistent + incrementation. @type name: str @param name: Caption of the image @@ -296,7 +315,9 @@ def log_image(self, name: str, img: np.ndarray, step: int) -> None: @param step: Current step """ if self.is_tensorboard: - self.experiment["tensorboard"].add_image(name, img, step, dataformats="HWC") + self.experiment["tensorboard"].add_image( + name, img, step, dataformats="HWC" + ) if self.is_wandb: wandb_image = self.experiment["wandb"].Image(img, caption=name) @@ -319,9 +340,11 @@ def upload_artifact( @type path: PathType @param path: Path to the artifact @type name: Optional[str] - @param name: Name of the artifact, if None then use the name of the file + @param name: Name of the artifact, if None then use the name of + the file @type typ: str - @param typ: Type of the artifact, defaults to "artifact". Only used for WandB. + @param typ: Type of the artifact, defaults to "artifact". Only + used for WandB. """ path = Path(path) if self.is_wandb: @@ -348,8 +371,8 @@ def log_images(self, imgs: Dict[str, np.ndarray], step: int) -> None: """Logs multiple images. 
@type imgs: Dict[str, np.ndarray] - @param imgs: Dict of image key-value pairs where key is image caption and value - is image data + @param imgs: Dict of image key-value pairs where key is image + caption and value is image data @type step: int @param step: Current step """ @@ -360,7 +383,9 @@ def _get_next_run_number(self) -> int: """Returns number id for next run.""" log_dirs = glob.glob(f"{self.save_directory}/*") - log_dirs = [path.split("/")[-1] for path in log_dirs if os.path.isdir(path)] + log_dirs = [ + path.split("/")[-1] for path in log_dirs if os.path.isdir(path) + ] nums = [path.split("-")[0] for path in log_dirs] nums = [int(num) for num in nums if num.isnumeric()] @@ -386,7 +411,9 @@ def _get_latest_run_name(self) -> str: if ld.split("-")[0].isnumeric(): runs.append(ld) runs.sort( - key=lambda x: os.path.getmtime(os.path.join(self.save_directory, x)), + key=lambda x: os.path.getmtime( + os.path.join(self.save_directory, x) + ), reverse=True, ) return runs[0] diff --git a/luxonis_ml/utils/__main__.py b/luxonis_ml/utils/__main__.py index 8fa665ce..5eca5c71 100644 --- a/luxonis_ml/utils/__main__.py +++ b/luxonis_ml/utils/__main__.py @@ -50,7 +50,8 @@ def delete(url: UrlArgument): def ls( url: UrlArgument, recursive: Annotated[ - bool, typer.Option(..., "--recursive", "-r", help="List files recursively.") + bool, + typer.Option(..., "--recursive", "-r", help="List files recursively."), ] = False, typ: Annotated[ TypeEnum, diff --git a/luxonis_ml/utils/config.py b/luxonis_ml/utils/config.py index 50edf722..e449f850 100644 --- a/luxonis_ml/utils/config.py +++ b/luxonis_ml/utils/config.py @@ -16,7 +16,9 @@ class LuxonisConfig(BaseModelExtraForbid): def get_config( cls: Type[T], cfg: Optional[Union[str, Dict[str, Any]]] = None, - overrides: Optional[Union[Dict[str, Any], List[str], Tuple[str, ...]]] = None, + overrides: Optional[ + Union[Dict[str, Any], List[str], Tuple[str, ...]] + ] = None, ) -> T: """Loads config from a yaml file or a dictionary. 
@@ -30,7 +32,9 @@ def get_config( @raise ValueError: If neither C{cfg} nor C{overrides} are provided. """ if cfg is None and overrides is None: - raise ValueError("At least one of `cfg` or `overrides` must be set.") + raise ValueError( + "At least one of `cfg` or `overrides` must be set." + ) if isinstance(overrides, (list, tuple)): if len(overrides) % 2 != 0: @@ -83,9 +87,11 @@ def get(self, key_merged: str, default: Any = None) -> Any: If the key doesn't exist, the default value is returned. @type key_merged: str - @param key_merged: Key in a form of a string with levels separated by dots. + @param key_merged: Key in a form of a string with levels + separated by dots. @type default: Any - @param default: Default value to return if the key doesn't exist. + @param default: Default value to return if the key doesn't + exist. @rtype: Any @return: Value of the key or default value. """ @@ -93,7 +99,9 @@ def get(self, key_merged: str, default: Any = None) -> Any: for key in key_merged.split("."): if isinstance(value, list): if not key.isdecimal(): - raise ValueError(f"Can't access list with non-int key `{key}`.") + raise ValueError( + f"Can't access list with non-int key `{key}`." + ) index = int(key) if index >= len(value): return default @@ -109,11 +117,13 @@ def get(self, key_merged: str, default: Any = None) -> Any: return value @staticmethod - def _merge_overrides(data: Dict[str, Any], overrides: Dict[str, Any]) -> None: + def _merge_overrides( + data: Dict[str, Any], overrides: Dict[str, Any] + ) -> None: """Merges the config dictionary with the CLI overrides. - The overrides are a dictionary mapping "dotted" keys to either final or unparsed - values. + The overrides are a dictionary mapping "dotted" keys to either + final or unparsed values. @type data: dict @param data: Dictionary with config data. 
@@ -132,20 +142,26 @@ def _parse_value(value: Any) -> Any: # keep as string and hope for the best return value - def _merge_recursive(data: Union[Dict, List], dot_name: str, value: Any): + def _merge_recursive( + data: Union[Dict, List], dot_name: str, value: Any + ): key, *tail = dot_name.split(".") if not tail: parsed_value = _parse_value(value) if key.isdecimal(): index = int(key) if not isinstance(data, list): - raise ValueError("int keys are not allowed for non-list values") + raise ValueError( + "int keys are not allowed for non-list values" + ) if index >= len(data): data.append(parsed_value) else: data[index] = parsed_value elif isinstance(data, list): - raise ValueError("Only int keys are allowed for list values") + raise ValueError( + "Only int keys are allowed for list values" + ) else: data[key] = parsed_value @@ -156,7 +172,9 @@ def _merge_recursive(data: Union[Dict, List], dot_name: str, value: Any): if key.isdecimal(): index = int(key) if not isinstance(data, list): - raise ValueError("int keys are not allowed for non-list values") + raise ValueError( + "int keys are not allowed for non-list values" + ) if index >= len(data): index = len(data) if data: diff --git a/luxonis_ml/utils/environ.py b/luxonis_ml/utils/environ.py index 7f2bcb39..342347b0 100644 --- a/luxonis_ml/utils/environ.py +++ b/luxonis_ml/utils/environ.py @@ -36,7 +36,9 @@ class Environ(BaseSettings): GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = None - LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" + LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = ( + "INFO" + ) @model_serializer(when_used="always", mode="plain", return_type=str) def _serialize_environ(self) -> str: diff --git a/luxonis_ml/utils/filesystem.py b/luxonis_ml/utils/filesystem.py index 5a2a3f8f..e9a2e642 100644 --- a/luxonis_ml/utils/filesystem.py +++ b/luxonis_ml/utils/filesystem.py @@ -41,8 +41,7 @@ def __call__( local_path: PathType, remote_path: PathType, 
mlflow_instance: Optional[ModuleType] = None, - ) -> str: - ... + ) -> str: ... PUT_FILE_REGISTRY: Registry[PutFile] = Registry(name="put_file") @@ -65,24 +64,25 @@ def __init__( ): """Abstraction over remote and local sources. - Helper class which abstracts uploading and downloading files from remote and - local sources. Supports S3, MLflow, GCS, and local file systems. + Helper class which abstracts uploading and downloading files + from remote and local sources. Supports S3, MLflow, GCS, and + local file systems. @type path: str - @param path: Input path consisting of protocol and actual path or just path for - local files + @param path: Input path consisting of protocol and actual path + or just path for local files @type allow_active_mlflow_run: Optional[bool] - @param allow_active_mlflow_run: Flag if operations are allowed on active MLFlow - run. Defaults to False. + @param allow_active_mlflow_run: Flag if operations are allowed + on active MLFlow run. Defaults to False. @type allow_local: Optional[bool] - @param allow_local: Flag if operations are allowed on local file system. - Defaults to True. + @param allow_local: Flag if operations are allowed on local file + system. Defaults to True. @type cache_storage: Optional[str] - @param cache_storage: Path to cache storage. No cache is used if set to None. - Defaults to None. + @param cache_storage: Path to cache storage. No cache is used if + set to None. Defaults to None. @type put_file_plugin: Optional[str] - @param put_file_plugin: The name of a registered function under the - PUT_FILE_REGISTRY to override C{self.put_file}. + @param put_file_plugin: The name of a registered function under + the PUT_FILE_REGISTRY to override C{self.put_file}. 
""" self.cache_storage = cache_storage @@ -187,7 +187,9 @@ def init_fsspec_filesystem(self) -> fsspec.AbstractFileSystem: logger.warning("Ignoring cache storage for local filesystem.") return fs - return fsspec.filesystem("filecache", fs=fs, cache_storage=self.cache_storage) + return fsspec.filesystem( + "filecache", fs=fs, cache_storage=self.cache_storage + ) def put_file( self, @@ -202,8 +204,8 @@ def put_file( @type remote_path: PathType @param remote_path: Relative path to remote file @type mlflow_instance: Optional[L{ModuleType}] - @param mlflow_instance: MLFlow instance if uploading to active run. Defaults to - C{None}. + @param mlflow_instance: MLFlow instance if uploading to active + run. Defaults to C{None}. @rtype: str @return: The full remote path of the uploded file. """ @@ -235,18 +237,20 @@ def put_dir( """Uploads files to remote storage. @type local_paths: Union[PathType, Sequence[PathType]] - @param local_paths: Either a string specifying a directory to walk the files or - a list of files which can be in different directories. + @param local_paths: Either a string specifying a directory to + walk the files or a list of files which can be in different + directories. @type remote_dir: PathType @param remote_dir: Relative path to remote directory @type uuid_dict: Optional[Dict[str, str]] - @param uuid_dict: Stores paths as keys and corresponding UUIDs as values to - replace the file basename. + @param uuid_dict: Stores paths as keys and corresponding UUIDs + as values to replace the file basename. @type mlflow_instance: Optional[L{ModuleType}] - @param mlflow_instance: MLFlow instance if uploading to active run. Defaults to - None. + @param mlflow_instance: MLFlow instance if uploading to active + run. Defaults to None. 
@rtype: Optional[Dict[str, str]] - @return: When local_paths is a list, this maps local_paths to remote_paths + @return: When local_paths is a list, this maps local_paths to + remote_paths """ if self.is_mlflow: raise NotImplementedError @@ -289,8 +293,8 @@ def put_bytes( @type remote_path: PathType @param remote_path: Relative path to remote file @type mlflow_instance: Optional[L{ModuleType}] - @param mlflow_instance: MLFlow instance if uploading to active run. Defaults to - None. + @param mlflow_instance: MLFlow instance if uploading to active + run. Defaults to None. """ if self.is_mlflow: raise NotImplementedError @@ -312,8 +316,8 @@ def get_file( @type local_path: PathType @param local_path: Path to local file @type mlflow_instance: Optional[L{ModuleType}] - @param mlflow_instance: MLFlow instance if uploading to active run. Defaults to - C{None}. + @param mlflow_instance: MLFlow instance if uploading to active + run. Defaults to C{None}. @rtype: Path @return: Path to the downloaded file. """ @@ -363,13 +367,14 @@ def get_dir( """Copies many files from remote storage to local storage. @type remote_paths: Union[PathType, Sequence[PathType]] - @param remote_paths: Either a string specifying a directory to walk the files or - a list of files which can be in different directories. + @param remote_paths: Either a string specifying a directory to + walk the files or a list of files which can be in different + directories. @type local_dir: PathType @param local_dir: Path to local directory @type mlflow_instance: Optional[L{ModuleType}] - @param mlflow_instance: MLFlow instance if uploading to active run. Defaults to - C{None}. + @param mlflow_instance: MLFlow instance if uploading to active + run. Defaults to C{None}. @rtype: Path @return: Path to the downloaded directory. 
""" @@ -387,7 +392,9 @@ def get_dir( elif isinstance(remote_paths, list): with ThreadPoolExecutor() as executor: for remote_path in remote_paths: - local_path = str(local_dir / Path(Path(remote_path).name)) + local_path = str( + local_dir / Path(Path(remote_path).name) + ) executor.submit(self.get_file, remote_path, local_path) return Path(local_dir) @@ -400,7 +407,8 @@ def delete_dir( @type remote_dir: PathType @param remote_dir: Relative path to remote directory. @type allow_delete_parent: bool - @param allow_delete_parent: If True, allows deletion of the parent directory. + @param allow_delete_parent: If True, allows deletion of the + parent directory. """ if not remote_dir and not allow_delete_parent: raise ValueError( @@ -428,9 +436,11 @@ def walk_dir( @type remote_dir: PathType @param remote_dir: Relative path to remote directory @type recursive: bool - @param recursive: If True, walks through the directory recursively. + @param recursive: If True, walks through the directory + recursively. @type typ: Literal["file", "directory", "all"] - @param typ: Specifies the type of files to walk through. Defaults to "file". + @param typ: Specifies the type of files to walk through. + Defaults to "file". @rtype: Iterator[str] @return: Iterator over the paths. """ @@ -440,13 +450,17 @@ def walk_dir( elif self.is_fsspec: full_path = str(self.path / remote_dir) for file in self.fs.ls(full_path, detail=True): - name = str(PurePosixPath(str(file["name"])).relative_to(self.path)) + name = str( + PurePosixPath(str(file["name"])).relative_to(self.path) + ) if typ == "all" or file["type"] == typ: yield name if recursive and file["type"] == "directory": yield from self.walk_dir(name, recursive, typ) - def read_to_byte_buffer(self, remote_path: Optional[PathType] = None) -> BytesIO: + def read_to_byte_buffer( + self, remote_path: Optional[PathType] = None + ) -> BytesIO: """Reads a file into a byte buffer. 
@type remote_path: Optional[PathType] @@ -467,7 +481,9 @@ def read_to_byte_buffer(self, remote_path: Optional[PathType] = None) -> BytesIO client = mlflow.MlflowClient(tracking_uri=self.tracking_uri) if self.run_id is None: - raise RuntimeError("`run_id` cannot be `None` when using `mlflow`") + raise RuntimeError( + "`run_id` cannot be `None` when using `mlflow`" + ) download_path = client.download_artifacts( run_id=self.run_id, path=self.artifact_path, dst_path="." ) @@ -486,12 +502,14 @@ def read_to_byte_buffer(self, remote_path: Optional[PathType] = None) -> BytesIO return buffer def get_file_uuid(self, path: PathType, local: bool = False) -> str: - """Reads a file and returns the (unique) UUID generated from file bytes. + """Reads a file and returns the (unique) UUID generated from + file bytes. @type path: PathType @param path: Relative path to remote file. @type local: bool - @param local: Specifies a local path as opposed to a remote path. + @param local: Specifies a local path as opposed to a remote + path. @rtype: str @return: The generated UUID. """ @@ -506,7 +524,9 @@ def get_file_uuid(self, path: PathType, local: bool = False) -> str: with self.fs.open(download_path, "rb") as f: file_contents = cast(bytes, f.read()) - file_hash_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, file_contents.hex())) + file_hash_uuid = str( + uuid.uuid5(uuid.NAMESPACE_URL, file_contents.hex()) + ) return file_hash_uuid @@ -516,7 +536,8 @@ def get_file_uuids( """Computes the UUIDs for all files stored in the filesystem. @type paths: List[PathType] - @param paths: A list of relative remote paths if remote else local paths. + @param paths: A list of relative remote paths if remote else + local paths. @type local: bool @param local: Specifies local paths as opposed to remote paths. @rtype: Dict[str, str] @@ -564,7 +585,8 @@ def exists(self, remote_path: PathType = "") -> bool: """Checks whether the given remote path exists. 
@type remote_path: PathType - @param remote_path: Relative path to remote file. Defaults to "" (root). + @param remote_path: Relative path to remote file. Defaults to "" + (root). @rtype: bool @return: True if the path exists. """ @@ -598,14 +620,14 @@ def get_protocol(path: str) -> str: def download(url: str, dest: Optional[PathType]) -> Path: """Downloads file or directory from remote storage. - Intended for downloading a single remote object, elevating the need to create an - instance of L{LuxonisFileSystem}. + Intended for downloading a single remote object, obviating the + need to create an instance of L{LuxonisFileSystem}. @type url: str @param url: URL to the file or directory @type dest: Optional[PathType] - @param dest: Destination directory. If unspecified, the current directory is - used. + @param dest: Destination directory. If unspecified, the current + directory is used. @rtype: Path @return: Path to the downloaded file or directory. """ @@ -632,8 +654,8 @@ def download(url: str, dest: Optional[PathType]) -> Path: def upload(local_path: PathType, url: str) -> None: """Uploads file or directory to remote storage. - Intended for uploading a single local object, elevating the need to create an - instance of L{LuxonisFileSystem}. + Intended for uploading a single local object, obviating the need + to create an instance of L{LuxonisFileSystem}. @type local_path: PathType @param local_path: Path to the local file or directory diff --git a/luxonis_ml/utils/logging.py b/luxonis_ml/utils/logging.py index 49e4c6c5..566afaa7 100644 --- a/luxonis_ml/utils/logging.py +++ b/luxonis_ml/utils/logging.py @@ -24,23 +24,26 @@ def setup_logging( ) -> None: """Globally configures logging. - Configures a standar Luxonis logger. Optionally utilizes rich library, configures - handling of warnings (from warnings module) and saves the logs to a file. + Configures a standard Luxonis logger. 
Optionally utilizes rich + library, configures handling of warnings (from warnings module) and + saves the logs to a file. @type file: str or None - @param file: Path to a file where logs will be saved. If None, logs will not be - saved. Defaults to None. + @param file: Path to a file where logs will be saved. If None, logs + will not be saved. Defaults to None. @type use_rich: bool - @param use_rich: If True, rich library will be used for logging. Defaults to False. + @param use_rich: If True, rich library will be used for logging. + Defaults to False. @type level: str or None - @param level: Logging level. One of "DEBUG", "INFO", "WARNING", "ERROR", and - "CRITICAL". Defaults to "INFO". The log level can be changed using "LOG_LEVEL" - environment variable. + @param level: Logging level. One of "DEBUG", "INFO", "WARNING", + "ERROR", and "CRITICAL". Defaults to "INFO". The log level can + be changed using "LOG_LEVEL" environment variable. @type configure_warnings: bool - @param configure_warnings: If True, warnings will be logged. Defaults to True. + @param configure_warnings: If True, warnings will be logged. + Defaults to True. @type rich_print: bool - @param rich_print: If True, builtins.print will be replaced with rich.print. - Defaults to False. + @param rich_print: If True, builtins.print will be replaced with + rich.print. Defaults to False. @param kwargs: Additional arguments passed to RichHandler. 
""" @@ -90,10 +93,14 @@ def setup_logging( if file is not None: file_handler = logging.FileHandler(file) - file_handler.setFormatter(logging.Formatter(file_format, datefmt=datefmt)) + file_handler.setFormatter( + logging.Formatter(file_format, datefmt=datefmt) + ) handlers.append(file_handler) - logging.basicConfig(level=level, format=format, datefmt=datefmt, handlers=handlers) + logging.basicConfig( + level=level, format=format, datefmt=datefmt, handlers=handlers + ) def _custom_warning_handler( message: str, @@ -175,11 +182,15 @@ def wrapper(*f_args, **f_kwargs): for arg_name in pos_arg_names: if arg_name in args: - _warn_deprecated(arg_name, fname, suggest, additional_message) + _warn_deprecated( + arg_name, fname, suggest, additional_message + ) for arg_name in f_kwargs: if arg_name in args: - _warn_deprecated(arg_name, fname, suggest, additional_message) + _warn_deprecated( + arg_name, fname, suggest, additional_message + ) return func(*f_args, **f_kwargs) diff --git a/luxonis_ml/utils/registry.py b/luxonis_ml/utils/registry.py index 6a545515..5bf976c3 100644 --- a/luxonis_ml/utils/registry.py +++ b/luxonis_ml/utils/registry.py @@ -35,8 +35,8 @@ def get(self, key: str) -> T: """Retrieves the registry record for the key. @type key: str - @param key: Name of the registered item, I{e.g.} the class name in string - format. + @param key: Name of the registered item, I{e.g.} the class name + in string format. 
@rtype: type @return: Corresponding class if L{key} exists @raise KeyError: If L{key} is not in the registry @@ -99,7 +99,10 @@ def _register(module: type) -> type: return _register def _register_module( - self, module: type, module_name: Optional[str] = None, force: bool = False + self, + module: type, + module_name: Optional[str] = None, + force: bool = False, ) -> None: """Registers a module by creating a (key, value) pair.""" if module_name is None: @@ -177,5 +180,7 @@ def __new__( ) if register: registry = registry if registry is not None else new_class.REGISTRY - registry.register_module(name=register_name or name, module=new_class) + registry.register_module( + name=register_name or name, module=new_class + ) return new_class diff --git a/pyproject.toml b/pyproject.toml index c7cff1f0..ee1a3499 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ file = [ [tool.ruff] target-version = "py38" -line-length = 88 +line-length = 79 indent-width = 4 [tool.ruff.lint] @@ -75,15 +75,20 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true style = "epytext" - -[tool.mypy] -python_version = "3.8" -ignore_missing_imports = true +wrap-summaries = 72 +wrap-descriptions = 72 [tool.pyright] typeCheckingMode = "basic" +reportMissingTypeStubs = "none" +reportPrivateImportUsage = "none" +reportPrivateUsage = "none" +reportIncompatibleVariableOverride = "none" +reportIncompatibleMethodOverride = "none" +reportUnnecessaryIsInstance = "none" [tool.pytest.ini_options] +testpaths = ["tests"] addopts = "--disable-warnings" [tool.coverage.run] @@ -97,9 +102,17 @@ omit = [ [tool.coverage.report] exclude_also = [ "def __repr__", + "def __rich_repr__", + "def __str__", + "assert", "raise AssertionError", "raise NotImplementedError", - "@(abc\\.)?abstractmethod", - "@(abc\\.)?abstractproperty", - "sys\\.exit", + "except ImportError", + "@abstractmethod", + "@overload", + "(sys\\.)?exit\\(", + "exit\\(", + "cv2\\.imshow", + "cv2\\.waitKey", + 
"logger\\.", ] diff --git a/tests/test_augmentations/test_letterbox.py b/tests/test_augmentations/test_letterbox.py index dbee8081..85ab460a 100644 --- a/tests/test_augmentations/test_letterbox.py +++ b/tests/test_augmentations/test_letterbox.py @@ -2,7 +2,9 @@ import numpy as np -from luxonis_ml.data.augmentations.custom.letterbox_resize import LetterboxResize +from luxonis_ml.data.augmentations.custom.letterbox_resize import ( + LetterboxResize, +) WIDTH: Final[int] = 640 HEIGHT: Final[int] = 480 diff --git a/tests/test_augmentations/test_mosaic.py b/tests/test_augmentations/test_mosaic.py index 72e3cf6f..328e6727 100644 --- a/tests/test_augmentations/test_mosaic.py +++ b/tests/test_augmentations/test_mosaic.py @@ -23,8 +23,15 @@ def test_mosaic4(): def test_bbox_mosaic4(): bbox = (0, 0, WIDTH, HEIGHT) for i in range(4): - mosaic_bbox = bbox_mosaic4(bbox, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH) - assert pytest.approx(mosaic_bbox, abs=0.5) == (0, 0, WIDTH // 2, HEIGHT // 2) + mosaic_bbox = bbox_mosaic4( + bbox, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH + ) + assert pytest.approx(mosaic_bbox, abs=0.5) == ( + 0, + 0, + WIDTH // 2, + HEIGHT // 2, + ) def test_keypoint_mosaic4(): @@ -45,6 +52,8 @@ def test_keypoint_mosaic4(): def test_Mosaic4(): img = (np.random.rand(HEIGHT, WIDTH, 3) * 255).astype(np.uint8) - mosaic4 = Mosaic4(out_height=HEIGHT, out_width=WIDTH, always_apply=True, p=1.0) + mosaic4 = Mosaic4( + out_height=HEIGHT, out_width=WIDTH, always_apply=True, p=1.0 + ) m = mosaic4(image_batch=[img, img, img, img], labels={}) assert m["image_batch"][0].shape == (HEIGHT, WIDTH, 3) diff --git a/tests/test_data/test_annotations.py b/tests/test_data/test_annotations.py index 556dfb1f..7c0b2971 100644 --- a/tests/test_data/test_annotations.py +++ b/tests/test_data/test_annotations.py @@ -1,4 +1,7 @@ -from luxonis_ml.data.datasets.utils import add_generator_wrapper, rescale_values +from luxonis_ml.data.datasets.utils import ( + add_generator_wrapper, + 
rescale_values, +) def test_rescale_values_keypoints(): @@ -14,7 +17,10 @@ def test_rescale_values_keypoints(): def test_rescale_values_segmentation_polyline(): bbox = {"x": 0.1, "y": 0.2, "w": 0.5, "h": 0.5} segmentation = {"polylines": [[0.2, 0.4], [0.5, 0.8]]} - expected = [(0.2 * 0.5 + 0.1, 0.4 * 0.5 + 0.2), (0.5 * 0.5 + 0.1, 0.8 * 0.5 + 0.2)] + expected = [ + (0.2 * 0.5 + 0.1, 0.4 * 0.5 + 0.2), + (0.5 * 0.5 + 0.1, 0.8 * 0.5 + 0.2), + ] assert rescale_values(bbox, segmentation, "segmentation") == expected diff --git a/tests/test_data/test_dataset.py b/tests/test_data/test_dataset.py index 26f16ea8..f2c27d24 100644 --- a/tests/test_data/test_dataset.py +++ b/tests/test_data/test_dataset.py @@ -67,7 +67,12 @@ URL_PREFIX: Final[str] = "gs://luxonis-test-bucket/luxonis-ml-test-data" WORK_DIR: Final[str] = "tests/data/parser_datasets" DATASET_NAME: Final[str] = "test-dataset" -TASKS: Final[Set[str]] = {"segmentation", "classification", "keypoints", "boundingbox"} +TASKS: Final[Set[str]] = { + "segmentation", + "classification", + "keypoints", + "boundingbox", +} DATA_DIR = Path("tests/data/test_dataset") @@ -84,7 +89,9 @@ def make_image(i) -> Path: path = DATA_DIR / f"img_{i}.jpg" if not path.exists(): img = np.zeros((512, 512, 3), dtype=np.uint8) - img[0:10, 0:10] = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + img[0:10, 0:10] = np.random.randint( + 0, 255, (10, 10, 3), dtype=np.uint8 + ) cv2.imwrite(str(path), img) return path @@ -98,11 +105,12 @@ def make_image(i) -> Path: ], ) def test_dataset( - bucket_storage: BucketStorage, platform_name: str, python_version: str, subtests + bucket_storage: BucketStorage, + platform_name: str, + python_version: str, + subtests, ): - dataset_name = ( - f"{DATASET_NAME}-{bucket_storage.value}-{platform_name}-{python_version}" - ) + dataset_name = f"{DATASET_NAME}-{bucket_storage.value}-{platform_name}-{python_version}" with subtests.test("test_create", bucket_storage=bucket_storage): parser = LuxonisParser( 
f"{URL_PREFIX}/COCO_people_subset.zip", @@ -115,7 +123,9 @@ def test_dataset( ) parser.parse() dataset = LuxonisDataset(dataset_name, bucket_storage=bucket_storage) - assert LuxonisDataset.exists(dataset_name, bucket_storage=bucket_storage) + assert LuxonisDataset.exists( + dataset_name, bucket_storage=bucket_storage + ) assert dataset.get_classes()[0] == ["person"] assert set(dataset.get_tasks()) == TASKS assert dataset.get_skeletons() == SKELETONS @@ -128,7 +138,9 @@ def test_dataset( print(dataset.source.to_document()) assert dataset.source.to_document() == LuxonisSource().to_document() dataset.update_source(LuxonisSource("test")) - assert dataset.source.to_document() == LuxonisSource("test").to_document() + assert ( + dataset.source.to_document() == LuxonisSource("test").to_document() + ) with subtests.test("test_load", bucket_storage=bucket_storage): loader = LuxonisLoader(dataset) @@ -157,7 +169,9 @@ def test_dataset( with subtests.test("test_delete", bucket_storage=bucket_storage): dataset.delete_dataset(delete_remote=True) - assert not LuxonisDataset.exists(dataset_name, bucket_storage=bucket_storage) + assert not LuxonisDataset.exists( + dataset_name, bucket_storage=bucket_storage + ) @pytest.mark.dependency(name="test_dataset[BucketStorage.LOCAL]") @@ -242,18 +256,24 @@ def generator(step=15): assert splits is not None assert set(splits.keys()) == {"train", "val", "test"} for split, split_data in splits.items(): - assert len(split_data) == 5, f"Split {split} has {len(split_data)} samples" + assert ( + len(split_data) == 5 + ), f"Split {split} has {len(split_data)} samples" dataset.add(generator()) splits = dataset.get_splits() assert splits is not None for split, split_data in splits.items(): - assert len(split_data) == 5, f"Split {split} has {len(split_data)} samples" + assert ( + len(split_data) == 5 + ), f"Split {split} has {len(split_data)} samples" dataset.make_splits(definitions) splits = dataset.get_splits() assert splits is not None for split, 
split_data in splits.items(): - assert len(split_data) == 10, f"Split {split} has {len(split_data)} samples" + assert ( + len(split_data) == 10 + ), f"Split {split} has {len(split_data)} samples" dataset.add(generator()) dataset.make_splits((1, 0, 0)) @@ -281,7 +301,9 @@ def generator(step=15): dataset.make_splits({"train": 1.5}) with pytest.raises(ValueError): - dataset.make_splits({split: defs * 2 for split, defs in splits.items()}) + dataset.make_splits( + {split: defs * 2 for split, defs in splits.items()} + ) dataset.add(generator(10)) dataset.make_splits({"custom_split": 1.0}) @@ -338,7 +360,9 @@ def generator(): for bbox_annotation in annotations.get( "BoundingBox2DAnnotation", defaultdict(list) )["values"]: - class_ = bbox_annotation["labelName"].split("-")[-1].lower() + class_ = ( + bbox_annotation["labelName"].split("-")[-1].lower() + ) if class_ == "motorbiek": class_ = "motorbike" x, y = bbox_annotation["origin"] @@ -399,7 +423,10 @@ def generator(): ] mask = cv2.cvtColor( cv2.imread( - str(sequence_path / vehicle_type_segmentation["filename"]) + str( + sequence_path + / vehicle_type_segmentation["filename"] + ) ), cv2.COLOR_BGR2RGB, ) diff --git a/tests/test_data/test_task_ingestion.py b/tests/test_data/test_task_ingestion.py index 5a846364..5f08b1ec 100644 --- a/tests/test_data/test_task_ingestion.py +++ b/tests/test_data/test_task_ingestion.py @@ -27,7 +27,9 @@ def make_image(i) -> Path: path = DATA_DIR / f"img_{i}.jpg" if not path.exists(): img = np.zeros((512, 512, 3), dtype=np.uint8) - img[0:10, 0:10] = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + img[0:10, 0:10] = np.random.randint( + 0, 255, (10, 10, 3), dtype=np.uint8 + ) cv2.imwrite(str(path), img) return path @@ -183,7 +185,12 @@ def generator3(): "annotation": { "type": "polyline", "class": "water", - "points": [(0.1, 0.7), (0.5, 0.2), (0.3, 0.3), (0.12, 0.45)], + "points": [ + (0.1, 0.7), + (0.5, 0.2), + (0.3, 0.3), + (0.12, 0.45), + ], }, } @@ -234,7 +241,14 @@ def 
generator4(): dataset.add(generator4()).make_splits((1, 0, 0)) classes_list, classes = dataset.get_classes() - assert set(classes_list) == {"dog", "cat", "water", "grass", "bike", "body"} + assert set(classes_list) == { + "dog", + "cat", + "water", + "grass", + "bike", + "body", + } assert set(classes["land-segmentation"]) == {"water", "grass"} assert set(classes["animals-boxes"]) == {"dog", "cat"} assert set(classes["land-segmentation-2"]) == {"water"} diff --git a/tests/test_nn_archive/test_nn_archive.py b/tests/test_nn_archive/test_nn_archive.py index e6ab55be..7cff6f6f 100644 --- a/tests/test_nn_archive/test_nn_archive.py +++ b/tests/test_nn_archive/test_nn_archive.py @@ -26,14 +26,22 @@ def create_onnx_model(): - input0 = helper.make_tensor_value_info("input0", TensorProto.FLOAT, [1, 3, 64, 64]) + input0 = helper.make_tensor_value_info( + "input0", TensorProto.FLOAT, [1, 3, 64, 64] + ) input1 = helper.make_tensor_value_info( "input1", TensorProto.FLOAT, [1, 3, 128, 128] ) - output0 = helper.make_tensor_value_info("output0", TensorProto.FLOAT, [1, 10]) - output1 = helper.make_tensor_value_info("output1", TensorProto.FLOAT, [1, 5, 5, 5]) - graph = helper.make_graph([], "DummyModel", [input0, input1], [output0, output1]) + output0 = helper.make_tensor_value_info( + "output0", TensorProto.FLOAT, [1, 10] + ) + output1 = helper.make_tensor_value_info( + "output1", TensorProto.FLOAT, [1, 5, 5, 5] + ) + graph = helper.make_graph( + [], "DummyModel", [input0, input1], [output0, output1] + ) model = helper.make_model(graph, producer_name="DummyModelProducer") checker.check_model(model) @@ -148,7 +156,12 @@ def test_layout(): ) assert inp.layout == "CHWD" out = Output( - **{"name": "output", "dtype": "float32", "shape": [1, 10], "layout": "nc"} + **{ + "name": "output", + "dtype": "float32", + "shape": [1, 10], + "layout": "nc", + } ) assert out.layout == "NC" diff --git a/tests/test_utils/test_config.py b/tests/test_utils/test_config.py index 52420cc1..18ed93b3 100644 
--- a/tests/test_utils/test_config.py +++ b/tests/test_utils/test_config.py @@ -73,10 +73,17 @@ def test_invalid_config_path(): def test_config_simple(config_file: str): cfg = Config.get_config(config_file) - assert cfg.sub_config.str_sub_param == CONFIG_DATA["sub_config"]["str_sub_param"] - assert cfg.sub_config.int_sub_param == CONFIG_DATA["sub_config"]["int_sub_param"] assert ( - cfg.sub_config.float_sub_param == CONFIG_DATA["sub_config"]["float_sub_param"] + cfg.sub_config.str_sub_param + == CONFIG_DATA["sub_config"]["str_sub_param"] + ) + assert ( + cfg.sub_config.int_sub_param + == CONFIG_DATA["sub_config"]["int_sub_param"] + ) + assert ( + cfg.sub_config.float_sub_param + == CONFIG_DATA["sub_config"]["float_sub_param"] ) @@ -85,10 +92,16 @@ def test_config_simple_override(config_file: str): "sub_config.str_sub_param": "sub_param_override", } cfg = Config.get_config(config_file, overrides) - assert cfg.sub_config.str_sub_param == overrides["sub_config.str_sub_param"] - assert cfg.sub_config.int_sub_param == CONFIG_DATA["sub_config"]["int_sub_param"] assert ( - cfg.sub_config.float_sub_param == CONFIG_DATA["sub_config"]["float_sub_param"] + cfg.sub_config.str_sub_param == overrides["sub_config.str_sub_param"] + ) + assert ( + cfg.sub_config.int_sub_param + == CONFIG_DATA["sub_config"]["int_sub_param"] + ) + assert ( + cfg.sub_config.float_sub_param + == CONFIG_DATA["sub_config"]["float_sub_param"] ) @@ -96,9 +109,13 @@ def test_config_list_override(config_file: str): overrides = ["sub_config.str_sub_param", "sub_param_override"] cfg = Config.get_config(config_file, overrides) assert cfg.sub_config.str_sub_param == overrides[1] - assert cfg.sub_config.int_sub_param == CONFIG_DATA["sub_config"]["int_sub_param"] assert ( - cfg.sub_config.float_sub_param == CONFIG_DATA["sub_config"]["float_sub_param"] + cfg.sub_config.int_sub_param + == CONFIG_DATA["sub_config"]["int_sub_param"] + ) + assert ( + cfg.sub_config.float_sub_param + == 
CONFIG_DATA["sub_config"]["float_sub_param"] ) with pytest.raises(ValueError): Config.get_config(config_file, ["sub_config.str_sub_param"]) @@ -197,10 +214,17 @@ def test_from_dict(): "nested_dict_param.a.b": 2, }, ) - assert cfg.sub_config.str_sub_param == CONFIG_DATA["sub_config"]["str_sub_param"] - assert cfg.sub_config.int_sub_param == CONFIG_DATA["sub_config"]["int_sub_param"] assert ( - cfg.sub_config.float_sub_param == CONFIG_DATA["sub_config"]["float_sub_param"] + cfg.sub_config.str_sub_param + == CONFIG_DATA["sub_config"]["str_sub_param"] + ) + assert ( + cfg.sub_config.int_sub_param + == CONFIG_DATA["sub_config"]["int_sub_param"] + ) + assert ( + cfg.sub_config.float_sub_param + == CONFIG_DATA["sub_config"]["float_sub_param"] ) assert cfg.nested_list_param[0][1] == 3 assert cfg.nested_dict_param["a"]["b"] == 2 diff --git a/tests/test_utils/test_filesystem.py b/tests/test_utils/test_filesystem.py index a2dde5e9..a4cceefb 100644 --- a/tests/test_utils/test_filesystem.py +++ b/tests/test_utils/test_filesystem.py @@ -6,7 +6,10 @@ import pytest from luxonis_ml.utils import environ -from luxonis_ml.utils.filesystem import LuxonisFileSystem, _get_protocol_and_path +from luxonis_ml.utils.filesystem import ( + LuxonisFileSystem, + _get_protocol_and_path, +) URL_PATH = "luxonis-test-bucket/luxonis-ml-test-data/fs_test_data" @@ -29,13 +32,17 @@ # NOTE: needed for tests running in GitHub Actions using the matrix strategy # to avoid race conditions when running tests in parallel -def get_os_python_specific_url(protocol: str, platform: str, python_version: str): +def get_os_python_specific_url( + protocol: str, platform: str, python_version: str +): return f"{protocol}://{URL_PATH}_{platform}_{python_version}" @pytest.fixture def fs(request, python_version: str, platform_name: str): - url_path = get_os_python_specific_url(request.param, platform_name, python_version) + url_path = get_os_python_specific_url( + request.param, platform_name, python_version + ) yield 
LuxonisFileSystem(url_path) @@ -189,7 +196,8 @@ def check_contents(dir_path: Path): file_path = Path(dir_path, f"file_{i}.txt") assert file_path.exists() assert ( - file_path.read_text() == (LOCAL_DIR_PATH / f"file_{i}.txt").read_text() + file_path.read_text() + == (LOCAL_DIR_PATH / f"file_{i}.txt").read_text() ) with tempfile.TemporaryDirectory() as tempdir: @@ -222,8 +230,12 @@ def test_walk_dir(fs: LuxonisFileSystem): "test_dir_download", ], ) -def test_static_download(protocol: str, python_version: str, platform_name: str): - url_root = get_os_python_specific_url(protocol, platform_name, python_version) +def test_static_download( + protocol: str, python_version: str, platform_name: str +): + url_root = get_os_python_specific_url( + protocol, platform_name, python_version + ) with tempfile.TemporaryDirectory() as tempdir: url = f"{url_root}/file.txt" path = LuxonisFileSystem.download(url, tempdir) @@ -248,7 +260,9 @@ def test_static_download(protocol: str, python_version: str, platform_name: str) ], ) def test_static_upload(protocol: str, python_version: str, platform_name: str): - url_root = get_os_python_specific_url(protocol, platform_name, python_version) + url_root = get_os_python_specific_url( + protocol, platform_name, python_version + ) with tempfile.TemporaryDirectory() as tempdir: url = f"{url_root}/_file_upload_test.txt" LuxonisFileSystem.upload(LOCAL_FILE_PATH, url) @@ -266,7 +280,8 @@ def test_static_upload(protocol: str, python_version: str, platform_name: str): file_path = Path(dir_path, f"file_{i}.txt") assert file_path.exists() assert ( - file_path.read_text() == (LOCAL_DIR_PATH / f"file_{i}.txt").read_text() + file_path.read_text() + == (LOCAL_DIR_PATH / f"file_{i}.txt").read_text() ) fs = LuxonisFileSystem(url_root)