cleanlab · saskra · Feb 7, 2025 · Feb 7, 2025 · Feb 21, 2025 · Feb 21, 2025
diff --git a/src/cleanvision/dataset/fsspec_dataset.py b/src/cleanvision/dataset/fsspec_dataset.py
@@ -19,6 +19,7 @@ def __init__(
         data_folder: Optional[str] = None,
         filepaths: Optional[List[str]] = None,
         storage_opts: Dict[str, str] = {},
+        verbose: bool = True,
     ) -> None:
         super().__init__()
         self.storage_opts = storage_opts
@@ -32,7 +33,7 @@ def __init__(
             self.fs, dataset_path = fsspec.core.url_to_fs(
                 data_folder, **self.storage_opts
             )
-            self._filepaths = self.__get_filepaths(dataset_path)
+            self._filepaths = self.__get_filepaths(dataset_path, verbose)
         else:
             assert filepaths is not None
             if len(filepaths) != len(set(filepaths)):
@@ -64,10 +65,11 @@ def get_name(self, item: Union[int, str]) -> str:
         assert isinstance(item, str)
         return item.split("/")[-1]
 
-    def __get_filepaths(self, dataset_path: str) -> List[str]:
+    def __get_filepaths(self, dataset_path: str, verbose: bool) -> List[str]:
         """See an issue here: https://github.com/fsspec/filesystem_spec/issues/1019
         There's a problem with proper patterning on /**/ in fsspec"""
-        print(f"Reading images from {dataset_path}")
+        if verbose:
+            print(f"Reading images from {dataset_path}")
         filepaths = []
         for ext in IMAGE_FILE_EXTENSIONS:
             # initial *.ext search, top level

diff --git a/src/cleanvision/dataset/utils.py b/src/cleanvision/dataset/utils.py
@@ -19,11 +19,16 @@ def build_dataset(
     image_key: Optional[str] = None,
     torchvision_dataset: Optional["VisionDataset"] = None,
     storage_opts: Dict[str, str] = {},
+    verbose: bool = True,
 ) -> Dataset:
     if data_path:
-        return FSDataset(data_folder=data_path, storage_opts=storage_opts)
+        return FSDataset(
+            data_folder=data_path, storage_opts=storage_opts, verbose=verbose
+        )
     elif filepaths:
-        return FSDataset(filepaths=filepaths, storage_opts=storage_opts)
+        return FSDataset(
+            filepaths=filepaths, storage_opts=storage_opts, verbose=verbose
+        )
     elif hf_dataset and image_key:
         return HFDataset(hf_dataset, image_key)
     elif torchvision_dataset:

diff --git a/src/cleanvision/imagelab.py b/src/cleanvision/imagelab.py
@@ -124,6 +124,7 @@ def __init__(
         image_key: Optional[str] = None,
         torchvision_dataset: Optional["VisionDataset"] = None,
         storage_opts: Dict[str, Any] = {},
+        verbose: bool = True,
     ) -> None:
         self._dataset = build_dataset(
             data_path,
@@ -132,6 +133,7 @@ def __init__(
             image_key,
             torchvision_dataset,
             storage_opts=storage_opts,
+            verbose=verbose,
         )
         if len(self._dataset) == 0:
             raise ValueError("No images found in the dataset specified")
@@ -276,6 +278,7 @@ def find_issues(
                 dataset=self._dataset,
                 imagelab_info=self.info,
                 n_jobs=n_jobs,
+                verbose=verbose,
             )
 
             # update issues, issue_summary and info

diff --git a/src/cleanvision/issue_managers/duplicate_issue_manager.py b/src/cleanvision/issue_managers/duplicate_issue_manager.py
@@ -107,6 +107,7 @@ def find_issues(
         dataset: Optional[Dataset] = None,
         imagelab_info: Optional[Dict[str, Any]] = None,
         n_jobs: Optional[int] = None,
+        verbose: Optional[bool] = None,
         **kwargs: Any,
     ) -> None:
         super().find_issues(**kwargs)
@@ -125,7 +126,9 @@ def find_issues(
 
         results: List[Dict[str, Union[str, int]]] = []
         if n_jobs == 1:
-            for idx in tqdm(dataset.index):
+            for idx in tqdm(
+                dataset.index, leave=verbose, desc="Computing hashes", smoothing=0
+            ):
                 results.append(compute_hash(idx, dataset, to_compute, self.params))
         else:
             args = [
@@ -145,6 +148,9 @@ def find_issues(
                             compute_hash_wrapper, args, chunksize=chunksize
                         ),
                         total=len(dataset),
+                        leave=verbose,
+                        desc="Computing hashes",
+                        smoothing=0,
                     )
                 )
 

diff --git a/src/cleanvision/issue_managers/image_property_issue_manager.py b/src/cleanvision/issue_managers/image_property_issue_manager.py
@@ -114,6 +114,7 @@ def find_issues(
         dataset: Optional[Dataset] = None,
         imagelab_info: Optional[Dict[str, Any]] = None,
         n_jobs: Optional[int] = None,
+        verbose: Optional[bool] = None,
         **kwargs: Any,
     ) -> None:
         super().find_issues(**kwargs)
@@ -138,7 +139,9 @@ def find_issues(
         if to_be_computed:
             results: List[Dict[str, Union[int, float, str]]] = []
             if n_jobs == 1:
-                for idx in tqdm(dataset.index):
+                for idx in tqdm(
+                    dataset.index, leave=verbose, desc="Computing scores", smoothing=0
+                ):
                     results.append(
                         compute_scores(
                             idx, dataset, to_be_computed, self.image_properties
@@ -162,6 +165,9 @@ def find_issues(
                                 compute_scores_wrapper, args, chunksize=chunksize
                             ),
                             total=len(dataset),
+                            leave=verbose,
+                            desc="Computing scores",
+                            smoothing=0,
                         )
                     )
 

diff --git a/src/cleanvision/utils/base_issue_manager.py b/src/cleanvision/utils/base_issue_manager.py
@@ -32,6 +32,7 @@ def check_params(**kwargs: Any) -> None:
             "dataset": Dataset,
             "imagelab_info": Dict[str, Any],
             "n_jobs": int,
+            "verbose": bool,
         }
 
         for name, value in kwargs.items():

diff --git a/src/cleanvision/utils/utils.py b/src/cleanvision/utils/utils.py
@@ -51,6 +51,7 @@ def get_filepaths(
     """
 
     abs_dir_path = os.path.abspath(os.path.expanduser(dir_path))
+    # TODO: Suppress this print according to the verbosity level.
     print(f"Reading images from {abs_dir_path}")
     filepaths = []
     for ext in IMAGE_FILE_EXTENSIONS: