Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Reduce output / enhance verbosity handling #258

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/cleanvision/dataset/fsspec_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(
data_folder: Optional[str] = None,
filepaths: Optional[List[str]] = None,
storage_opts: Dict[str, str] = {},
verbose: bool = True,
) -> None:
super().__init__()
self.storage_opts = storage_opts
Expand All @@ -32,7 +33,7 @@ def __init__(
self.fs, dataset_path = fsspec.core.url_to_fs(
data_folder, **self.storage_opts
)
self._filepaths = self.__get_filepaths(dataset_path)
self._filepaths = self.__get_filepaths(dataset_path, verbose)
else:
assert filepaths is not None
if len(filepaths) != len(set(filepaths)):
Expand Down Expand Up @@ -64,10 +65,11 @@ def get_name(self, item: Union[int, str]) -> str:
assert isinstance(item, str)
return item.split("/")[-1]

def __get_filepaths(self, dataset_path: str) -> List[str]:
def __get_filepaths(self, dataset_path: str, verbose: bool) -> List[str]:
"""See an issue here: https://github.com/fsspec/filesystem_spec/issues/1019
There's a problem with proper patterning on /**/ in fsspec"""
print(f"Reading images from {dataset_path}")
if verbose:
print(f"Reading images from {dataset_path}")
filepaths = []
for ext in IMAGE_FILE_EXTENSIONS:
# initial *.ext search, top level
Expand Down
9 changes: 7 additions & 2 deletions src/cleanvision/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@ def build_dataset(
image_key: Optional[str] = None,
torchvision_dataset: Optional["VisionDataset"] = None,
storage_opts: Dict[str, str] = {},
verbose: bool = True,
) -> Dataset:
if data_path:
return FSDataset(data_folder=data_path, storage_opts=storage_opts)
return FSDataset(
data_folder=data_path, storage_opts=storage_opts, verbose=verbose
)
elif filepaths:
return FSDataset(filepaths=filepaths, storage_opts=storage_opts)
return FSDataset(
filepaths=filepaths, storage_opts=storage_opts, verbose=verbose
)
elif hf_dataset and image_key:
return HFDataset(hf_dataset, image_key)
elif torchvision_dataset:
Expand Down
3 changes: 3 additions & 0 deletions src/cleanvision/imagelab.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def __init__(
image_key: Optional[str] = None,
torchvision_dataset: Optional["VisionDataset"] = None,
storage_opts: Dict[str, Any] = {},
verbose: bool = True,
) -> None:
self._dataset = build_dataset(
data_path,
Expand All @@ -132,6 +133,7 @@ def __init__(
image_key,
torchvision_dataset,
storage_opts=storage_opts,
verbose=verbose,
)
if len(self._dataset) == 0:
raise ValueError("No images found in the dataset specified")
Expand Down Expand Up @@ -276,6 +278,7 @@ def find_issues(
dataset=self._dataset,
imagelab_info=self.info,
n_jobs=n_jobs,
verbose=verbose,
)

# update issues, issue_summary and info
Expand Down
8 changes: 7 additions & 1 deletion src/cleanvision/issue_managers/duplicate_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def find_issues(
dataset: Optional[Dataset] = None,
imagelab_info: Optional[Dict[str, Any]] = None,
n_jobs: Optional[int] = None,
verbose: Optional[bool] = None,
**kwargs: Any,
) -> None:
super().find_issues(**kwargs)
Expand All @@ -125,7 +126,9 @@ def find_issues(

results: List[Dict[str, Union[str, int]]] = []
if n_jobs == 1:
for idx in tqdm(dataset.index):
for idx in tqdm(
dataset.index, leave=verbose, desc="Computing hashes", smoothing=0
):
results.append(compute_hash(idx, dataset, to_compute, self.params))
else:
args = [
Expand All @@ -145,6 +148,9 @@ def find_issues(
compute_hash_wrapper, args, chunksize=chunksize
),
total=len(dataset),
leave=verbose,
desc="Computing hashes",
smoothing=0,
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def find_issues(
dataset: Optional[Dataset] = None,
imagelab_info: Optional[Dict[str, Any]] = None,
n_jobs: Optional[int] = None,
verbose: Optional[bool] = None,
**kwargs: Any,
) -> None:
super().find_issues(**kwargs)
Expand All @@ -138,7 +139,9 @@ def find_issues(
if to_be_computed:
results: List[Dict[str, Union[int, float, str]]] = []
if n_jobs == 1:
for idx in tqdm(dataset.index):
for idx in tqdm(
dataset.index, leave=verbose, desc="Computing scores", smoothing=0
):
results.append(
compute_scores(
idx, dataset, to_be_computed, self.image_properties
Expand All @@ -162,6 +165,9 @@ def find_issues(
compute_scores_wrapper, args, chunksize=chunksize
),
total=len(dataset),
leave=verbose,
desc="Computing scores",
smoothing=0,
)
)

Expand Down
1 change: 1 addition & 0 deletions src/cleanvision/utils/base_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def check_params(**kwargs: Any) -> None:
"dataset": Dataset,
"imagelab_info": Dict[str, Any],
"n_jobs": int,
"verbose": bool,
}

for name, value in kwargs.items():
Expand Down
1 change: 1 addition & 0 deletions src/cleanvision/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def get_filepaths(
"""

abs_dir_path = os.path.abspath(os.path.expanduser(dir_path))
# ToDo: Suppress print according to verbosity level
print(f"Reading images from {abs_dir_path}")
filepaths = []
for ext in IMAGE_FILE_EXTENSIONS:
Expand Down
Loading