Skip to content

Commit

Permalink
Merge branch 'main' into files-search-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
dbogunowicz authored Sep 8, 2023
2 parents 00fec74 + 05e55e5 commit 40bd274
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 102 deletions.
6 changes: 4 additions & 2 deletions src/sparsezoo/api/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,10 @@ def _parse_fields(self) -> None:
def parse_list_fields_to_string(self, fields: List[str]) -> str:
parsed_fields = ""
for field in fields:
camel_case_field = to_camel_case(field)
parsed_fields += f"{camel_case_field} "
field_without_arguments, sep, args = field.partition("(")
camel_case_field = to_camel_case(field_without_arguments)
args_str = f"{sep}{args}" if args else ""
parsed_fields += f"{camel_case_field}{args_str} "
if camel_case_field in DEFAULT_FIELDS:
stringified_fields = self.parse_list_fields_to_string(
DEFAULT_FIELDS.get(camel_case_field)
Expand Down
23 changes: 10 additions & 13 deletions src/sparsezoo/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,20 @@ def __init__(self, source: str, download_path: Optional[str] = None):
self.sample_originals: Directory = self._directory_from_files(
files,
directory_class=Directory,
display_name="sample_originals",
display_name="sample-originals",
)
self.sample_inputs: NumpyDirectory = self._directory_from_files(
files,
directory_class=NumpyDirectory,
display_name="sample_inputs",
display_name="sample-inputs",
)

self.model_card: File = self._file_from_files(files, display_name="model.md")

self.sample_outputs = self._directory_from_files(
files,
directory_class=NumpyDirectory,
display_name="sample_outputs",
display_name="sample-outputs",
allow_multiple_outputs=True,
regex=True,
)
Expand All @@ -133,7 +133,7 @@ def __init__(self, source: str, download_path: Optional[str] = None):
] = self._sample_outputs_list_to_dict(self.sample_outputs)

self.sample_labels: Directory = self._directory_from_files(
files, directory_class=Directory, display_name="sample_labels"
files, directory_class=Directory, display_name="sample-labels"
)

self.deployment: SelectDirectory = self._directory_from_files(
Expand All @@ -150,12 +150,9 @@ def __init__(self, source: str, download_path: Optional[str] = None):

self.logs: Directory = self._directory_from_files(files, display_name="logs")

self.recipes: SelectDirectory = self._directory_from_files(
files,
directory_class=SelectDirectory,
display_name="recipe",
stub_params=self.stub_params,
)
self.recipes = self._file_from_files(files, display_name="^recipe", regex=True)
if isinstance(self.recipes, File):
self.recipes = [self.recipes]

self._onnx_gz: OnnxGz = self._directory_from_files(
files, directory_class=OnnxGz, display_name="model.onnx.tar.gz"
Expand Down Expand Up @@ -691,7 +688,7 @@ def _sample_outputs_list_to_dict(
if not isinstance(directories, list):
# if found a single 'sample_outputs' directory,
# assume it should be mapped to its native framework
expected_name = "sample_outputs"
expected_name = "sample-outputs"
if directories.name not in [expected_name, expected_name + ".tar.gz"]:
raise ValueError(
"Found single folder (or tar.gz archive)"
Expand All @@ -701,7 +698,7 @@ def _sample_outputs_list_to_dict(
engine_to_numpydir_map["framework"] = directories

else:
# if found multiple 'sample_outputs' directories,
# if found multiple 'sample-outputs' directories,
# use directory name to relate it with the appropriate
# inference engine
for directory in directories:
Expand All @@ -710,7 +707,7 @@ def _sample_outputs_list_to_dict(
engine_name = engine_name.replace(".tar.gz", "")
if engine_name not in ENGINES:
raise ValueError(
f"The name of the 'sample_outputs' directory should "
f"The name of the 'sample-outputs' directory should "
f"end with an engine name (one of the {ENGINES}). "
f"However, the name is {directory.name}."
)
Expand Down
28 changes: 9 additions & 19 deletions src/sparsezoo/model/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def load_files_from_stub(
fields=[
"model_id",
"model_onnx_size_compressed_bytes",
"files",
"files(version: 2)",
"benchmark_results",
"training_results",
"repo_name",
Expand All @@ -168,9 +168,11 @@ def load_files_from_stub(
model_id = model["model_id"]

files = model.get("files")
if len(files) == 0:
raise ValueError(f"No files found for stub {stub}")

include_file_download_url(files)
files = restructure_request_json(request_json=files)

if params is not None:
files = filter_files(files=files, params=params)

Expand Down Expand Up @@ -308,7 +310,7 @@ def save_outputs_to_tar(

path = os.path.join(
os.path.dirname(sample_inputs.path),
f"sample_outputs_{engine_type}",
f"sample-outputs_{engine_type}",
)
if not os.path.exists(path):
os.mkdir(path)
Expand Down Expand Up @@ -382,26 +384,14 @@ def restructure_request_json(
file_dict_deployment["file_type"] = "deployment"
request_json.append(file_dict_deployment)

# create recipes
recipe_dicts_list = fetch_from_request_json(request_json, "file_type", "recipe")
for (idx, file_dict) in recipe_dicts_list:
display_name = file_dict["display_name"]
# make sure that recipe name has a
# format `recipe_{...}`.
prefix = "recipe_"
if not display_name.startswith(prefix):
display_name = prefix + display_name
file_dict["display_name"] = display_name
request_json[idx] = file_dict

# restructure inputs/labels/originals/outputs directories
# use `sample-inputs.tar.gz` to simulate non-existent directories

files_to_create = [
"sample_inputs.tar.gz",
"sample_labels.tar.gz",
"sample_originals.tar.gz",
"sample_outputs.tar.gz",
"sample-inputs.tar.gz",
"sample-labels.tar.gz",
"sample-originals.tar.gz",
"sample-outputs.tar.gz",
]
types = ["inputs", "labels", "originals", "outputs"]
for file_name, type in zip(files_to_create, types):
Expand Down
2 changes: 1 addition & 1 deletion src/sparsezoo/objects/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def download(
else:
for file in self.files:
file.download(
destination_path=os.path.join(destination_path, self.name)
destination_path=destination_path,
)
file._path = os.path.join(destination_path, self.name, file.name)

Expand Down
1 change: 1 addition & 0 deletions tests/sparsezoo/analyze/bert_pruned80_quant-none-vnni.json

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion tests/sparsezoo/analyze/bert_pruned_quantized.json

This file was deleted.

4 changes: 2 additions & 2 deletions tests/sparsezoo/analyze/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@
"bert_pruned_quantized": {
"stub": (
"zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/"
"12layer_pruned80_quant-none-vnni"
"pruned80_quant-none-vnni"
),
"truth": f"{os.path.dirname(__file__)}/bert_pruned_quantized.json",
"truth": f"{os.path.dirname(__file__)}/bert_pruned80_quant-none-vnni.json",
},
"resnet50_pruned_quantized": {
"stub": (
Expand Down
53 changes: 24 additions & 29 deletions tests/sparsezoo/model/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,14 @@
"onnx",
"model.onnx",
"model.onnx.tar.gz",
"recipe",
"sample_inputs.tar.gz",
"sample_originals.tar.gz",
"sample_labels.tar.gz",
"sample_outputs.tar.gz",
"sample_inputs",
"sample_originals",
"sample_labels",
"sample_outputs",
"sample-inputs.tar.gz",
"sample-originals.tar.gz",
"sample-labels.tar.gz",
"sample-outputs.tar.gz",
"sample-inputs",
"sample-originals",
"sample-labels",
"sample-outputs",
"benchmarks.yaml",
"eval.yaml",
"analysis.yaml",
Expand Down Expand Up @@ -116,9 +115,7 @@ def test_model_from_stub(self, setup):

@staticmethod
def _assert_correct_files_downloaded(model, args):
if args[0] == "recipe":
assert len(model.recipes.available) == 1
elif args[0] == "checkpoint":
if args[0] == "checkpoint":
assert len(model.training.available) == 1
elif args[0] == "deployment":
assert len(model.training.available) == 1
Expand All @@ -141,7 +138,7 @@ def _assert_validation_results_exist(model):
"pytorch/sparseml/imagenet/pruned-moderate"
),
True,
files_ic,
files_ic.union({"recipe.md", "recipe_transfer_learn.md"}),
),
(
(
Expand All @@ -150,7 +147,7 @@ def _assert_validation_results_exist(model):
"pytorch/huggingface/squad/pruned80_quant-none-vnni"
),
False,
files_nlp,
files_nlp.union({"recipe.md"}),
),
(
(
Expand All @@ -159,22 +156,22 @@ def _assert_validation_results_exist(model):
"pytorch/ultralytics/coco/pruned_quant-aggressive_94"
),
True,
files_yolo,
files_yolo.union({"recipe.md", "recipe_transfer_learn.md"}),
),
(
"yolov5-x-coco-pruned70.4block_quantized",
False,
files_yolo,
files_yolo.union({"recipe.md", "recipe_transfer_learn.md"}),
),
(
"yolov5-n6-voc_coco-pruned55",
False,
files_yolo,
files_yolo.union({"recipe.md"}),
),
(
"resnet_v1-50-imagenet-channel30_pruned90_quantized",
False,
files_yolo,
files_yolo.union({"recipe.md", "recipe_transfer_classification.md"}),
),
],
scope="function",
Expand All @@ -196,11 +193,10 @@ def test_folder_structure(self, setup):
_, clone_sample_outputs, expected_files, temp_dir = setup
if clone_sample_outputs:
for file_name in [
"sample_outputs_onnxruntime",
"sample_outputs_deepsparse",
"sample-outputs_onnxruntime",
"sample-outputs_deepsparse",
]:
expected_files.update({file_name, file_name + ".tar.gz"})

assert not set(os.listdir(temp_dir.name)).difference(expected_files)

def test_validate(self, setup):
Expand Down Expand Up @@ -246,19 +242,19 @@ def _add_mock_files(directory_path: str, clone_sample_outputs: bool):
)
Path(optional_recipe_yaml).touch()

# add remaining `sample_{...}` files, that may be potentially
# add remaining `sample-{...}` files, that may be potentially
# missing
mock_sample_file = os.path.join(directory_path, "sample_inputs.tar.gz")
for file_name in ["sample_originals.tar.gz", "sample_labels.tar.gz"]:
mock_sample_file = os.path.join(directory_path, "sample-inputs.tar.gz")
for file_name in ["sample-originals.tar.gz", "sample-labels.tar.gz"]:
expected_file_dir = os.path.join(directory_path, file_name)
if not os.path.isfile(expected_file_dir):
shutil.copyfile(mock_sample_file, expected_file_dir)

if clone_sample_outputs:
sample_outputs_file = os.path.join(directory_path, "sample_outputs.tar.gz")
sample_outputs_file = os.path.join(directory_path, "sample-outputs.tar.gz")
for file_name in [
"sample_outputs_onnxruntime.tar.gz",
"sample_outputs_deepsparse.tar.gz",
"sample-outputs_onnxruntime.tar.gz",
"sample-outputs_deepsparse.tar.gz",
]:
shutil.copyfile(
sample_outputs_file, os.path.join(directory_path, file_name)
Expand All @@ -271,12 +267,11 @@ def _test_generate_outputs_single_engine(self, engine, model_directory):
if engine == "onnxruntime":
# test whether the functionality saves the numpy files to tar properly
tar_file_expected_path = os.path.join(
directory_path, f"sample_outputs_{engine}.tar.gz"
directory_path, f"sample-outputs_{engine}.tar.gz"
)
if os.path.isfile(tar_file_expected_path):
os.remove(tar_file_expected_path)
save_to_tar = True

output_expected = next(iter(model_directory.sample_outputs[engine]))
output_expected = list(output_expected.values())
output = next(
Expand Down
Loading

0 comments on commit 40bd274

Please sign in to comment.