diff --git a/.github/workflows/3-test-docker.yml b/.github/workflows/3-test-docker.yml index ac6b59061..c3c443ae1 100755 --- a/.github/workflows/3-test-docker.yml +++ b/.github/workflows/3-test-docker.yml @@ -44,50 +44,3 @@ jobs: -c /armory-repo/pyproject.toml \ -m "not docker_required and unit" \ ./tests/ - - - docker-deepspeech-unit: - name: ☁️ Docker Deepspeech Image Tests - runs-on: ubuntu-latest - steps: - - name: 🐄 checkout armory full depth with tags for scm - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: 🐍 Use Python 3.9 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - - name: ⚙️ Installing Armory - shell: bash - run: | - pip install . - armory configure --use-defaults - - - name: 🚧 Build the Container - run: | - python docker/build.py --framework pytorch-deepspeech - - - name: 🤞 Run Image tests - run: | - IMAGE_VERSION=`armory --show-docker-version-tag` - - docker run \ - --rm \ - --workdir /armory-repo \ - twosixarmory/pytorch-deepspeech:${IMAGE_VERSION} \ - pytest \ - -c /armory-repo/pyproject.toml \ - -m "not docker_required and unit" \ - ./tests/ - - docker run \ - --rm \ - --workdir /armory-repo \ - twosixarmory/pytorch-deepspeech:${IMAGE_VERSION} \ - pytest \ - -c /armory-repo/pyproject.toml \ - -m "pytorch_deepspeech" \ - ./tests/ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d24b48ba9..aebb6c6bb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -74,7 +74,6 @@ jobs: matrix: include: - image: armory - - image: pytorch-deepspeech steps: - name: 🐍 Setup Python 3.9 uses: actions/setup-python@v4 diff --git a/README.md b/README.md index 992d753f8..ace021c6c 100644 --- a/README.md +++ b/README.md @@ -61,21 +61,21 @@ from the evaluation can be found in the output directory. To later close the interactive container simply run CTRL+C from the terminal where this command was ran. 
## armory launch -* `armory launch ` +* `armory launch ` This will launch a framework specific container, with appropriate mounted volumes, for the user to attach to for debugging purposes. A command to attach to the container will be returned from this call, and it can be ran in a separate terminal. To later close the interactive container simply run CTRL+C from the terminal where this command was ran. -* `armory launch --jupyter`. +* `armory launch --jupyter`. Similar to the interactive launch, this will spin up a container for a specific framework, but will instead return the web address of a jupyter lab server where debugging can be performed. To close the jupyter server simply run CTRL+C from the terminal where this command was ran. ## armory exec -* `armory exec -- ` +* `armory exec -- ` This will run a specific command within a framework specific container. A notable use case for this would be to run test cases using pytest. After completion of the command the container will be removed. 
diff --git a/armory/__main__.py b/armory/__main__.py index 038161020..b058d77f8 100755 --- a/armory/__main__.py +++ b/armory/__main__.py @@ -190,7 +190,7 @@ def _docker_image(parser): "docker_image", metavar="", type=str, - help="docker image framework: 'armory', or 'pytorch-deepspeech'", + help="docker image framework: 'armory'", action=DockerImage, ) @@ -201,7 +201,7 @@ def _docker_image_optional(parser): default=armory.docker.images.ARMORY_IMAGE_NAME, metavar="", type=str, - help="docker image framework: 'armory', or 'pytorch-deepspeech'", + help="docker image framework: 'armory'", action=DockerImage, ) diff --git a/armory/baseline_models/pytorch/deep_speech.py b/armory/baseline_models/pytorch/deep_speech.py deleted file mode 100644 index b3efd23d1..000000000 --- a/armory/baseline_models/pytorch/deep_speech.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Automatic speech recognition model - -Model contributed by: MITRE Corporation -""" - -from typing import Optional - -from art.estimators.speech_recognition import PyTorchDeepSpeech - -from armory.utils.external_repo import ExternalRepoImport - -# Test for external repo at import time to fail fast -with ExternalRepoImport( - repo="SeanNaren/deepspeech.pytorch@V3.0", - experiment="librispeech_asr_snr_undefended.json", -): - from deepspeech_pytorch.model import DeepSpeech # noqa: F401 - - -def get_art_model( - model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None -) -> PyTorchDeepSpeech: - return PyTorchDeepSpeech(**wrapper_kwargs) diff --git a/armory/baseline_models/pytorch/sincnet.py b/armory/baseline_models/pytorch/sincnet.py deleted file mode 100644 index 37401045d..000000000 --- a/armory/baseline_models/pytorch/sincnet.py +++ /dev/null @@ -1,289 +0,0 @@ -""" -CNN model for raw audio classification - -Model contributed by: MITRE Corporation -Adapted from: https://github.com/mravanelli/SincNet -""" -from typing import Optional - -from art.estimators.classification import PyTorchClassifier -import 
numpy as np -import torch -from torch import nn - -from armory.utils.external_repo import ExternalRepoImport - -with ExternalRepoImport( - repo="hkakitani/SincNet", - experiment="librispeech_baseline_sincnet.json", -): - from SincNet import dnn_models - -# NOTE: Underlying dataset sample rate is 16 kHz. SincNet uses this SAMPLE_RATE to -# determine internal filter high cutoff frequency. -SAMPLE_RATE = 8000 -WINDOW_STEP_SIZE = 375 -WINDOW_LENGTH = int(SAMPLE_RATE * WINDOW_STEP_SIZE / 1000) - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -def numpy_random_preprocessing_fn(batch: np.ndarray): - """ - Standardize, then normalize sound clips - - Then generate a random cut of the input - """ - processed_batch = [] - for clip in batch: - # convert and normalize - signal = clip.astype(np.float32) - # Signal normalization - signal = signal / np.max(np.abs(signal)) - - # make a pseudorandom cut of size equal to WINDOW_LENGTH - # (from SincNet's create_batches_rnd) - signal_length = len(signal) - np.random.seed(signal_length) - signal_start = int( - np.random.randint(signal_length / WINDOW_LENGTH - 1) - * WINDOW_LENGTH - % signal_length - ) - signal_stop = signal_start + WINDOW_LENGTH - signal = signal[signal_start:signal_stop] - processed_batch.append(signal) - - return np.array(processed_batch) - - -def numpy_all_preprocessing_fn(batch: np.ndarray): - """ - Input is comprised of one or more clips, where each clip i - is given as an ndarray with shape (n_i,). - Preprocessing normalizes each clip and breaks each clip into an integer number - of non-overlapping segments of length WINDOW_LENGTH. 
- Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH) - """ - if len(batch) != 1: - raise NotImplementedError( - "Requires ART variable length input capability for batch size != 1" - ) - processed_batch = [] - for clip in batch: - # convert and normalize - signal = clip.astype(np.float64) - signal = signal / np.max(np.abs(signal)) - - # break into a number of chunks of equal length - num_chunks = int(len(signal) / WINDOW_LENGTH) - signal = signal[: num_chunks * WINDOW_LENGTH] - signal = np.reshape(signal, (num_chunks, WINDOW_LENGTH), order="C") - processed_batch.append(signal) - # remove outer batch (of size 1) - processed_batch = processed_batch[0] - return np.array(processed_batch) - - -def torch_random_preprocessing_fn(x): - """ - Standardize, then normalize sound clips - """ - if x.shape[0] != 1: - raise ValueError(f"Shape of batch x {x.shape[0]} != 1") - if x.dtype != torch.float32: - raise ValueError(f"dtype of batch x {x.dtype} != torch.float32") - if x.max() > 1.0: - raise ValueError(f"batch x max {x.max()} > 1.0") - if x.min() < -1.0: - raise ValueError(f"batch x min {x.min()} < -1.0") - x = x.squeeze(0) - - # Signal normalization - x = x / x.abs().max() - - # get pseudorandom chunk of fixed length (from SincNet's create_batches_rnd) - signal_length = len(x) - np.random.seed(signal_length) - start = int( - np.random.randint(signal_length / WINDOW_LENGTH - 1) - * WINDOW_LENGTH - % signal_length - ) - - x = x[start : start + WINDOW_LENGTH] - - x = x.unsqueeze(0) - return x - - -def torch_all_preprocessing_fn(x: torch.Tensor): - """ - Input is comprised of one or more clips, where each clip i - is given as an ndarray with shape (n_i,). - Preprocessing normalizes each clip and breaks each clip into an integer number - of non-overlapping segments of length WINDOW_LENGTH. 
- Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH) - """ - if x.shape[0] != 1: - raise NotImplementedError( - "Requires ART variable length input capability for batch size != 1" - ) - if x.max() > 1.0: - raise ValueError(f"batch x max {x.max()} > 1.0") - if x.min() < -1.0: - raise ValueError(f"batch x min {x.min()} < -1.0") - if x.dtype != torch.float32: - raise ValueError(f"dtype of batch x {x.dtype} != torch.float32") - x = x.squeeze(0) - - # Signal normalization - x = x / x.abs().max() - - # break into a number of chunks of equal length - num_chunks = int(len(x) / WINDOW_LENGTH) - x = x[: num_chunks * WINDOW_LENGTH] - x = x.reshape((num_chunks, WINDOW_LENGTH)) - - return x - - -def sincnet(weights_path: Optional[str] = None) -> dnn_models.SincWrapper: - """ - Set configuration options and instantiates SincWrapper object - """ - pretrained = weights_path is not None - if pretrained: - model_params = torch.load(weights_path, map_location=DEVICE) - else: - model_params = {} - CNN_params = model_params.get("CNN_model_par") - DNN1_params = model_params.get("DNN1_model_par") - DNN2_params = model_params.get("DNN2_model_par") - - # from SincNet/cfg/SincNet_dev_LibriSpeech.cfg - cnn_N_filt = [80, 60, 60] - cnn_len_filt = [251, 5, 5] - cnn_max_pool_len = [3, 3, 3] - cnn_use_laynorm_inp = True - cnn_use_batchnorm_inp = False - cnn_use_laynorm = [True, True, True] - cnn_use_batchnorm = [False, False, False] - cnn_act = ["relu", "relu", "relu"] - cnn_drop = [0.0, 0.0, 0.0] - - fc_lay = [2048, 2048, 2048] - fc_drop = [0.0, 0.0, 0.0] - fc_use_laynorm_inp = True - fc_use_batchnorm_inp = False - fc_use_batchnorm = [True, True, True] - fc_use_laynorm = [False, False, False] - fc_act = ["leaky_relu", "linear", "leaky_relu"] - - class_lay = [40] - class_drop = [0.0, 0.0] - class_use_laynorm_inp = True - class_use_batchnorm_inp = False - class_use_batchnorm = [False] - class_use_laynorm = [False] - class_act = ["softmax"] - - CNN_options = { - 
"input_dim": WINDOW_LENGTH, - "fs": SAMPLE_RATE, - "cnn_N_filt": cnn_N_filt, - "cnn_len_filt": cnn_len_filt, - "cnn_max_pool_len": cnn_max_pool_len, - "cnn_use_laynorm_inp": cnn_use_laynorm_inp, - "cnn_use_batchnorm_inp": cnn_use_batchnorm_inp, - "cnn_use_laynorm": cnn_use_laynorm, - "cnn_use_batchnorm": cnn_use_batchnorm, - "cnn_act": cnn_act, - "cnn_drop": cnn_drop, - "pretrained": pretrained, - "model_params": CNN_params, - } - - DNN1_options = { - "fc_lay": fc_lay, - "fc_drop": fc_drop, - "fc_use_batchnorm": fc_use_batchnorm, - "fc_use_laynorm": fc_use_laynorm, - "fc_use_laynorm_inp": fc_use_laynorm_inp, - "fc_use_batchnorm_inp": fc_use_batchnorm_inp, - "fc_act": fc_act, - "pretrained": pretrained, - "model_params": DNN1_params, - } - - DNN2_options = { - "input_dim": fc_lay[-1], - "fc_lay": class_lay, - "fc_drop": class_drop, - "fc_use_batchnorm": class_use_batchnorm, - "fc_use_laynorm": class_use_laynorm, - "fc_use_laynorm_inp": class_use_laynorm_inp, - "fc_use_batchnorm_inp": class_use_batchnorm_inp, - "fc_act": class_act, - } - - sincNet = dnn_models.SincWrapper(DNN2_options, DNN1_options, CNN_options) - - if pretrained: - sincNet.eval() - sincNet.load_state_dict(DNN2_params) - - else: - sincNet.train() - - return sincNet - - -class SincNetWrapper(nn.Module): - MODES = { - "random": torch_random_preprocessing_fn, - "all": torch_all_preprocessing_fn, - } - - def __init__(self, model_kwargs: dict, weights_path: Optional[str]) -> None: - super().__init__() - predict_mode = model_kwargs.pop("predict_mode", "all") - if predict_mode not in self.MODES: - raise ValueError(f"predict_mode {predict_mode} not in {tuple(self.MODES)}") - self.predict_mode = predict_mode - - self.model = sincnet(weights_path=weights_path, **model_kwargs) - self.model.to(DEVICE) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.training: - # preprocessing should be done before model for arbitrary length input - return self.model(x) - - x = self.MODES[self.predict_mode](x) - 
output = self.model(x) - if self.predict_mode == "all": - output = torch.mean(output, dim=0, keepdim=True) - return output - - -preprocessing_fn = numpy_random_preprocessing_fn - - -def get_art_model( - model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None -) -> PyTorchClassifier: - model = SincNetWrapper(model_kwargs, weights_path) - model.to(DEVICE) - - wrapped_model = PyTorchClassifier( - model, - loss=torch.nn.NLLLoss(), - optimizer=torch.optim.RMSprop( - model.parameters(), lr=0.001, alpha=0.95, eps=1e-8 - ), - input_shape=(None,), - nb_classes=40, - **wrapper_kwargs, - ) - return wrapped_model diff --git a/armory/datasets/README.md b/armory/datasets/README.md index 76c2d8ac2..27e4f9933 100644 --- a/armory/datasets/README.md +++ b/armory/datasets/README.md @@ -84,6 +84,19 @@ info, ds = load.load("digit") info, ds = load.from_directory("/armory/datasets/new_builds/digit/1.0.8") ``` +### Apache Beam Datasets + +Currently, `librispeech` and `librispeech_dev_clean` use apache beam to build. +Apache beam is not installed by default in the container due to older dependencies. +If building in the container, do: +``` +pip install apache-beam +``` + +When building, armory does not provide beam options by default. +This makes building VERY slow unless overrides are provided. +It is recommended that these are built directly using tfds on the command line. + ## Packaging and Uploading for Cache After a dataset has been successfully built and loaded (locally), it can be packaged and uploaded to the cache. @@ -91,43 +104,44 @@ After a dataset has been successfully built and loaded (locally), it can be pack First, it is recommended that you test the packaging and untarring process without upload/download. 
In python: -``` +```python from armory.datasets import package -package.package("my_dataset") # creates a tar.gz file -package.update("my_dataset") # adds the tar hash info to "cached_datasets.json" -package.verify("my_dataset") # uses the "cached_datasets.json" information to verify hash information on tar file -package.extract("my_dataset", overwrite=False) # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite -package.extract("my_dataset", overwrite=True) # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error) +my_dataset = "my_dataset" +package.package(my_dataset) # creates a tar.gz file +package.update(my_dataset) # adds the tar hash info to "cached_datasets.json" +package.verify(my_dataset) # uses the "cached_datasets.json" information to verify hash information on tar file +package.extract(my_dataset, overwrite=False) # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite +package.extract(my_dataset, overwrite=True) # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error) ``` If you can successfully load the dataset after extracting it here, this part is good. 
Now, to upload to s3 (you will need `ARMORY_PRIVATE_S3_ID` and `ARMORY_PRIVATE_S3_KEY`): -``` +```python from armory.datasets import upload -upload.upload("my_dataset") # this will fail, as you need to explicitly force it to be public -upload.upload("my_dataset", public=True) +upload.upload(my_dataset) # this will fail, as you need to explicitly force it to be public +upload.upload(my_dataset, public=True) ``` Or, alternatively to packaging and uploading, you can use this convenience function: -``` -package.add_to_cache("my_dataset", public=True) +```python +package.add_to_cache(my_dataset, public=True) ``` To download, which will download it directly to the tar cache directory, do: ``` from armory.datasets import download -download.download("my_dataset", overwrite=True, verify=True) +download.download(my_dataset, overwrite=True, verify=True) ``` You can also download and extract with: ``` from armory.datasets import load -load.ensure_download_extract("my_dataset", verify=True) +load.ensure_download_extract(my_dataset, verify=True) ``` or just try to load it directly ``` -load.load("my_dataset") +load.load(my_dataset) ``` # Running / Testing with current armory scenario files diff --git a/armory/datasets/cached_datasets.json b/armory/datasets/cached_datasets.json index a3b6a2fd8..add4b96e4 100644 --- a/armory/datasets/cached_datasets.json +++ b/armory/datasets/cached_datasets.json @@ -13,6 +13,13 @@ "url": null, "version": "1.0.8" }, + "librispeech_dev_test": { + "sha256": "5c5c6cb53e458e2415bc4f242122155d51f32d7e78770176afe01acb584c4caa", + "size": 2332265306, + "subdir": "librispeech_dev_test/2.1.0", + "url": null, + "version": "2.1.0" + }, "mnist": { "sha256": "fdc3408e29580367145e95ac7cb1d51e807105b174314cd52c16d27a13b98979", "size": 16920751, diff --git a/armory/datasets/preprocessing.py b/armory/datasets/preprocessing.py index 91e7c15b1..1ca0e4190 100644 --- a/armory/datasets/preprocessing.py +++ b/armory/datasets/preprocessing.py @@ -64,6 +64,24 @@ def 
xview(element): ) +@register +def librispeech(element, audio_kwargs=None): + # TODO: determine how to fix np.array([], dtype=object) output for text + # https://github.com/tensorflow/tensorflow/issues/34871 + # Our traditional behavior to decode to str once in numpy + # This can be done via: y.astype("U") + # Currently, this is handled by scenarios or metrics after dataset output + # NOTE: 16000 sampling rate + if audio_kwargs is None: + audio_kwargs = {} + text = element["text"] + speech = audio_to_canon(element["speech"], **audio_kwargs) + return (speech, text) + + +librispeech_dev_test = register(librispeech, "librispeech_dev_test") + + def image_to_canon(image, resize=None, target_dtype=tf.float32, input_type="uint8"): """ TFDS Image feature uses (height, width, channels) @@ -98,14 +116,6 @@ def audio_to_canon(audio, resample=None, target_dtype=tf.float32, input_type="in return audio -# config = { -# "preprocessor": "mnist(max_frames=1)" -# "preprocessor_kwargs": { -# "max_frames": null, -# } -# } - - def video_to_canon( video, resize=None, diff --git a/armory/datasets/standard/librispeech_dev_test/__init__.py b/armory/datasets/standard/librispeech_dev_test/__init__.py new file mode 100644 index 000000000..d84f1d722 --- /dev/null +++ b/armory/datasets/standard/librispeech_dev_test/__init__.py @@ -0,0 +1,3 @@ +"""librispeech_dev_test dataset.""" + +from .librispeech_dev_test import LibrispeechDevTest diff --git a/armory/datasets/standard/librispeech_dev_test/checksums.tsv b/armory/datasets/standard/librispeech_dev_test/checksums.tsv new file mode 100644 index 000000000..edb48d2cf --- /dev/null +++ b/armory/datasets/standard/librispeech_dev_test/checksums.tsv @@ -0,0 +1 @@ +# NOTE: This file is empty due to subclassing the existing tfds librispeech builder: https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/audio/librispeech.py diff --git a/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py 
b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py new file mode 100644 index 000000000..5fcb31e72 --- /dev/null +++ b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py @@ -0,0 +1,41 @@ +""" +Subset of librispeech containing just 'dev' and 'test' splits. + +checksums.tsv is empty as it uses the underlying librispeech class. + +NOTE: In order to build, this requires apache beam installed. + In the container, do: `pip install apache-beam` + This is not installed by default due to older dependencies + +NOTE: when building, armory does not provide beam options by default + This makes building VERY slow unless overrides are provided + It is recommended that this is built directly using tfds on the command line + +Using DirectRunner with apache beam, can build with this: + tfds build /workspace/armory/datasets/standard/librispeech_dev_test --data_dir /armory/datasets/new_builds --force_checksums_validation --beam_pipeline_options="runner=DirectRunner,direct_num_workers=16,direct_running_mode=multi_processing" + See: https://beam.apache.org/releases/pydoc/2.43.0/_modules/apache_beam/options/pipeline_options.html#DirectOptions +""" + +import tensorflow_datasets as tfds +from tensorflow_datasets.audio import librispeech + +_SUBSET = ( + "dev_clean", + "dev_other", + "test_clean", + "test_other", +) +_DL_URLS = {k: v for k, v in librispeech._DL_URLS.items() if k in _SUBSET} + + +class LibrispeechDevTest(librispeech.Librispeech): + """DatasetBuilder for subset of Librispeech""" + + def _split_generators(self, dl_manager): + extracted_dirs = dl_manager.download_and_extract(_DL_URLS) + self._populate_metadata(extracted_dirs) + splits = [ + tfds.core.SplitGenerator(name=k, gen_kwargs={"directory": v}) + for k, v in extracted_dirs.items() + ] + return splits diff --git a/armory/docker/images.py b/armory/docker/images.py index 5f677fbd1..f8aca2eb2 100644 --- a/armory/docker/images.py +++ b/armory/docker/images.py @@ -15,14 +15,16 @@ TAG = 
version.to_docker_tag(armory.__version__) ARMORY_IMAGE_NAME = f"twosixarmory/armory:{TAG}" -DEEPSPEECH_IMAGE_NAME = f"twosixarmory/pytorch-deepspeech:{TAG}" IMAGE_MAP = { "armory": ARMORY_IMAGE_NAME, "tf2": ARMORY_IMAGE_NAME, "pytorch": ARMORY_IMAGE_NAME, "carla-mot": ARMORY_IMAGE_NAME, - "pytorch-deepspeech": DEEPSPEECH_IMAGE_NAME, +} +DEPRECATED_IMAGES_VERSION = { + "tf1": "< 0.15.0", + "pytorch-deepspeech": "<= 0.16.1", } @@ -65,8 +67,11 @@ def is_armory(image_name: str): user, repo, _ = split_name(image_name) if user and user != "twosixarmory": return False - if repo == "tf1": - raise ValueError("tf1 docker image is deprecated. Use Armory version < 0.15.0") + if repo in DEPRECATED_IMAGES_VERSION: + old_version = DEPRECATED_IMAGES_VERSION[repo] + raise ValueError( + f"{repo} docker image is deprecated. Use Armory version {old_version}" + ) return repo in IMAGE_MAP diff --git a/armory/metrics/task.py b/armory/metrics/task.py index 7d4bc78b9..fdc0e725f 100644 --- a/armory/metrics/task.py +++ b/armory/metrics/task.py @@ -22,7 +22,6 @@ set_namespace, result_formatter, ) -from armory.utils.external_repo import ExternalPipInstalledImport aggregate = MetricNameSpace() population = MetricNameSpace() @@ -139,11 +138,7 @@ def __init__(self, model_name="roberta-large-mnli", cache_dir=None): paths.runtime_paths().saved_model_dir, "huggingface" ) - with ExternalPipInstalledImport( - package="transformers", - dockerimage="twosixarmory/pytorch-deepspeech", - ): - from transformers import AutoTokenizer, AutoModelForSequenceClassification + from transformers import AutoTokenizer, AutoModelForSequenceClassification self.tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) self.model = AutoModelForSequenceClassification.from_pretrained( diff --git a/armory/scenarios/audio_asr.py b/armory/scenarios/audio_asr.py index 93c73aca8..22bb29b2c 100644 --- a/armory/scenarios/audio_asr.py +++ b/armory/scenarios/audio_asr.py @@ -110,5 +110,6 @@ def 
load_test_dataset(self, test_split_default="test_clean"): def _load_sample_exporter(self): return AudioExporter( self.export_dir, - self.test_dataset.context.sample_rate, + self.test_dataset.info.metadata["sample_rate"], # TODO: smarter way? + # self.test_dataset.info['speech'].sample_rate, # TODO: get in a smarter way ) diff --git a/armory/scenarios/audio_classification.py b/armory/scenarios/audio_classification.py index ef0aa1e90..7bcdf7545 100644 --- a/armory/scenarios/audio_classification.py +++ b/armory/scenarios/audio_classification.py @@ -16,5 +16,6 @@ def load_test_dataset(self): def _load_sample_exporter(self): return AudioExporter( self.export_dir, - self.test_dataset.context.sample_rate, + self.test_dataset.info.metadata["sample_rate"], # TODO: smarter way? + # self.test_dataset.info['speech'].sample_rate, ) diff --git a/docker/Dockerfile-pytorch-deepspeech b/docker/Dockerfile-pytorch-deepspeech deleted file mode 100644 index 7df8c8c39..000000000 --- a/docker/Dockerfile-pytorch-deepspeech +++ /dev/null @@ -1,45 +0,0 @@ -ARG base_image_tag - -FROM twosixarmory/base:${base_image_tag} AS armory-local - -WORKDIR /armory-repo - -# NOTE: This COPY command is filtered using the `.dockerignore` file -# in the root of the repo. -COPY ./ /armory-repo - -RUN pip install git+https://github.com/romesco/hydra-lightning/\#subdirectory=hydra-configs-pytorch-lightning - -RUN echo "Building Armory from local source" && \ - echo "Updating Base Image..." && \ - python -m pip install --upgrade pip && \ - echo "Installing Armory..." && \ - pip install --no-compile --no-cache-dir --editable '.[engine,deepspeech,jupyter]' && \ - echo "Configuring Armory..." && \ - armory configure --use-default && \ - echo "Cleaning up..." 
&& \ - rm -rf /armory-repo/.git - -WORKDIR /workspace - - -# ------------------------------------------------------------------ -# DEVELOPER NOTES: -# ------------------------------------------------------------------ -# TODO: determine if this environment setup is needed -# $ ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64" - -# NOTE: -# - pytorch-lightning >= 1.5.0 will break Deep Speech 2 -# - torchmetrics >= 0.8.0 will break pytorch-lightning 1.4 -# - hydra-lightning installs omegaconf -# - google-cloud-storage needed for checkpoint.py import -# - only sox python bindings are installed; underlying sox binaries not needed - -# NOTE: Listed dependencies of PyTorch Deep Speech 2, but do not appear -# to be used for inference (only for training), they are not installed: -# - torchelastic -# - wget -# - flask -# - fairscale -# ------------------------------------------------------------------ diff --git a/docker/README.md b/docker/README.md index f03934ba4..e295f608d 100755 --- a/docker/README.md +++ b/docker/README.md @@ -27,7 +27,7 @@ they can be built from the release branch of the repo: ``` git checkout -b r0.16.0 bash docker/build-base.sh -python docker/build.py [--no-pull] +python docker/build.py [--no-pull] ``` If possible, we recommend downloading the base image instead of building, which can be done by removing the `--no-pull` argument from `build.py`. diff --git a/docker/build.py b/docker/build.py index ff70ba801..16fcae74a 100644 --- a/docker/build.py +++ b/docker/build.py @@ -13,7 +13,7 @@ script_dir = Path(__file__).parent root_dir = script_dir.parent -armory_frameworks = ["armory", "pytorch-deepspeech"] +armory_frameworks = ["armory"] # NOTE: Podman is not officially supported, but this enables # use as a drop-in replacement for building. 
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index e88c3e752..2f7acd238 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -56,7 +56,7 @@ Note: only release versions of armory will be published to [Dockerhub](https://h development branch images much be built locally using: ```bash cd YOUR_ARMORY_REPO -bash docker/build.sh dev +bash docker/build.sh dev ``` ## Style Guide diff --git a/docs/baseline_models.md b/docs/baseline_models.md index 4fde37b87..ede6900ca 100644 --- a/docs/baseline_models.md +++ b/docs/baseline_models.md @@ -37,8 +37,6 @@ The model files can be found in [armory/baseline_models/pytorch](../armory/basel | Model | S3 weight_files | |:----------: |:---------------------------------------------:| | Cifar10 CNN | | -| DeepSpeech 2 | | -| Sincnet CNN | `sincnet_librispeech_v1.pth` | | MARS | `mars_ucf101_v1.pth` , `mars_kinetics_v1.pth` | | ResNet50 CNN | `resnet50_imagenet_v1.pth` | | MNIST CNN | `undefended_mnist_5epochs.pth` | @@ -59,4 +57,4 @@ The weights for this model are downloaded from the link listed below. ### Preprocessing Functions Preprocessing functions have been moved inside each model's forward pass. This is to allow each -model to receive as input the canonicalized form of a dataset. \ No newline at end of file +model to receive as input the canonicalized form of a dataset. 
diff --git a/docs/datasets.md b/docs/datasets.md index 681147a2a..6d60a1850 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -56,14 +56,10 @@ The carla_over_obj_det_train dataset has the same properties as the above mentio | Dataset | Description | x_shape | x_dtype | y_shape | y_dtype | sampling_rate | splits | |:----------: |:-----------: |:-------: |:--------: |:--------: |:-------: |:-------: |:------: | | [digit](https://github.com/Jakobovski/free-spoken-digit-dataset) | Audio dataset of spoken digits | (N, variable_length) | int64 | (N,) | int64 | 8 kHz | train, test | -| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100 | -| [librispeech-full](http://www.openslr.org/12/) | Full Librispeech dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100, train_clean360, train_other500 | -| [librispeech_dev_clean](http://www.openslr.org/12/) | Librispeech dev dataset for speaker identification | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | train, validation, test | -| [librispeech_dev_clean_asr](http://www.openslr.org/12) | Librispeech dev dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | train, validation, test | +| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition (NOTE: not currently cached. Use TFDS builder.) 
| (N, variable_length) | float32 | (N,) | bytes | 16 kHz | dev_clean, dev_other, test_clean, test_other, train_clean100, train_clean360, train_other500 | +| [librispeech_dev_test](http://www.openslr.org/12/) | Librispeech with only dev and test splits | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | dev_clean, dev_other, test_clean, test_other | | [speech_commands](https://www.tensorflow.org/datasets/catalog/speech_commands) | Speech commands dataset for audio poisoning | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | train, validation, test | -NOTE: because the Librispeech dataset is over 300 GB with all splits, the ```librispeech_full``` dataset has -all splits, whereas the ```librispeech``` dataset does not have the train_clean360 or train_other500 splits.
### Video Datasets @@ -101,9 +97,6 @@ Tensorflow Datasets [library](https://www.tensorflow.org/datasets/catalog/overvi | resisc_45 | train | First 5/7 of dataset | See armory/data/resisc45/resisc45_dataset_partition.py | | | validation | Next 1/7 of dataset | | | | test | Final 1/7 of dataset | | -| librispeech_dev_clean | train | 1371 recordings from dev_clean dataset | Assign discrete clips so at least 50% of audio time | -| | validation | 692 recordings from dev_clean dataset | is in train, at least 25% is in validation, | -| | test | 640 recordings from dev_clean dataset | and the remainder are in test |
diff --git a/docs/docker.md b/docs/docker.md index e1813fe20..853bc30b5 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -4,16 +4,16 @@ inside a docker container. ## Images -There are two docker images that are currently published to dockerhub for every release of +There is a single docker image that is currently published to dockerhub for every release of the armory framework: 1. `twosixarmory/armory:` -2. `twosixarmory/pytorch-deepspeech:` NOTE: as of Armory version 0.15.0, we no longer support or publish a `tf1` image. -If `tf1` functionality is needed, please use the `tf2` image and use `tf1` compatibility mode. +If `tf1` functionality is needed, please use the `armory` image and use `tf1` compatibility mode. +NOTE: as of Armory version > 0.16.1, we no longer support the `pytorch-deepspeech` image. -We additionally publish a base image, `twosixarmory/base:latest`, from which the three main images are derived. +We additionally publish a base image, `twosixarmory/base:latest`, from which the main image is derived. This is updated less frequently, and each release does not necessarily have a corresponding new base. When using `armory launch` or `armory exec` the framework specific arguments will @@ -87,7 +87,7 @@ they can be built from the release branch of the repo: ``` git checkout -b r0.16.0 bash docker/build-base.sh -python docker/build.py [--no-pull] +python docker/build.py [--no-pull] ``` If possible, we recommend downloading the base image instead of building, which can be done by removing the `--no-pull` argument from `build.py`. 
diff --git a/docs/getting_started.md b/docs/getting_started.md index 02a643170..b91277703 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -6,7 +6,7 @@ Armory can be installed from PyPi: pip install armory-testbed[framework-flavor] ``` -Where `framework-flavor` is one of `tensorflow`, `pytorch` or `deepspeech` +Where `framework-flavor` is one of `tensorflow` or `pytorch` as described below in [the armory flavors](#the-armory-flavors). When a user runs a given configuration file, the necessary docker image, datasets and @@ -61,7 +61,6 @@ Armory supports multiple frameworks: - tensorflow - pytorch - - deepspeech In releases prior to 0.16, there was a complex set of `*-requirements.txt` files that were needed to provision the python environment for the various frameworks. @@ -82,17 +81,13 @@ which installs the libraries needed for tensorflow evaluations. Similarly, pip install armory-testbed[pytorch] -or - - pip install armory-testbed[deepspeech] - depending on the framework you want to use. We don't recommend trying to install multiple frameworks at the same time as this may lead to dependency -conflicts. So +conflicts. So, pip install armory-testbed[tensorflow,pytorch] -is unsupported and may not even install. +may not install. ## additional flavors @@ -103,7 +98,7 @@ You can freely add `jupyterlab` to the flavor list to as needed, for example People developing armory will likely want to add the `developer` flavor to their set: - pip install armory-testbed[deepspeech,developer,jupyterlab] + pip install armory-testbed[developer,jupyterlab] Developers who are creating new Armory datasets will need diff --git a/docs/index.md b/docs/index.md index 13e9933f8..d991b857b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -35,14 +35,14 @@ terminal where this command was ran. Please see [running_armory_scenarios_intera 2) `armory launch` -* `armory launch --interactive`. +* `armory launch --interactive`. 
This will launch a framework specific container, with appropriate mounted volumes, for the user to attach to for debugging purposes. A command to attach to the container will be returned from this call, and it can be ran in a separate terminal. To later close the interactive container simply run CTRL+C from the terminal where this command was ran. -* `armory launch --jupyter`. +* `armory launch --jupyter`. Similar to the interactive launch, this will spin up a container for a specific framework, but will instead return the web address of a jupyter lab server where debugging can be performed. To close the jupyter server simply run CTRL+C from the @@ -50,12 +50,12 @@ terminal where this command was ran. 3) `armory exec` -* `armory exec -- `. +* `armory exec -- `. This will run a specific command within a framework specific container. A notable use case for this would be to run test cases using pytest. After completion of the command the container will be removed. -To use custom docker images with `launch` or `exec`, replace `` with its +To use custom docker images with `launch` or `exec`, replace `` with its full name: ``. For use with `run`, you will need to modify the [configuration file](configuration_files.md). 
diff --git a/docs/no_docker_mode.md b/docs/no_docker_mode.md index e99308fea..af930089d 100644 --- a/docs/no_docker_mode.md +++ b/docs/no_docker_mode.md @@ -31,18 +31,9 @@ Once this is complete, and you have ensured you are in the `[armory-repo]` direc you can setup the environment with the following: ```bash pip install --upgrade pip==22.0.3 -pip install -e .[engine,datasets,math,pytorch,deepspeech,tensorflow] +pip install -e .[engine,datasets,math,pytorch,tensorflow] ``` -If you are using the `deepspeech` scenarios, you will also need to -install the `hydra-lightning` configs with: - - pip install git+https://github.com/romesco/hydra-lightning/#subdirectory=hydra-configs-pytorch-lightning - -as described [in that package's README](https://github.com/romesco/hydra-lightning#readme). -This is necessary because there is no proper release of that package (nor does one -appear likely). - Once this completes, you should run `armory configure` (If you haven't already done this previously) to setup the armory configuration (e.g. dataset download directory, output directory, etc.). diff --git a/pyproject.toml b/pyproject.toml index 8fd31cee1..46c2aec37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,8 @@ engine = [ "botocore" , # Needed for armory.data.utils "ffmpeg-python", # Needed for armory.utils.export "pydub", # this is in ART's extra-requires + "librosa", # audio poisoning + "transformers", # audio metrics "tidecv", # Needed for TIDE metrics # Both `opencv-python` and `opencv-python-headless` must specify # the same version. 
@@ -92,17 +94,6 @@ tensorflow = [ "tensorflow >= 2.10.0", ] -deepspeech = [ - "armory-testbed[pytorch,engine,datasets,math]", - "python-levenshtein", - "torchmetrics < 0.8.0", - "sox", - "librosa", - "google-cloud-storage", - "transformers", - "pytorch-lightning < 1.5.0", -] - math = [ "numpy", "pandas", @@ -202,5 +193,4 @@ markers = [ "docker_required", # This Test Requires Docker to run "end_to_end", # Full End to End Test (typically slow) "unit", # These are Unit Tests (fast and lightweight) - "pytorch_deepspeech", # This test requires pytorch-deepspeech container (or equivalent dependencies) to run ] diff --git a/scenario_configs/asr_librispeech_entailment.json b/scenario_configs/asr_librispeech_entailment.json index 752937374..b3038f281 120000 --- a/scenario_configs/asr_librispeech_entailment.json +++ b/scenario_configs/asr_librispeech_entailment.json @@ -1 +1 @@ -eval5/asr_librispeech/entailment.json \ No newline at end of file +eval6/asr_librispeech/hubert_entailment.json \ No newline at end of file diff --git a/scenario_configs/asr_librispeech_targeted.json b/scenario_configs/asr_librispeech_targeted.json index 04b2e2ac6..37d7e1bd6 120000 --- a/scenario_configs/asr_librispeech_targeted.json +++ b/scenario_configs/asr_librispeech_targeted.json @@ -1 +1 @@ -eval5/asr_librispeech/untargeted_snr_pgd.json \ No newline at end of file +eval6/asr_librispeech/hubert_targeted_snr_pgd.json \ No newline at end of file diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json deleted file mode 100755 index cdecb16ce..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - 
"decrease_factor_alpha": 0.5, - "decrease_factor_eps": 0.5, - "eps": 0.0075, - "global_max_length": 562480, - "increase_factor_alpha": 2.0, - "initial_rescale": 1.0, - "learning_rate_1": 0.0001, - "learning_rate_2": 1e-07, - "max_iter_1": 400, - "max_iter_2": 100, - "num_iter_decrease_alpha": 50 - }, - "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch", - "name": "ImperceptibleASRPyTorch", - "targeted": true, - "targeted_labels": { - "scheme": "matched length", - "transcripts": [ - "REALLY SHORT TEST STRING", - "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS", - "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY" - ] - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3CompressionPyTorch", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": 
"SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json deleted file mode 100755 index 71b02f0dc..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "decrease_factor_alpha": 0.5, - "decrease_factor_eps": 0.5, - "eps": 0.0075, - "global_max_length": 562480, - "increase_factor_alpha": 2.0, - "initial_rescale": 1.0, - "learning_rate_1": 0.0001, - "learning_rate_2": 1e-07, - "max_iter_1": 400, - "max_iter_2": 100, - "num_iter_decrease_alpha": 50 - }, - "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch", - "name": "ImperceptibleASRPyTorch", - "targeted": true, - "targeted_labels": { - "scheme": "matched length", - "transcripts": [ - "REALLY SHORT TEST STRING", - "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS", - "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY" - ] - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - 
}, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json deleted file mode 100755 index c4d41fb71..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "partial_attack": false, - "snr_db": 20, - "targeted": false - }, - "module": "armory.art_experimental.attacks.kenansville_dft", - "name": "KenansvilleDFT", - "use_label": false - }, - "dataset": { - "batch_size": 8, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 
20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json deleted file mode 100755 index 1a8e25bed..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "partial_attack": false, - "snr_db": 20, - "targeted": false - }, - "module": "armory.art_experimental.attacks.kenansville_dft", - "name": "KenansvilleDFT", - "use_label": false - }, - "dataset": { - "batch_size": 8, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - 
"wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json deleted file mode 100755 index c54f8ef78..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 1.5, - "eps_step": 0.05, - "max_iter": 100, - "norm": 2, - "num_random_init": 0, - "random_eps": false, - "targeted": false, - "verbose": false - }, - "module": "art.attacks.evasion", - "name": "ProjectedGradientDescent", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": 
"get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json deleted file mode 100755 index ac814e83a..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "audio_channel": { - "attenuation": 0.5, - "delay": 300, - "pytorch": true - }, - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 1.5, - "eps_step": 0.05, - "max_iter": 100, - "norm": 2, - "num_random_init": 0, - "random_eps": false, - "targeted": false, - "verbose": false - }, - "module": "art.attacks.evasion", - "name": "ProjectedGradientDescent", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": 
"armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json deleted file mode 100755 index 94a7bef1c..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 1.5, - "eps_step": 0.05, - "max_iter": 100, - "norm": 2, - "num_random_init": 0, - "random_eps": false, - "targeted": false, - "verbose": false - }, - "module": "art.attacks.evasion", - "name": "ProjectedGradientDescent", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { 
- "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json deleted file mode 100644 index 263adccac..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 10, - "eps_step": 0.5, - "max_iter": 10, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "import_from": "armory.attacks.librispeech_target_labels", - "transcripts": "matched_length" - }, - "module": "armory.utils.labels", - "name": "MatchedTranscriptLengthTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": 
"armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json deleted file mode 100755 index 9ed517ef0..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 10, - "eps_step": 0.5, - "max_iter": 10, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "value": "TEST STRING" - }, - "module": "armory.utils.labels", - "name": "FixedStringTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - 
"module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json deleted file mode 100644 index 71d688d97..000000000 --- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation", - "adhoc": null, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 0.2, - "eps_step": 0.1, - "minimal": false, - "num_random_init": 0, - "targeted": false - }, - "module": "art.attacks.evasion", - "name": "FastGradientMethod", - "use_label": false - }, - "dataset": { - "batch_size": 1, - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech_dev_clean" - }, - "defense": null, - "metric": { - "means": true, - "perturbation": "linf", - "record_metric_per_sample": false, - "task": [ - "categorical_accuracy" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "fit_batch_size": 16, - "nb_epochs": 20000 - }, - "model_kwargs": { - "predict_mode": "all" - }, - "module": "armory.baseline_models.pytorch.sincnet", - "name": "get_art_model", - "weights_file": "sincnet_librispeech_v1.pth", - "wrapper_kwargs": { - "clip_values": [ - -1.0, - 1.0 - ] 
- } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_classification", - "name": "AudioClassificationTask" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch", - "external_github_repo": "hkakitani/SincNet", - "gpus": "all", - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json deleted file mode 100644 index 8ea65668d..000000000 --- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation", - "adhoc": null, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 10, - "eps_step": 0.5, - "max_iter": 10, - "norm": "snr", - "num_random_init": 0, - "targeted": false - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD", - "use_label": false - }, - "dataset": { - "batch_size": 1, - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech_dev_clean" - }, - "defense": null, - "metric": { - "means": true, - "perturbation": [ - "snr", - "snr_db" - ], - "record_metric_per_sample": true, - "task": [ - "categorical_accuracy" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "fit_batch_size": 16, - "nb_epochs": 20000 - }, - "model_kwargs": { - "predict_mode": "all" - }, - "module": "armory.baseline_models.pytorch.sincnet", - "name": "get_art_model", - "weights_file": "sincnet_librispeech_v1.pth", - "wrapper_kwargs": { - "clip_values": [ - -1.0, - 1.0 - ] - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_classification", - "name": "AudioClassificationTask" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch", - "external_github_repo": 
"hkakitani/SincNet", - "gpus": "all", - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json deleted file mode 100644 index 526353755..000000000 --- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation", - "adhoc": null, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 0.2, - "eps_step": 0.1, - "minimal": false, - "num_random_init": 0, - "targeted": true - }, - "module": "art.attacks.evasion", - "name": "FastGradientMethod", - "targeted_labels": { - "kwargs": { - "num_classes": 40 - }, - "module": "armory.utils.labels", - "name": "RoundRobinTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech_dev_clean" - }, - "defense": null, - "metric": { - "means": true, - "perturbation": "linf", - "record_metric_per_sample": false, - "task": [ - "categorical_accuracy" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "fit_batch_size": 16, - "nb_epochs": 20000 - }, - "model_kwargs": { - "predict_mode": "all" - }, - "module": "armory.baseline_models.pytorch.sincnet", - "name": "get_art_model", - "weights_file": "sincnet_librispeech_v1.pth", - "wrapper_kwargs": { - "clip_values": [ - -1.0, - 1.0 - ] - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_classification", - "name": "AudioClassificationTask" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch", - "external_github_repo": "hkakitani/SincNet", - "gpus": "all", - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git 
a/scenario_configs/eval5/asr_librispeech/defended_entailment.json b/scenario_configs/eval5/asr_librispeech/defended_entailment.json deleted file mode 100644 index 5727d7654..000000000 --- a/scenario_configs/eval5/asr_librispeech/defended_entailment.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 20, - "eps_step": 0.05, - "max_iter": 500, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "dtype": "str", - "import_from": "armory.attacks.librispeech_target_labels", - "values": "entailment_100" - }, - "module": "armory.utils.labels", - "name": "ManualTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "entailment", - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": 
"AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": [ - "SeanNaren/deepspeech.pytorch@V3.0" - ], - "gpus": "all", - "local_repo_path": null, - "num_eval_batches": 100, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json deleted file mode 100644 index c9ff3fdb2..000000000 --- a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 20, - "eps_step": 0.5, - "max_iter": 500, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "import_from": "armory.attacks.librispeech_target_labels", - "transcripts": "matched_length" - }, - "module": "armory.utils.labels", - "name": "MatchedTranscriptLengthTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": 
"armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json deleted file mode 100644 index 58a8c1af8..000000000 --- a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 20, - "eps_step": 0.5, - "max_iter": 500, - "norm": "snr", - "num_random_init": 0, - "targeted": false - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - 
"wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json similarity index 70% rename from scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json rename to scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json index 4c128b261..006318915 100644 --- a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -20,12 +20,11 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": { "kwargs": { @@ -41,7 +40,7 @@ }, "metric": { "means": false, - "perturbation": "linf", + "perturbation": "snr_db", "record_metric_per_sample": true, "task": [ "word_error_rate" @@ -53,19 +52,13 @@ "nb_epochs": 20000 }, "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", + "module": "armory.baseline_models.pytorch.hubert_asr_large", "name": "get_art_model", "predict_kwargs": { "transcription_output": true }, "weights_file": 
null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } + "wrapper_kwargs": {} }, "scenario": { "kwargs": {}, @@ -73,8 +66,8 @@ "name": "AutomaticSpeechRecognition" }, "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", + "docker_image": "twosixarmory/pytorch", + "external_github_repo": null, "gpus": "all", "local_repo_path": null, "output_dir": null, diff --git a/scenario_configs/eval5/asr_librispeech/entailment.json b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json similarity index 70% rename from scenario_configs/eval5/asr_librispeech/entailment.json rename to scenario_configs/eval6/asr_librispeech/hubert_entailment.json index 21f5ff3e1..1a3388db5 100644 --- a/scenario_configs/eval5/asr_librispeech/entailment.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -29,12 +29,11 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": null, "metric": { @@ -52,19 +51,13 @@ "nb_epochs": 20000 }, "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", + "module": "armory.baseline_models.pytorch.hubert_asr_large", "name": "get_art_model", "predict_kwargs": { "transcription_output": true }, "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } + "wrapper_kwargs": {} }, "scenario": { "kwargs": {}, @@ -72,13 +65,10 @@ "name": "AutomaticSpeechRecognition" }, "sysconfig": { - 
"docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": [ - "SeanNaren/deepspeech.pytorch@V3.0" - ], + "docker_image": "twosixarmory/pytorch", + "external_github_repo": null, "gpus": "all", "local_repo_path": null, - "num_eval_batches": 100, "output_dir": null, "output_filename": null, "use_gpu": false diff --git a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json similarity index 69% rename from scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json rename to scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json index f650a46eb..5f469cca6 100644 --- a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -28,17 +28,16 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": null, "metric": { "means": false, - "perturbation": "linf", + "perturbation": "snr_db", "record_metric_per_sample": true, "task": [ "word_error_rate" @@ -50,19 +49,13 @@ "nb_epochs": 20000 }, "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", + "module": "armory.baseline_models.pytorch.hubert_asr_large", "name": "get_art_model", "predict_kwargs": { "transcription_output": true }, "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } + "wrapper_kwargs": {} }, "scenario": { "kwargs": {}, @@ -70,8 +63,8 @@ "name": "AutomaticSpeechRecognition" }, 
"sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", + "docker_image": "twosixarmory/pytorch", + "external_github_repo": null, "gpus": "all", "local_repo_path": null, "output_dir": null, diff --git a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json index 25b1b5bc0..3ce122237 100644 --- a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -20,17 +20,16 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": null, "metric": { "means": false, - "perturbation": "linf", + "perturbation": "snr_db", "record_metric_per_sample": true, "task": [ "word_error_rate" diff --git a/scenario_configs/speaker_id_librispeech.json b/scenario_configs/speaker_id_librispeech.json deleted file mode 120000 index c9d0b713e..000000000 --- a/scenario_configs/speaker_id_librispeech.json +++ /dev/null @@ -1 +0,0 @@ -eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index e6a4db595..5e1c2a49a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,10 +14,7 @@ logger = logging.getLogger(__name__) REQUIRED_DOCKER_IMAGES = [ - f"twosixarmory/pytorch:{__version__}", - f"twosixarmory/tf2:{__version__}", - f"twosixarmory/pytorch-deepspeech:{__version__}", - 
f"twosixarmory/carla-mot:{__version__}", + f"twosixarmory/armory:{__version__}", ] diff --git a/tests/unit/test_docker_build_script.py b/tests/unit/test_docker_build_script.py index 65fe1f2ed..e78b7404c 100644 --- a/tests/unit/test_docker_build_script.py +++ b/tests/unit/test_docker_build_script.py @@ -43,12 +43,8 @@ def image_tag(armory_version_tbi): @pytest.mark.parametrize( "img, opt", [ - # ("base", ""), - ("pytorch", ""), - ("tf2", ""), - ("pytorch-deepspeech", ""), - ("pytorch-deepspeech", "--no-cache"), - # ("base", "--no-cache"), + ("armory", ""), + ("armory", "--no-cache"), ], ) def test_build_script(img, opt, image_tag, armory_version_tbi): diff --git a/tests/unit/test_task_metrics.py b/tests/unit/test_task_metrics.py index 12f028826..32056f73b 100644 --- a/tests/unit/test_task_metrics.py +++ b/tests/unit/test_task_metrics.py @@ -13,7 +13,6 @@ @pytest.mark.docker_required -@pytest.mark.pytorch_deepspeech @pytest.mark.slow def test_entailment(): """