diff --git a/.github/workflows/3-test-docker.yml b/.github/workflows/3-test-docker.yml
index ac6b59061..c3c443ae1 100755
--- a/.github/workflows/3-test-docker.yml
+++ b/.github/workflows/3-test-docker.yml
@@ -44,50 +44,3 @@ jobs:
                 -c /armory-repo/pyproject.toml        \
                 -m "not docker_required and unit"     \
                 ./tests/
-
-
-  docker-deepspeech-unit:
-    name: ☁️ Docker Deepspeech Image Tests
-    runs-on: ubuntu-latest
-    steps:
-      - name: 🐄 checkout armory full depth with tags for scm
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-
-      - name: 🐍 Use Python 3.9
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.9
-
-      - name: ⚙️ Installing Armory
-        shell: bash
-        run: |
-          pip install .
-          armory configure --use-defaults
-
-      - name: 🚧 Build the Container
-        run: |
-          python docker/build.py --framework pytorch-deepspeech
-
-      - name: 🤞 Run Image tests
-        run: |
-          IMAGE_VERSION=`armory --show-docker-version-tag`
-
-          docker run                                           \
-            --rm                                               \
-            --workdir /armory-repo                             \
-            twosixarmory/pytorch-deepspeech:${IMAGE_VERSION}  \
-              pytest                                           \
-                -c /armory-repo/pyproject.toml                 \
-                -m "not docker_required and unit"              \
-                ./tests/
-
-            docker run                                         \
-            --rm                                               \
-            --workdir /armory-repo                             \
-            twosixarmory/pytorch-deepspeech:${IMAGE_VERSION}  \
-              pytest                                           \
-                -c /armory-repo/pyproject.toml                 \
-                -m "pytorch_deepspeech"                        \
-                ./tests/
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d24b48ba9..aebb6c6bb 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -74,7 +74,6 @@ jobs:
       matrix:
         include:
           - image: armory
-          - image: pytorch-deepspeech
     steps:
       - name: 🐍 Setup Python 3.9
         uses: actions/setup-python@v4
diff --git a/README.md b/README.md
index 992d753f8..ace021c6c 100644
--- a/README.md
+++ b/README.md
@@ -61,21 +61,21 @@ from the evaluation can be found in the output directory. To later close the
 interactive container simply run CTRL+C from the terminal where this command was ran.
 
 ## armory launch
-* `armory launch <armory|pytorch-deepspeech>`
+* `armory launch <armory>`
 This will launch a framework specific container, with appropriate mounted volumes, for
 the user to attach to for debugging purposes. A command to attach to the container will
 be returned from this call, and it can be ran in a separate terminal. To later close
 the interactive container simply run CTRL+C from the terminal where this command was
 ran.
 
-* `armory launch <armory|pytorch-deepspeech> --jupyter`.
+* `armory launch <armory> --jupyter`.
 Similar to the interactive launch, this will spin up a container for a specific
 framework, but will instead return the web address of a jupyter lab server where
 debugging can be performed. To close the jupyter server simply run CTRL+C from the
 terminal where this command was ran.
 
 ## armory exec
-* `armory exec <armory|pytorch-deepspeech> -- <cmd>`
+* `armory exec <armory> -- <cmd>`
 This will run a specific command within a framework specific container. A notable use
 case for this would be to run test cases using pytest. After completion of the command
 the container will be removed.
diff --git a/armory/__main__.py b/armory/__main__.py
index 038161020..b058d77f8 100755
--- a/armory/__main__.py
+++ b/armory/__main__.py
@@ -190,7 +190,7 @@ def _docker_image(parser):
         "docker_image",
         metavar="<docker image>",
         type=str,
-        help="docker image framework: 'armory', or 'pytorch-deepspeech'",
+        help="docker image framework: 'armory'",
         action=DockerImage,
     )
 
@@ -201,7 +201,7 @@ def _docker_image_optional(parser):
         default=armory.docker.images.ARMORY_IMAGE_NAME,
         metavar="<docker image>",
         type=str,
-        help="docker image framework: 'armory', or 'pytorch-deepspeech'",
+        help="docker image framework: 'armory'",
         action=DockerImage,
     )
 
diff --git a/armory/baseline_models/pytorch/deep_speech.py b/armory/baseline_models/pytorch/deep_speech.py
deleted file mode 100644
index b3efd23d1..000000000
--- a/armory/baseline_models/pytorch/deep_speech.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""
-Automatic speech recognition model
-
-Model contributed by: MITRE Corporation
-"""
-
-from typing import Optional
-
-from art.estimators.speech_recognition import PyTorchDeepSpeech
-
-from armory.utils.external_repo import ExternalRepoImport
-
-# Test for external repo at import time to fail fast
-with ExternalRepoImport(
-    repo="SeanNaren/deepspeech.pytorch@V3.0",
-    experiment="librispeech_asr_snr_undefended.json",
-):
-    from deepspeech_pytorch.model import DeepSpeech  # noqa: F401
-
-
-def get_art_model(
-    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
-) -> PyTorchDeepSpeech:
-    return PyTorchDeepSpeech(**wrapper_kwargs)
diff --git a/armory/baseline_models/pytorch/sincnet.py b/armory/baseline_models/pytorch/sincnet.py
deleted file mode 100644
index 37401045d..000000000
--- a/armory/baseline_models/pytorch/sincnet.py
+++ /dev/null
@@ -1,289 +0,0 @@
-"""
-CNN model for raw audio classification
-
-Model contributed by: MITRE Corporation
-Adapted from: https://github.com/mravanelli/SincNet
-"""
-from typing import Optional
-
-from art.estimators.classification import PyTorchClassifier
-import numpy as np
-import torch
-from torch import nn
-
-from armory.utils.external_repo import ExternalRepoImport
-
-with ExternalRepoImport(
-    repo="hkakitani/SincNet",
-    experiment="librispeech_baseline_sincnet.json",
-):
-    from SincNet import dnn_models
-
-# NOTE: Underlying dataset sample rate is 16 kHz. SincNet uses this SAMPLE_RATE to
-# determine internal filter high cutoff frequency.
-SAMPLE_RATE = 8000
-WINDOW_STEP_SIZE = 375
-WINDOW_LENGTH = int(SAMPLE_RATE * WINDOW_STEP_SIZE / 1000)
-
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-def numpy_random_preprocessing_fn(batch: np.ndarray):
-    """
-    Standardize, then normalize sound clips
-
-    Then generate a random cut of the input
-    """
-    processed_batch = []
-    for clip in batch:
-        # convert and normalize
-        signal = clip.astype(np.float32)
-        # Signal normalization
-        signal = signal / np.max(np.abs(signal))
-
-        # make a pseudorandom cut of size equal to WINDOW_LENGTH
-        # (from SincNet's create_batches_rnd)
-        signal_length = len(signal)
-        np.random.seed(signal_length)
-        signal_start = int(
-            np.random.randint(signal_length / WINDOW_LENGTH - 1)
-            * WINDOW_LENGTH
-            % signal_length
-        )
-        signal_stop = signal_start + WINDOW_LENGTH
-        signal = signal[signal_start:signal_stop]
-        processed_batch.append(signal)
-
-    return np.array(processed_batch)
-
-
-def numpy_all_preprocessing_fn(batch: np.ndarray):
-    """
-    Input is comprised of one or more clips, where each clip i
-    is given as an ndarray with shape (n_i,).
-    Preprocessing normalizes each clip and breaks each clip into an integer number
-    of non-overlapping segments of length WINDOW_LENGTH.
-    Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH)
-    """
-    if len(batch) != 1:
-        raise NotImplementedError(
-            "Requires ART variable length input capability for batch size != 1"
-        )
-    processed_batch = []
-    for clip in batch:
-        # convert and normalize
-        signal = clip.astype(np.float64)
-        signal = signal / np.max(np.abs(signal))
-
-        # break into a number of chunks of equal length
-        num_chunks = int(len(signal) / WINDOW_LENGTH)
-        signal = signal[: num_chunks * WINDOW_LENGTH]
-        signal = np.reshape(signal, (num_chunks, WINDOW_LENGTH), order="C")
-        processed_batch.append(signal)
-    # remove outer batch (of size 1)
-    processed_batch = processed_batch[0]
-    return np.array(processed_batch)
-
-
-def torch_random_preprocessing_fn(x):
-    """
-    Standardize, then normalize sound clips
-    """
-    if x.shape[0] != 1:
-        raise ValueError(f"Shape of batch x {x.shape[0]} != 1")
-    if x.dtype != torch.float32:
-        raise ValueError(f"dtype of batch x {x.dtype} != torch.float32")
-    if x.max() > 1.0:
-        raise ValueError(f"batch x max {x.max()} > 1.0")
-    if x.min() < -1.0:
-        raise ValueError(f"batch x min {x.min()} < -1.0")
-    x = x.squeeze(0)
-
-    # Signal normalization
-    x = x / x.abs().max()
-
-    # get pseudorandom chunk of fixed length (from SincNet's create_batches_rnd)
-    signal_length = len(x)
-    np.random.seed(signal_length)
-    start = int(
-        np.random.randint(signal_length / WINDOW_LENGTH - 1)
-        * WINDOW_LENGTH
-        % signal_length
-    )
-
-    x = x[start : start + WINDOW_LENGTH]
-
-    x = x.unsqueeze(0)
-    return x
-
-
-def torch_all_preprocessing_fn(x: torch.Tensor):
-    """
-    Input is comprised of one or more clips, where each clip i
-    is given as an ndarray with shape (n_i,).
-    Preprocessing normalizes each clip and breaks each clip into an integer number
-    of non-overlapping segments of length WINDOW_LENGTH.
-    Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH)
-    """
-    if x.shape[0] != 1:
-        raise NotImplementedError(
-            "Requires ART variable length input capability for batch size != 1"
-        )
-    if x.max() > 1.0:
-        raise ValueError(f"batch x max {x.max()} > 1.0")
-    if x.min() < -1.0:
-        raise ValueError(f"batch x min {x.min()} < -1.0")
-    if x.dtype != torch.float32:
-        raise ValueError(f"dtype of batch x {x.dtype} != torch.float32")
-    x = x.squeeze(0)
-
-    # Signal normalization
-    x = x / x.abs().max()
-
-    # break into a number of chunks of equal length
-    num_chunks = int(len(x) / WINDOW_LENGTH)
-    x = x[: num_chunks * WINDOW_LENGTH]
-    x = x.reshape((num_chunks, WINDOW_LENGTH))
-
-    return x
-
-
-def sincnet(weights_path: Optional[str] = None) -> dnn_models.SincWrapper:
-    """
-    Set configuration options and instantiates SincWrapper object
-    """
-    pretrained = weights_path is not None
-    if pretrained:
-        model_params = torch.load(weights_path, map_location=DEVICE)
-    else:
-        model_params = {}
-    CNN_params = model_params.get("CNN_model_par")
-    DNN1_params = model_params.get("DNN1_model_par")
-    DNN2_params = model_params.get("DNN2_model_par")
-
-    # from SincNet/cfg/SincNet_dev_LibriSpeech.cfg
-    cnn_N_filt = [80, 60, 60]
-    cnn_len_filt = [251, 5, 5]
-    cnn_max_pool_len = [3, 3, 3]
-    cnn_use_laynorm_inp = True
-    cnn_use_batchnorm_inp = False
-    cnn_use_laynorm = [True, True, True]
-    cnn_use_batchnorm = [False, False, False]
-    cnn_act = ["relu", "relu", "relu"]
-    cnn_drop = [0.0, 0.0, 0.0]
-
-    fc_lay = [2048, 2048, 2048]
-    fc_drop = [0.0, 0.0, 0.0]
-    fc_use_laynorm_inp = True
-    fc_use_batchnorm_inp = False
-    fc_use_batchnorm = [True, True, True]
-    fc_use_laynorm = [False, False, False]
-    fc_act = ["leaky_relu", "linear", "leaky_relu"]
-
-    class_lay = [40]
-    class_drop = [0.0, 0.0]
-    class_use_laynorm_inp = True
-    class_use_batchnorm_inp = False
-    class_use_batchnorm = [False]
-    class_use_laynorm = [False]
-    class_act = ["softmax"]
-
-    CNN_options = {
-        "input_dim": WINDOW_LENGTH,
-        "fs": SAMPLE_RATE,
-        "cnn_N_filt": cnn_N_filt,
-        "cnn_len_filt": cnn_len_filt,
-        "cnn_max_pool_len": cnn_max_pool_len,
-        "cnn_use_laynorm_inp": cnn_use_laynorm_inp,
-        "cnn_use_batchnorm_inp": cnn_use_batchnorm_inp,
-        "cnn_use_laynorm": cnn_use_laynorm,
-        "cnn_use_batchnorm": cnn_use_batchnorm,
-        "cnn_act": cnn_act,
-        "cnn_drop": cnn_drop,
-        "pretrained": pretrained,
-        "model_params": CNN_params,
-    }
-
-    DNN1_options = {
-        "fc_lay": fc_lay,
-        "fc_drop": fc_drop,
-        "fc_use_batchnorm": fc_use_batchnorm,
-        "fc_use_laynorm": fc_use_laynorm,
-        "fc_use_laynorm_inp": fc_use_laynorm_inp,
-        "fc_use_batchnorm_inp": fc_use_batchnorm_inp,
-        "fc_act": fc_act,
-        "pretrained": pretrained,
-        "model_params": DNN1_params,
-    }
-
-    DNN2_options = {
-        "input_dim": fc_lay[-1],
-        "fc_lay": class_lay,
-        "fc_drop": class_drop,
-        "fc_use_batchnorm": class_use_batchnorm,
-        "fc_use_laynorm": class_use_laynorm,
-        "fc_use_laynorm_inp": class_use_laynorm_inp,
-        "fc_use_batchnorm_inp": class_use_batchnorm_inp,
-        "fc_act": class_act,
-    }
-
-    sincNet = dnn_models.SincWrapper(DNN2_options, DNN1_options, CNN_options)
-
-    if pretrained:
-        sincNet.eval()
-        sincNet.load_state_dict(DNN2_params)
-
-    else:
-        sincNet.train()
-
-    return sincNet
-
-
-class SincNetWrapper(nn.Module):
-    MODES = {
-        "random": torch_random_preprocessing_fn,
-        "all": torch_all_preprocessing_fn,
-    }
-
-    def __init__(self, model_kwargs: dict, weights_path: Optional[str]) -> None:
-        super().__init__()
-        predict_mode = model_kwargs.pop("predict_mode", "all")
-        if predict_mode not in self.MODES:
-            raise ValueError(f"predict_mode {predict_mode} not in {tuple(self.MODES)}")
-        self.predict_mode = predict_mode
-
-        self.model = sincnet(weights_path=weights_path, **model_kwargs)
-        self.model.to(DEVICE)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if self.training:
-            # preprocessing should be done before model for arbitrary length input
-            return self.model(x)
-
-        x = self.MODES[self.predict_mode](x)
-        output = self.model(x)
-        if self.predict_mode == "all":
-            output = torch.mean(output, dim=0, keepdim=True)
-        return output
-
-
-preprocessing_fn = numpy_random_preprocessing_fn
-
-
-def get_art_model(
-    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
-) -> PyTorchClassifier:
-    model = SincNetWrapper(model_kwargs, weights_path)
-    model.to(DEVICE)
-
-    wrapped_model = PyTorchClassifier(
-        model,
-        loss=torch.nn.NLLLoss(),
-        optimizer=torch.optim.RMSprop(
-            model.parameters(), lr=0.001, alpha=0.95, eps=1e-8
-        ),
-        input_shape=(None,),
-        nb_classes=40,
-        **wrapper_kwargs,
-    )
-    return wrapped_model
diff --git a/armory/datasets/README.md b/armory/datasets/README.md
index 76c2d8ac2..27e4f9933 100644
--- a/armory/datasets/README.md
+++ b/armory/datasets/README.md
@@ -84,6 +84,19 @@ info, ds = load.load("digit")
 info, ds = load.from_directory("/armory/datasets/new_builds/digit/1.0.8")
 ```
 
+### Apache Beam Datasets
+
+Currently, `librispeech` and `librispeech_dev_test` use Apache Beam to build.
+Apache Beam is not installed by default in the container due to older dependencies.
+If building in the container, do:
+```
+pip install apache-beam
+```
+
+When building, armory does not provide beam options by default.
+This makes building VERY slow unless overrides are provided.
+It is recommended that these are built directly using tfds on the command line.
+
 ## Packaging and Uploading for Cache
 
 After a dataset has been successfully built and loaded (locally), it can be packaged and uploaded to the cache.
@@ -91,43 +104,44 @@ After a dataset has been successfully built and loaded (locally), it can be pack
 First, it is recommended that you test the packaging and untarring process without upload/download.
 
 In python:
-```
+```python
 from armory.datasets import package
-package.package("my_dataset")  # creates a tar.gz file
-package.update("my_dataset")  # adds the tar hash info to "cached_datasets.json"
-package.verify("my_dataset")  # uses the "cached_datasets.json" information to verify hash information on tar file
-package.extract("my_dataset", overwrite=False)  # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite
-package.extract("my_dataset", overwrite=True)  # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error)
+my_dataset = "my_dataset"
+package.package(my_dataset)  # creates a tar.gz file
+package.update(my_dataset)  # adds the tar hash info to "cached_datasets.json"
+package.verify(my_dataset)  # uses the "cached_datasets.json" information to verify hash information on tar file
+package.extract(my_dataset, overwrite=False)  # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite
+package.extract(my_dataset, overwrite=True)  # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error)
 ```
 
 If you can successfully load the dataset after extracting it here, this part is good.
 
 Now, to upload to s3 (you will need `ARMORY_PRIVATE_S3_ID` and `ARMORY_PRIVATE_S3_KEY`):
-```
+```python
 from armory.datasets import upload
-upload.upload("my_dataset")  # this will fail, as you need to explicitly force it to be public
-upload.upload("my_dataset", public=True)
+upload.upload(my_dataset)  # this will fail, as you need to explicitly force it to be public
+upload.upload(my_dataset, public=True)
 ```
 
 Or, alternatively to packaging and uploading, you can use this convenience function:
-```
-package.add_to_cache("my_dataset", public=True)
+```python
+package.add_to_cache(my_dataset, public=True)
 ```
 
 To download, which will download it directly to the tar cache directory, do:
 ```
 from armory.datasets import download
-download.download("my_dataset", overwrite=True, verify=True)
+download.download(my_dataset, overwrite=True, verify=True)
 ```
 
 You can also download and extract with:
 ```
 from armory.datasets import load
-load.ensure_download_extract("my_dataset", verify=True)
+load.ensure_download_extract(my_dataset, verify=True)
 ```
 or just try to load it directly
 ```
-load.load("my_dataset")
+load.load(my_dataset)
 ```
 
 # Running / Testing with current armory scenario files
diff --git a/armory/datasets/cached_datasets.json b/armory/datasets/cached_datasets.json
index a3b6a2fd8..add4b96e4 100644
--- a/armory/datasets/cached_datasets.json
+++ b/armory/datasets/cached_datasets.json
@@ -13,6 +13,13 @@
         "url": null,
         "version": "1.0.8"
     },
+    "librispeech_dev_test": {
+        "sha256": "5c5c6cb53e458e2415bc4f242122155d51f32d7e78770176afe01acb584c4caa",
+        "size": 2332265306,
+        "subdir": "librispeech_dev_test/2.1.0",
+        "url": null,
+        "version": "2.1.0"
+    },
     "mnist": {
         "sha256": "fdc3408e29580367145e95ac7cb1d51e807105b174314cd52c16d27a13b98979",
         "size": 16920751,
diff --git a/armory/datasets/preprocessing.py b/armory/datasets/preprocessing.py
index 91e7c15b1..1ca0e4190 100644
--- a/armory/datasets/preprocessing.py
+++ b/armory/datasets/preprocessing.py
@@ -64,6 +64,24 @@ def xview(element):
     )
 
 
+@register
+def librispeech(element, audio_kwargs=None):
+    # TODO: determine how to fix np.array([<byte>], dtype=object) output for text
+    #    https://github.com/tensorflow/tensorflow/issues/34871
+    #    Our traditional behavior to decode to str once in numpy
+    #    This can be done via: y.astype("U")
+    #    Currently, this is handled by scenarios or metrics after dataset output
+    # NOTE: 16000 sampling rate
+    if audio_kwargs is None:
+        audio_kwargs = {}
+    text = element["text"]
+    speech = audio_to_canon(element["speech"], **audio_kwargs)
+    return (speech, text)
+
+
+librispeech_dev_test = register(librispeech, "librispeech_dev_test")
+
+
 def image_to_canon(image, resize=None, target_dtype=tf.float32, input_type="uint8"):
     """
     TFDS Image feature uses (height, width, channels)
@@ -98,14 +116,6 @@ def audio_to_canon(audio, resample=None, target_dtype=tf.float32, input_type="in
     return audio
 
 
-# config = {
-#     "preprocessor": "mnist(max_frames=1)"
-#     "preprocessor_kwargs": {
-#         "max_frames": null,
-#     }
-# }
-
-
 def video_to_canon(
     video,
     resize=None,
diff --git a/armory/datasets/standard/librispeech_dev_test/__init__.py b/armory/datasets/standard/librispeech_dev_test/__init__.py
new file mode 100644
index 000000000..d84f1d722
--- /dev/null
+++ b/armory/datasets/standard/librispeech_dev_test/__init__.py
@@ -0,0 +1,3 @@
+"""librispeech_dev_test dataset."""
+
+from .librispeech_dev_test import LibrispeechDevTest
diff --git a/armory/datasets/standard/librispeech_dev_test/checksums.tsv b/armory/datasets/standard/librispeech_dev_test/checksums.tsv
new file mode 100644
index 000000000..edb48d2cf
--- /dev/null
+++ b/armory/datasets/standard/librispeech_dev_test/checksums.tsv
@@ -0,0 +1 @@
+# NOTE: This file is empty due to subclassing the existing tfds librispeech builder: https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/audio/librispeech.py
diff --git a/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py
new file mode 100644
index 000000000..5fcb31e72
--- /dev/null
+++ b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py
@@ -0,0 +1,41 @@
+"""
+Subset of librispeech containing just 'dev' and 'test' splits.
+
+checksums.tsv is empty as it uses the underlying librispeech class.
+
+NOTE: In order to build, this requires apache beam installed.
+    In the container, do: `pip install apache-beam`
+    This is not installed by default due to older dependencies
+
+NOTE: when building, armory does not provide beam options by default
+    This makes building VERY slow unless overrides are provided
+    It is recommended that this is built directly using tfds on the command line
+
+Using DirectRunner with apache beam, can build with this:
+    tfds build /workspace/armory/datasets/standard/librispeech_dev_test --data_dir /armory/datasets/new_builds --force_checksums_validation --beam_pipeline_options="runner=DirectRunner,direct_num_workers=16,direct_running_mode=multi_processing"
+    See: https://beam.apache.org/releases/pydoc/2.43.0/_modules/apache_beam/options/pipeline_options.html#DirectOptions
+"""
+
+import tensorflow_datasets as tfds
+from tensorflow_datasets.audio import librispeech
+
+_SUBSET = (
+    "dev_clean",
+    "dev_other",
+    "test_clean",
+    "test_other",
+)
+_DL_URLS = {k: v for k, v in librispeech._DL_URLS.items() if k in _SUBSET}
+
+
+class LibrispeechDevTest(librispeech.Librispeech):
+    """DatasetBuilder for subset of Librispeech"""
+
+    def _split_generators(self, dl_manager):
+        extracted_dirs = dl_manager.download_and_extract(_DL_URLS)
+        self._populate_metadata(extracted_dirs)
+        splits = [
+            tfds.core.SplitGenerator(name=k, gen_kwargs={"directory": v})
+            for k, v in extracted_dirs.items()
+        ]
+        return splits
diff --git a/armory/docker/images.py b/armory/docker/images.py
index 5f677fbd1..f8aca2eb2 100644
--- a/armory/docker/images.py
+++ b/armory/docker/images.py
@@ -15,14 +15,16 @@
 
 TAG = version.to_docker_tag(armory.__version__)
 ARMORY_IMAGE_NAME = f"twosixarmory/armory:{TAG}"
-DEEPSPEECH_IMAGE_NAME = f"twosixarmory/pytorch-deepspeech:{TAG}"
 
 IMAGE_MAP = {
     "armory": ARMORY_IMAGE_NAME,
     "tf2": ARMORY_IMAGE_NAME,
     "pytorch": ARMORY_IMAGE_NAME,
     "carla-mot": ARMORY_IMAGE_NAME,
-    "pytorch-deepspeech": DEEPSPEECH_IMAGE_NAME,
+}
+DEPRECATED_IMAGES_VERSION = {
+    "tf1": "< 0.15.0",
+    "pytorch-deepspeech": "<= 0.16.1",
 }
 
 
@@ -65,8 +67,11 @@ def is_armory(image_name: str):
     user, repo, _ = split_name(image_name)
     if user and user != "twosixarmory":
         return False
-    if repo == "tf1":
-        raise ValueError("tf1 docker image is deprecated. Use Armory version < 0.15.0")
+    if repo in DEPRECATED_IMAGES_VERSION:
+        old_version = DEPRECATED_IMAGES_VERSION[repo]
+        raise ValueError(
+            f"{repo} docker image is deprecated. Use Armory version {old_version}"
+        )
     return repo in IMAGE_MAP
 
 
diff --git a/armory/metrics/task.py b/armory/metrics/task.py
index 7d4bc78b9..fdc0e725f 100644
--- a/armory/metrics/task.py
+++ b/armory/metrics/task.py
@@ -22,7 +22,6 @@
     set_namespace,
     result_formatter,
 )
-from armory.utils.external_repo import ExternalPipInstalledImport
 
 aggregate = MetricNameSpace()
 population = MetricNameSpace()
@@ -139,11 +138,7 @@ def __init__(self, model_name="roberta-large-mnli", cache_dir=None):
                 paths.runtime_paths().saved_model_dir, "huggingface"
             )
 
-        with ExternalPipInstalledImport(
-            package="transformers",
-            dockerimage="twosixarmory/pytorch-deepspeech",
-        ):
-            from transformers import AutoTokenizer, AutoModelForSequenceClassification
+        from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
         self.model = AutoModelForSequenceClassification.from_pretrained(
diff --git a/armory/scenarios/audio_asr.py b/armory/scenarios/audio_asr.py
index 93c73aca8..22bb29b2c 100644
--- a/armory/scenarios/audio_asr.py
+++ b/armory/scenarios/audio_asr.py
@@ -110,5 +110,6 @@ def load_test_dataset(self, test_split_default="test_clean"):
     def _load_sample_exporter(self):
         return AudioExporter(
             self.export_dir,
-            self.test_dataset.context.sample_rate,
+            self.test_dataset.info.metadata["sample_rate"],  # TODO: smarter way?
+            # self.test_dataset.info['speech'].sample_rate,  # TODO: get in a smarter way
         )
diff --git a/armory/scenarios/audio_classification.py b/armory/scenarios/audio_classification.py
index ef0aa1e90..7bcdf7545 100644
--- a/armory/scenarios/audio_classification.py
+++ b/armory/scenarios/audio_classification.py
@@ -16,5 +16,6 @@ def load_test_dataset(self):
     def _load_sample_exporter(self):
         return AudioExporter(
             self.export_dir,
-            self.test_dataset.context.sample_rate,
+            self.test_dataset.info.metadata["sample_rate"],  # TODO: smarter way?
+            # self.test_dataset.info['speech'].sample_rate,
         )
diff --git a/docker/Dockerfile-pytorch-deepspeech b/docker/Dockerfile-pytorch-deepspeech
deleted file mode 100644
index 7df8c8c39..000000000
--- a/docker/Dockerfile-pytorch-deepspeech
+++ /dev/null
@@ -1,45 +0,0 @@
-ARG base_image_tag
-
-FROM twosixarmory/base:${base_image_tag} AS armory-local
-
-WORKDIR /armory-repo
-
-# NOTE: This COPY command is filtered using the `.dockerignore` file
-#       in the root of the repo.
-COPY ./ /armory-repo
-
-RUN pip install git+https://github.com/romesco/hydra-lightning/\#subdirectory=hydra-configs-pytorch-lightning
-
-RUN echo "Building Armory from local source"                                            && \
-    echo "Updating Base Image..."                                                       && \
-      python -m pip install --upgrade pip                                               && \
-    echo "Installing Armory..."                                                         && \
-      pip install --no-compile --no-cache-dir --editable '.[engine,deepspeech,jupyter]' && \
-    echo "Configuring Armory..."                                                        && \
-      armory configure --use-default                                                    && \
-    echo "Cleaning up..."                                                               && \
-      rm -rf /armory-repo/.git
-
-WORKDIR /workspace
-
-
-# ------------------------------------------------------------------
-# DEVELOPER NOTES:
-# ------------------------------------------------------------------
-# TODO: determine if this environment setup is needed
-#  $ ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64"
-
-# NOTE:
-#  - pytorch-lightning >= 1.5.0 will break Deep Speech 2
-#  - torchmetrics >= 0.8.0 will break pytorch-lightning 1.4
-#  - hydra-lightning installs omegaconf
-#  - google-cloud-storage needed for checkpoint.py import
-#  - only sox python bindings are installed; underlying sox binaries not needed
-
-# NOTE: Listed dependencies of PyTorch Deep Speech 2, but do not appear
-#       to be used for inference (only for training), they are not installed:
-#         - torchelastic
-#         - wget
-#         - flask
-#         - fairscale
-# ------------------------------------------------------------------
diff --git a/docker/README.md b/docker/README.md
index f03934ba4..e295f608d 100755
--- a/docker/README.md
+++ b/docker/README.md
@@ -27,7 +27,7 @@ they can be built from the release branch of the repo:
 ```
 git checkout -b r0.16.0
 bash docker/build-base.sh
-python docker/build.py <armory|pytorch-deepspeech|all> [--no-pull]
+python docker/build.py <armory|all> [--no-pull]
 ```
 
 If possible, we recommend downloading the base image instead of building, which can be done by removing the `--no-pull` argument from `build.py`.
diff --git a/docker/build.py b/docker/build.py
index ff70ba801..16fcae74a 100644
--- a/docker/build.py
+++ b/docker/build.py
@@ -13,7 +13,7 @@
 script_dir = Path(__file__).parent
 root_dir = script_dir.parent
 
-armory_frameworks = ["armory", "pytorch-deepspeech"]
+armory_frameworks = ["armory"]
 
 # NOTE: Podman is not officially supported, but this enables
 #       use as a drop-in replacement for building.
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index e88c3e752..2f7acd238 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -56,7 +56,7 @@ Note: only release versions of armory will be published to [Dockerhub](https://h
 development branch images much be built locally using:
 ```bash
 cd YOUR_ARMORY_REPO
-bash docker/build.sh <tf2|pytorch|pytorch-deepspeech|all> dev
+bash docker/build.sh <armory|all> dev
 ```
 
 ## Style Guide
diff --git a/docs/baseline_models.md b/docs/baseline_models.md
index 4fde37b87..ede6900ca 100644
--- a/docs/baseline_models.md
+++ b/docs/baseline_models.md
@@ -37,8 +37,6 @@ The model files can be found in [armory/baseline_models/pytorch](../armory/basel
 | Model   |                S3 weight_files                | 
 |:----------: |:---------------------------------------------:| 
 | Cifar10 CNN |                                               |  
-| DeepSpeech 2 |                                               |
-| Sincnet CNN |         `sincnet_librispeech_v1.pth`          |
 | MARS | `mars_ucf101_v1.pth` , `mars_kinetics_v1.pth` |
 | ResNet50 CNN |          `resnet50_imagenet_v1.pth`           |
 | MNIST CNN |        `undefended_mnist_5epochs.pth`         |
@@ -59,4 +57,4 @@ The weights for this model are downloaded from the link listed below.
 
 ### Preprocessing Functions
 Preprocessing functions have been moved inside each model's forward pass. This is to allow each
-model to receive as input the canonicalized form of a dataset.
\ No newline at end of file
+model to receive as input the canonicalized form of a dataset.
diff --git a/docs/datasets.md b/docs/datasets.md
index 681147a2a..6d60a1850 100644
--- a/docs/datasets.md
+++ b/docs/datasets.md
@@ -56,14 +56,10 @@ The carla_over_obj_det_train dataset has the same properties as the above mentio
 | Dataset    | Description | x_shape | x_dtype  | y_shape  | y_dtype | sampling_rate | splits |
 |:----------: |:-----------: |:-------: |:--------: |:--------: |:-------: |:-------: |:------: |
 | [digit](https://github.com/Jakobovski/free-spoken-digit-dataset) | Audio dataset of spoken digits | (N, variable_length) | int64 | (N,) | int64 | 8 kHz | train, test |
-| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition  | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100 |
-| [librispeech-full](http://www.openslr.org/12/) | Full Librispeech dataset for automatic speech recognition | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100, train_clean360, train_other500 |
-| [librispeech_dev_clean](http://www.openslr.org/12/) | Librispeech dev dataset for speaker identification  | (N, variable_length)  | float32 | (N,)  | int64 | 16 kHz | train, validation, test |
-| [librispeech_dev_clean_asr](http://www.openslr.org/12) | Librispeech dev dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | train, validation, test |
+| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition (NOTE: not currently cached. Use TFDS builder.) | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, test_other, train_clean100, train_clean360, train_other500 |
+| [librispeech_dev_test](http://www.openslr.org/12/) | Librispeech with only dev and test splits | (N, variable_length)  | float32 | (N,)  | int64 | 16 kHz | dev_clean, dev_other, test_clean, test_other |
 | [speech_commands](https://www.tensorflow.org/datasets/catalog/speech_commands) | Speech commands dataset for audio poisoning | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | train, validation, test |
 
-NOTE: because the Librispeech dataset is over 300 GB with all splits, the ```librispeech_full``` dataset has
-all splits, whereas the ```librispeech``` dataset does not have the train_clean360 or train_other500 splits.
 <br>
 
 ### Video Datasets
@@ -101,9 +97,6 @@ Tensorflow Datasets [library](https://www.tensorflow.org/datasets/catalog/overvi
 |       resisc_45       |    train   |         First 5/7 of dataset           | See armory/data/resisc45/resisc45_dataset_partition.py |
 |                       | validation |          Next 1/7 of dataset           |                                                        |
 |                       |    test    |         Final 1/7 of dataset           |                                                        |
-| librispeech_dev_clean |    train   | 1371 recordings from dev_clean dataset |   Assign discrete clips so at least 50% of audio time  |
-|                       | validation |  692 recordings from dev_clean dataset |       is in train, at least 25% is in validation,      |
-|                       |    test    |  640 recordings from dev_clean dataset |              and the remainder are in test             |
 
 
 <br>
diff --git a/docs/docker.md b/docs/docker.md
index e1813fe20..853bc30b5 100644
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -4,16 +4,16 @@ inside a docker container.
 
 
 ## Images
-There are two docker images that are currently published to dockerhub for every release of
+There is a single docker image that is currently published to dockerhub for every release of
 the armory framework:
 
 1. `twosixarmory/armory:<version>`
-2. `twosixarmory/pytorch-deepspeech:<version>`
 
 NOTE: as of Armory version 0.15.0, we no longer support or publish a `tf1` image.
-If `tf1` functionality is needed, please use the `tf2` image and use `tf1` compatibility mode.
+If `tf1` functionality is needed, please use the `armory` image and use `tf1` compatibility mode.
+NOTE: in Armory versions later than 0.16.1, we no longer support the `pytorch-deepspeech` image.
 
-We additionally publish a base image, `twosixarmory/base:latest`, from which the three main images are derived.
+We additionally publish a base image, `twosixarmory/base:latest`, from which the main image is derived.
 This is updated less frequently, and each release does not necessarily have a corresponding new base.
 
 When using `armory launch` or `armory exec` the framework specific arguments will
@@ -87,7 +87,7 @@ they can be built from the release branch of the repo:
 ```
 git checkout -b r0.16.0
 bash docker/build-base.sh
-python docker/build.py <armory|pytorch-deepspeech|all> [--no-pull]
+python docker/build.py <armory|all> [--no-pull]
 ```
 
 If possible, we recommend downloading the base image instead of building, which can be done by removing the `--no-pull` argument from `build.py`.
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 02a643170..b91277703 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -6,7 +6,7 @@ Armory can be installed from PyPi:
 pip install armory-testbed[framework-flavor]
 ```
 
-Where `framework-flavor` is one of `tensorflow`, `pytorch` or `deepspeech`
+Where `framework-flavor` is one of `tensorflow` or `pytorch`
 as described below in [the armory flavors](#the-armory-flavors).
 
 When a user runs a given configuration file, the necessary docker image, datasets and
@@ -61,7 +61,6 @@ Armory supports multiple frameworks:
 
   - tensorflow
   - pytorch
-  - deepspeech
 
 In releases prior to 0.16, there was a complex set of `*-requirements.txt` files
 that were needed to provision the python environment for the various frameworks.
@@ -82,17 +81,13 @@ which installs the libraries needed for tensorflow evaluations. Similarly,
 
     pip install armory-testbed[pytorch]
 
-or
-
-    pip install armory-testbed[deepspeech]
-
 depending on the framework you want to use. We don't recommend trying to
 install multiple frameworks at the same time as this may lead to dependency
-conflicts. So
+conflicts. So,
 
     pip install armory-testbed[tensorflow,pytorch]
 
-is unsupported and may not even install.
+may not install.
 
 ## additional flavors
 
@@ -103,7 +98,7 @@ You can freely add `jupyterlab` to the flavor list to as needed, for example
 People developing armory will likely want to add the `developer` flavor to their
 set:
 
-    pip install armory-testbed[deepspeech,developer,jupyterlab]
+    pip install armory-testbed[developer,jupyterlab]
 
 Developers who are creating new Armory datasets will need
 
diff --git a/docs/index.md b/docs/index.md
index 13e9933f8..d991b857b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -35,14 +35,14 @@ terminal where this command was ran. Please see [running_armory_scenarios_intera
 
 2) `armory launch`
 
-* `armory launch <tf2|pytorch|pytorch-deepspeech> --interactive`. 
+* `armory launch <armory> --interactive`. 
 This will launch a framework specific container, with appropriate mounted volumes, for 
 the user to attach to for debugging purposes. A command to attach to the container will
 be returned from this call, and it can be ran in a separate terminal. To later close 
 the interactive container simply run CTRL+C from the terminal where this command was 
 ran.
 
-* `armory launch <tf2|pytorch|pytorch-deepspeech> --jupyter`. 
+* `armory launch <armory> --jupyter`. 
 Similar to the interactive launch, this will spin up a container for a specific 
 framework, but will instead return the web address of a jupyter lab server where 
 debugging can be performed. To close the jupyter server simply run CTRL+C from the 
@@ -50,12 +50,12 @@ terminal where this command was ran.
 
 3) `armory exec` 
 
-* `armory exec <tf2|pytorch|pytorch-deepspeech> -- <cmd>`. 
+* `armory exec <armory> -- <cmd>`. 
 This will run a specific command within a framework specific container. A notable use
 case for this would be to run test cases using pytest. After completion of the command 
 the container will be removed.
 
-To use custom docker images with `launch` or `exec`, replace `<tf2|pytorch|pytorch-deepspeech>` with its
+To use custom docker images with `launch` or `exec`, replace `<armory>` with its
 full name: `<your_image/name:your_tag>`. For use with `run`, you will need to modify the
 [configuration file](configuration_files.md).
 
diff --git a/docs/no_docker_mode.md b/docs/no_docker_mode.md
index e99308fea..af930089d 100644
--- a/docs/no_docker_mode.md
+++ b/docs/no_docker_mode.md
@@ -31,18 +31,9 @@ Once this is complete, and you have ensured you are in the `[armory-repo]` direc
 you can setup the environment with the following:
 ```bash
 pip install --upgrade pip==22.0.3
-pip install -e .[engine,datasets,math,pytorch,deepspeech,tensorflow]
+pip install -e .[engine,datasets,math,pytorch,tensorflow]
 ```
 
-If you are using the `deepspeech` scenarios, you will also need to
-install the `hydra-lightning` configs with:
-
-    pip install git+https://github.com/romesco/hydra-lightning/#subdirectory=hydra-configs-pytorch-lightning
-
-as described [in that package's README](https://github.com/romesco/hydra-lightning#readme).
-This is necessary because there is no proper release of that package (nor does one
-appear likely).
-
 Once this completes, you should run `armory configure` (If you haven't already done this
 previously) to setup the armory configuration
 (e.g. dataset download directory, output directory, etc.).
diff --git a/pyproject.toml b/pyproject.toml
index 8fd31cee1..46c2aec37 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,8 @@ engine = [
     "botocore" ,           # Needed for armory.data.utils
     "ffmpeg-python",       # Needed for armory.utils.export
     "pydub",               # this is in ART's extra-requires
+    "librosa",             # Needed for audio poisoning
+    "transformers",        # Needed for audio metrics
     "tidecv",              # Needed for TIDE metrics
     # Both `opencv-python` and `opencv-python-headless` must specify
     # the same version.
@@ -92,17 +94,6 @@ tensorflow = [
     "tensorflow >= 2.10.0",
 ]
 
-deepspeech = [
-    "armory-testbed[pytorch,engine,datasets,math]",
-    "python-levenshtein",
-    "torchmetrics < 0.8.0",
-    "sox",
-    "librosa",
-    "google-cloud-storage",
-    "transformers",
-    "pytorch-lightning < 1.5.0",
-]
-
 math = [
     "numpy",
     "pandas",
@@ -202,5 +193,4 @@ markers = [
   "docker_required",     # This Test Requires Docker to run
   "end_to_end",          # Full End to End Test (typically slow)
   "unit",                # These are Unit Tests (fast and lightweight)
-  "pytorch_deepspeech",  # This test requires pytorch-deepspeech container (or equivalent dependencies) to run
 ]
diff --git a/scenario_configs/asr_librispeech_entailment.json b/scenario_configs/asr_librispeech_entailment.json
index 752937374..b3038f281 120000
--- a/scenario_configs/asr_librispeech_entailment.json
+++ b/scenario_configs/asr_librispeech_entailment.json
@@ -1 +1 @@
-eval5/asr_librispeech/entailment.json
\ No newline at end of file
+eval6/asr_librispeech/hubert_entailment.json
\ No newline at end of file
diff --git a/scenario_configs/asr_librispeech_targeted.json b/scenario_configs/asr_librispeech_targeted.json
index 04b2e2ac6..37d7e1bd6 120000
--- a/scenario_configs/asr_librispeech_targeted.json
+++ b/scenario_configs/asr_librispeech_targeted.json
@@ -1 +1 @@
-eval5/asr_librispeech/untargeted_snr_pgd.json
\ No newline at end of file
+eval6/asr_librispeech/hubert_targeted_snr_pgd.json
\ No newline at end of file
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json
deleted file mode 100755
index cdecb16ce..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json
+++ /dev/null
@@ -1,92 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "decrease_factor_alpha": 0.5,
-            "decrease_factor_eps": 0.5,
-            "eps": 0.0075,
-            "global_max_length": 562480,
-            "increase_factor_alpha": 2.0,
-            "initial_rescale": 1.0,
-            "learning_rate_1": 0.0001,
-            "learning_rate_2": 1e-07,
-            "max_iter_1": 400,
-            "max_iter_2": 100,
-            "num_iter_decrease_alpha": 50
-        },
-        "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch",
-        "name": "ImperceptibleASRPyTorch",
-        "targeted": true,
-        "targeted_labels": {
-            "scheme": "matched length",
-            "transcripts": [
-                "REALLY SHORT TEST STRING",
-                "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS",
-                "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY"
-            ]
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3CompressionPyTorch",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json
deleted file mode 100755
index 71b02f0dc..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json
+++ /dev/null
@@ -1,81 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "decrease_factor_alpha": 0.5,
-            "decrease_factor_eps": 0.5,
-            "eps": 0.0075,
-            "global_max_length": 562480,
-            "increase_factor_alpha": 2.0,
-            "initial_rescale": 1.0,
-            "learning_rate_1": 0.0001,
-            "learning_rate_2": 1e-07,
-            "max_iter_1": 400,
-            "max_iter_2": 100,
-            "num_iter_decrease_alpha": 50
-        },
-        "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch",
-        "name": "ImperceptibleASRPyTorch",
-        "targeted": true,
-        "targeted_labels": {
-            "scheme": "matched length",
-            "transcripts": [
-                "REALLY SHORT TEST STRING",
-                "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS",
-                "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY"
-            ]
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json
deleted file mode 100755
index c4d41fb71..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "partial_attack": false,
-            "snr_db": 20,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.kenansville_dft",
-        "name": "KenansvilleDFT",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 8,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json
deleted file mode 100755
index 1a8e25bed..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json
+++ /dev/null
@@ -1,64 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "partial_attack": false,
-            "snr_db": 20,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.kenansville_dft",
-        "name": "KenansvilleDFT",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 8,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json
deleted file mode 100755
index c54f8ef78..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json
+++ /dev/null
@@ -1,86 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 1.5,
-            "eps_step": 0.05,
-            "max_iter": 100,
-            "norm": 2,
-            "num_random_init": 0,
-            "random_eps": false,
-            "targeted": false,
-            "verbose": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "ProjectedGradientDescent",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json
deleted file mode 100755
index ac814e83a..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json
+++ /dev/null
@@ -1,80 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "audio_channel": {
-            "attenuation": 0.5,
-            "delay": 300,
-            "pytorch": true
-        },
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 1.5,
-            "eps_step": 0.05,
-            "max_iter": 100,
-            "norm": 2,
-            "num_random_init": 0,
-            "random_eps": false,
-            "targeted": false,
-            "verbose": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "ProjectedGradientDescent",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json
deleted file mode 100755
index 94a7bef1c..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 1.5,
-            "eps_step": 0.05,
-            "max_iter": 100,
-            "norm": 2,
-            "num_random_init": 0,
-            "random_eps": false,
-            "targeted": false,
-            "verbose": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "ProjectedGradientDescent",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json
deleted file mode 100644
index 263adccac..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json
+++ /dev/null
@@ -1,81 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 10,
-            "eps_step": 0.5,
-            "max_iter": 10,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "import_from": "armory.attacks.librispeech_target_labels",
-                "transcripts": "matched_length"
-            },
-            "module": "armory.utils.labels",
-            "name": "MatchedTranscriptLengthTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json
deleted file mode 100755
index 9ed517ef0..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json
+++ /dev/null
@@ -1,80 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 10,
-            "eps_step": 0.5,
-            "max_iter": 10,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "value": "TEST STRING"
-            },
-            "module": "armory.utils.labels",
-            "name": "FixedStringTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json
deleted file mode 100644
index 71d688d97..000000000
--- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json
+++ /dev/null
@@ -1,65 +0,0 @@
-{
-    "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation",
-    "adhoc": null,
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 0.2,
-            "eps_step": 0.1,
-            "minimal": false,
-            "num_random_init": 0,
-            "targeted": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "FastGradientMethod",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech_dev_clean"
-    },
-    "defense": null,
-    "metric": {
-        "means": true,
-        "perturbation": "linf",
-        "record_metric_per_sample": false,
-        "task": [
-            "categorical_accuracy"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "fit_batch_size": 16,
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {
-            "predict_mode": "all"
-        },
-        "module": "armory.baseline_models.pytorch.sincnet",
-        "name": "get_art_model",
-        "weights_file": "sincnet_librispeech_v1.pth",
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1.0,
-                1.0
-            ]
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_classification",
-        "name": "AudioClassificationTask"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch",
-        "external_github_repo": "hkakitani/SincNet",
-        "gpus": "all",
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json
deleted file mode 100644
index 8ea65668d..000000000
--- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json
+++ /dev/null
@@ -1,69 +0,0 @@
-{
-    "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation",
-    "adhoc": null,
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 10,
-            "eps_step": 0.5,
-            "max_iter": 10,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech_dev_clean"
-    },
-    "defense": null,
-    "metric": {
-        "means": true,
-        "perturbation": [
-            "snr",
-            "snr_db"
-        ],
-        "record_metric_per_sample": true,
-        "task": [
-            "categorical_accuracy"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "fit_batch_size": 16,
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {
-            "predict_mode": "all"
-        },
-        "module": "armory.baseline_models.pytorch.sincnet",
-        "name": "get_art_model",
-        "weights_file": "sincnet_librispeech_v1.pth",
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1.0,
-                1.0
-            ]
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_classification",
-        "name": "AudioClassificationTask"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch",
-        "external_github_repo": "hkakitani/SincNet",
-        "gpus": "all",
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json
deleted file mode 100644
index 526353755..000000000
--- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json
+++ /dev/null
@@ -1,72 +0,0 @@
-{
-    "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation",
-    "adhoc": null,
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 0.2,
-            "eps_step": 0.1,
-            "minimal": false,
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "art.attacks.evasion",
-        "name": "FastGradientMethod",
-        "targeted_labels": {
-            "kwargs": {
-                "num_classes": 40
-            },
-            "module": "armory.utils.labels",
-            "name": "RoundRobinTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech_dev_clean"
-    },
-    "defense": null,
-    "metric": {
-        "means": true,
-        "perturbation": "linf",
-        "record_metric_per_sample": false,
-        "task": [
-            "categorical_accuracy"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "fit_batch_size": 16,
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {
-            "predict_mode": "all"
-        },
-        "module": "armory.baseline_models.pytorch.sincnet",
-        "name": "get_art_model",
-        "weights_file": "sincnet_librispeech_v1.pth",
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1.0,
-                1.0
-            ]
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_classification",
-        "name": "AudioClassificationTask"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch",
-        "external_github_repo": "hkakitani/SincNet",
-        "gpus": "all",
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/defended_entailment.json b/scenario_configs/eval5/asr_librispeech/defended_entailment.json
deleted file mode 100644
index 5727d7654..000000000
--- a/scenario_configs/eval5/asr_librispeech/defended_entailment.json
+++ /dev/null
@@ -1,97 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 20,
-            "eps_step": 0.05,
-            "max_iter": 500,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "dtype": "str",
-                "import_from": "armory.attacks.librispeech_target_labels",
-                "values": "entailment_100"
-            },
-            "module": "armory.utils.labels",
-            "name": "ManualTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "entailment",
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": [
-            "SeanNaren/deepspeech.pytorch@V3.0"
-        ],
-        "gpus": "all",
-        "local_repo_path": null,
-        "num_eval_batches": 100,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json
deleted file mode 100644
index c9ff3fdb2..000000000
--- a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json
+++ /dev/null
@@ -1,92 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 20,
-            "eps_step": 0.5,
-            "max_iter": 500,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "import_from": "armory.attacks.librispeech_target_labels",
-                "transcripts": "matched_length"
-            },
-            "module": "armory.utils.labels",
-            "name": "MatchedTranscriptLengthTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json
deleted file mode 100644
index 58a8c1af8..000000000
--- a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json
+++ /dev/null
@@ -1,73 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 20,
-            "eps_step": 0.5,
-            "max_iter": 500,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json
similarity index 70%
rename from scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json
rename to scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json
index 4c128b261..006318915 100644
--- a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -20,12 +20,11 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": {
         "kwargs": {
@@ -41,7 +40,7 @@
     },
     "metric": {
         "means": false,
-        "perturbation": "linf",
+        "perturbation": "snr_db",
         "record_metric_per_sample": true,
         "task": [
             "word_error_rate"
@@ -53,19 +52,13 @@
             "nb_epochs": 20000
         },
         "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
+        "module": "armory.baseline_models.pytorch.hubert_asr_large",
         "name": "get_art_model",
         "predict_kwargs": {
             "transcription_output": true
         },
         "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
+        "wrapper_kwargs": {}
     },
     "scenario": {
         "kwargs": {},
@@ -73,8 +66,8 @@
         "name": "AutomaticSpeechRecognition"
     },
     "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
+        "docker_image": "twosixarmory/pytorch",
+        "external_github_repo": null,
         "gpus": "all",
         "local_repo_path": null,
         "output_dir": null,
diff --git a/scenario_configs/eval5/asr_librispeech/entailment.json b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json
similarity index 70%
rename from scenario_configs/eval5/asr_librispeech/entailment.json
rename to scenario_configs/eval6/asr_librispeech/hubert_entailment.json
index 21f5ff3e1..1a3388db5 100644
--- a/scenario_configs/eval5/asr_librispeech/entailment.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -29,12 +29,11 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": null,
     "metric": {
@@ -52,19 +51,13 @@
             "nb_epochs": 20000
         },
         "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
+        "module": "armory.baseline_models.pytorch.hubert_asr_large",
         "name": "get_art_model",
         "predict_kwargs": {
             "transcription_output": true
         },
         "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
+        "wrapper_kwargs": {}
     },
     "scenario": {
         "kwargs": {},
@@ -72,13 +65,10 @@
         "name": "AutomaticSpeechRecognition"
     },
     "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": [
-            "SeanNaren/deepspeech.pytorch@V3.0"
-        ],
+        "docker_image": "twosixarmory/pytorch",
+        "external_github_repo": null,
         "gpus": "all",
         "local_repo_path": null,
-        "num_eval_batches": 100,
         "output_dir": null,
         "output_filename": null,
         "use_gpu": false
diff --git a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json
similarity index 69%
rename from scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json
rename to scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json
index f650a46eb..5f469cca6 100644
--- a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -28,17 +28,16 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": null,
     "metric": {
         "means": false,
-        "perturbation": "linf",
+        "perturbation": "snr_db",
         "record_metric_per_sample": true,
         "task": [
             "word_error_rate"
@@ -50,19 +49,13 @@
             "nb_epochs": 20000
         },
         "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
+        "module": "armory.baseline_models.pytorch.hubert_asr_large",
         "name": "get_art_model",
         "predict_kwargs": {
             "transcription_output": true
         },
         "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
+        "wrapper_kwargs": {}
     },
     "scenario": {
         "kwargs": {},
@@ -70,8 +63,8 @@
         "name": "AutomaticSpeechRecognition"
     },
     "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
+        "docker_image": "twosixarmory/pytorch",
+        "external_github_repo": null,
         "gpus": "all",
         "local_repo_path": null,
         "output_dir": null,
diff --git a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json
index 25b1b5bc0..3ce122237 100644
--- a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -20,17 +20,16 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": null,
     "metric": {
         "means": false,
-        "perturbation": "linf",
+        "perturbation": "snr_db",
         "record_metric_per_sample": true,
         "task": [
             "word_error_rate"
diff --git a/scenario_configs/speaker_id_librispeech.json b/scenario_configs/speaker_id_librispeech.json
deleted file mode 120000
index c9d0b713e..000000000
--- a/scenario_configs/speaker_id_librispeech.json
+++ /dev/null
@@ -1 +0,0 @@
-eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index e6a4db595..5e1c2a49a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,10 +14,7 @@
 logger = logging.getLogger(__name__)
 
 REQUIRED_DOCKER_IMAGES = [
-    f"twosixarmory/pytorch:{__version__}",
-    f"twosixarmory/tf2:{__version__}",
-    f"twosixarmory/pytorch-deepspeech:{__version__}",
-    f"twosixarmory/carla-mot:{__version__}",
+    f"twosixarmory/armory:{__version__}",
 ]
 
 
diff --git a/tests/unit/test_docker_build_script.py b/tests/unit/test_docker_build_script.py
index 65fe1f2ed..e78b7404c 100644
--- a/tests/unit/test_docker_build_script.py
+++ b/tests/unit/test_docker_build_script.py
@@ -43,12 +43,8 @@ def image_tag(armory_version_tbi):
 @pytest.mark.parametrize(
     "img, opt",
     [
-        #        ("base", ""),
-        ("pytorch", ""),
-        ("tf2", ""),
-        ("pytorch-deepspeech", ""),
-        ("pytorch-deepspeech", "--no-cache"),
-        #        ("base", "--no-cache"),
+        ("armory", ""),
+        ("armory", "--no-cache"),
     ],
 )
 def test_build_script(img, opt, image_tag, armory_version_tbi):
diff --git a/tests/unit/test_task_metrics.py b/tests/unit/test_task_metrics.py
index 12f028826..32056f73b 100644
--- a/tests/unit/test_task_metrics.py
+++ b/tests/unit/test_task_metrics.py
@@ -13,7 +13,6 @@
 
 
 @pytest.mark.docker_required
-@pytest.mark.pytorch_deepspeech
 @pytest.mark.slow
 def test_entailment():
     """