Remove ffmpeg fallback from sox_io backend

In #2419, we added FFmpeg as a fallback for the sox_io backend. This was a workaround for the issue caused by the removal of libmad. Now that we have introduced the `backend` argument to the I/O functions, and the libsox integration has moved to dynamic binding, where users can use a libsox build with libmad integration, we no longer need the workaround. This commit is based on reverting #2416 (fd7ace1).
pytorch · Jul 28, 2023 · 2121368 · 2121368
1 parent 7368e33
commit 2121368
Show file tree

Hide file tree

Showing 18 changed files with 20 additions and 767 deletions.
diff --git a/test/torchaudio_unittest/backend/dispatcher/sox/load_test.py b/test/torchaudio_unittest/backend/dispatcher/sox/load_test.py
@@ -318,25 +318,6 @@ def test_sox(self, frame_offset, num_frames, channels_first, normalize):
 
         self._test(torch.ops.torchaudio.sox_io_load_audio_file, frame_offset, num_frames, channels_first, normalize)
 
-    @nested_params(
-        [0, 1, 10, 100, 1000],
-        [-1, 1, 10, 100, 1000],
-        [True, False],
-        [True, False],
-    )
-    def test_ffmpeg(self, frame_offset, num_frames, channels_first, normalize):
-        """The combination of properly changes the output tensor"""
-        from torchaudio.io._compat import load_audio, load_audio_fileobj
-
-        self._test(load_audio, frame_offset, num_frames, channels_first, normalize)
-
-        # test file-like obj
-        def func(path, *args):
-            with open(path, "rb") as fileobj:
-                return load_audio_fileobj(fileobj, *args)
-
-        self._test(func, frame_offset, num_frames, channels_first, normalize)
-
 
 @skipIfNoSox
 @skipIfNoExec("sox")

diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py
@@ -277,6 +277,7 @@ def test_htk(self):
 
 
 @skipIfNoSox
+@skipIfNoSoxDecoder("opus")
 class TestInfoOpus(PytorchTestCase):
     @parameterized.expand(
         list(
@@ -304,17 +305,15 @@ class TestLoadWithoutExtension(PytorchTestCase):
     def test_mp3(self):
         """MP3 file without extension can be loaded
 
-        Originally, we added `format` argument for this case, but now we use FFmpeg
-        for MP3 decoding, which works even without `format` argument.
         https://github.com/pytorch/audio/issues/1040
 
         The file was generated with the following command
             ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
         """
         path = get_asset_path("mp3_without_ext")
-        sinfo = sox_io_backend.info(path)
+        sinfo = sox_io_backend.info(path, format="mp3")
         assert sinfo.sample_rate == 16000
-        assert sinfo.num_frames == 80000
+        assert sinfo.num_frames == 81216
         assert sinfo.num_channels == 1
         assert sinfo.bits_per_sample == 0  # bit_per_sample is irrelevant for compressed formats
         assert sinfo.encoding == "MP3"

diff --git a/test/torchaudio_unittest/backend/sox_io/load_test.py b/test/torchaudio_unittest/backend/sox_io/load_test.py
@@ -315,40 +315,19 @@ def test_sox(self, frame_offset, num_frames, channels_first, normalize):
 
         self._test(torch.ops.torchaudio.sox_io_load_audio_file, frame_offset, num_frames, channels_first, normalize)
 
-    @nested_params(
-        [0, 1, 10, 100, 1000],
-        [-1, 1, 10, 100, 1000],
-        [True, False],
-        [True, False],
-    )
-    def test_ffmpeg(self, frame_offset, num_frames, channels_first, normalize):
-        """The combination of properly changes the output tensor"""
-        from torchaudio.io._compat import load_audio, load_audio_fileobj
-
-        self._test(load_audio, frame_offset, num_frames, channels_first, normalize)
-
-        # test file-like obj
-        def func(path, *args):
-            with open(path, "rb") as fileobj:
-                return load_audio_fileobj(fileobj, *args)
-
-        self._test(func, frame_offset, num_frames, channels_first, normalize)
-
 
 @skipIfNoSox
 class TestLoadWithoutExtension(PytorchTestCase):
     def test_mp3(self):
         """MP3 file without extension can be loaded
 
-        Originally, we added `format` argument for this case, but now we use FFmpeg
-        for MP3 decoding, which works even without `format` argument.
         https://github.com/pytorch/audio/issues/1040
 
         The file was generated with the following command
             ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
         """
         path = get_asset_path("mp3_without_ext")
-        _, sr = sox_io_backend.load(path)
+        _, sr = sox_io_backend.load(path, format="mp3")
         assert sr == 16000
 
 

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
@@ -7,33 +7,6 @@
 from .common import AudioMetaData
 
 
-# Note: need to comply TorchScript syntax -- need annotation and no f-string
-def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
-    raise RuntimeError("Failed to fetch metadata from {}".format(filepath))
-
-
-# Note: need to comply TorchScript syntax -- need annotation and no f-string
-def _fail_load(
-    filepath: str,
-    frame_offset: int = 0,
-    num_frames: int = -1,
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[torch.Tensor, int]:
-    raise RuntimeError("Failed to load audio from {}".format(filepath))
-
-
-if torchaudio._extension._FFMPEG_EXT is not None:
-    import torchaudio.io._compat as _compat
-
-    _fallback_info = _compat.info_audio
-    _fallback_load = _compat.load_audio
-else:
-    _fallback_info = _fail_info
-    _fallback_load = _fail_load
-
-
 @torchaudio._extension.fail_if_no_sox
 def info(
     filepath: str,
@@ -58,9 +31,7 @@ def info(
             raise RuntimeError("sox_io backend does not support file-like object.")
         filepath = os.fspath(filepath)
     sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
-    if sinfo is not None:
-        return AudioMetaData(*sinfo)
-    return _fallback_info(filepath, format)
+    return AudioMetaData(*sinfo)
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -153,12 +124,9 @@ def load(
         if hasattr(filepath, "read"):
             raise RuntimeError("sox_io backend does not support file-like object.")
         filepath = os.fspath(filepath)
-    ret = torch.ops.torchaudio.sox_io_load_audio_file(
+    return torch.ops.torchaudio.sox_io_load_audio_file(
         filepath, frame_offset, num_frames, normalize, channels_first, format
     )
-    if ret is not None:
-        return ret
-    return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)
 
 
 @torchaudio._extension.fail_if_no_sox

diff --git a/torchaudio/csrc/sox/effects.cpp b/torchaudio/csrc/sox/effects.cpp
@@ -89,18 +89,15 @@ auto apply_effects_file(
     c10::optional<bool> normalize,
     c10::optional<bool> channels_first,
     const c10::optional<std::string>& format)
-    -> c10::optional<std::tuple<torch::Tensor, int64_t>> {
+    -> std::tuple<torch::Tensor, int64_t> {
   // Open input file
   SoxFormat sf(sox_open_read(
       path.c_str(),
       /*signal=*/nullptr,
       /*encoding=*/nullptr,
       /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
 
-  if (static_cast<sox_format_t*>(sf) == nullptr ||
-      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    return {};
-  }
+  validate_input_file(sf, path);
 
   const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
 

diff --git a/torchaudio/csrc/sox/effects.h b/torchaudio/csrc/sox/effects.h
@@ -22,7 +22,7 @@ auto apply_effects_file(
     c10::optional<bool> normalize,
     c10::optional<bool> channels_first,
     const c10::optional<std::string>& format)
-    -> c10::optional<std::tuple<torch::Tensor, int64_t>>;
+    -> std::tuple<torch::Tensor, int64_t>;
 
 } // namespace torchaudio::sox
 

diff --git a/torchaudio/csrc/sox/io.cpp b/torchaudio/csrc/sox/io.cpp
@@ -8,7 +8,7 @@ using namespace torch::indexing;
 
 namespace torchaudio::sox {
 
-c10::optional<MetaDataTuple> get_info_file(
+std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
     const std::string& path,
     const c10::optional<std::string>& format) {
   SoxFormat sf(sox_open_read(
@@ -17,12 +17,9 @@ c10::optional<MetaDataTuple> get_info_file(
       /*encoding=*/nullptr,
       /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
 
-  if (static_cast<sox_format_t*>(sf) == nullptr ||
-      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    return {};
-  }
+  validate_input_file(sf, path);
 
-  return std::forward_as_tuple(
+  return std::make_tuple(
       static_cast<int64_t>(sf->signal.rate),
       static_cast<int64_t>(sf->signal.length / sf->signal.channels),
       static_cast<int64_t>(sf->signal.channels),
@@ -58,7 +55,7 @@ std::vector<std::vector<std::string>> get_effects(
   return effects;
 }
 
-c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
+std::tuple<torch::Tensor, int64_t> load_audio_file(
     const std::string& path,
     const c10::optional<int64_t>& frame_offset,
     const c10::optional<int64_t>& num_frames,

diff --git a/torchaudio/csrc/sox/io.h b/torchaudio/csrc/sox/io.h
@@ -11,14 +11,11 @@ auto get_effects(
     const c10::optional<int64_t>& num_frames)
     -> std::vector<std::vector<std::string>>;
 
-using MetaDataTuple =
-    std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
-
-c10::optional<MetaDataTuple> get_info_file(
+std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
     const std::string& path,
     const c10::optional<std::string>& format);
 
-c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
+std::tuple<torch::Tensor, int64_t> load_audio_file(
     const std::string& path,
     const c10::optional<int64_t>& frame_offset,
     const c10::optional<int64_t>& num_frames,

diff --git a/torchaudio/csrc/sox/pybind/effects.cpp b/torchaudio/csrc/sox/pybind/effects.cpp
diff --git a/torchaudio/csrc/sox/pybind/effects.h b/torchaudio/csrc/sox/pybind/effects.h