From b52a3f4934048b137d6ef500b76785fd3d9edfe8 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Thu, 27 Jul 2023 18:01:11 -0400 Subject: [PATCH] Remove ffmpeg fallback from sox_io backend In #2419, we added ffmpeg as fallback for sox_io backend. This was a workaround for the issue caused by the removal of libmad. Now that we have introduced the `backend` argument to I/O functions, and libsox integration has moved to dynamic binding where users can use libsox with libmad integration, we do not need the workaround. This commit is based on reverting #2416 (fd7ace17938c74d0928987b9525dbe7799b328fa). --- .../backend/sox_io/info_test.py | 3 ++ torchaudio/backend/sox_io_backend.py | 36 ++----------------- torchaudio/csrc/sox/effects.cpp | 7 ++-- torchaudio/csrc/sox/effects.h | 2 +- torchaudio/csrc/sox/io.cpp | 11 +++--- torchaudio/csrc/sox/io.h | 7 ++-- torchaudio/csrc/sox/pybind/effects.cpp | 5 ++- torchaudio/csrc/sox/pybind/effects.h | 3 +- torchaudio/csrc/sox/pybind/io.cpp | 7 ++-- torchaudio/csrc/sox/pybind/io.h | 8 ++--- torchaudio/csrc/sox/utils.h | 4 +++ torchaudio/sox_effects/sox_effects.py | 5 +-- 12 files changed, 27 insertions(+), 71 deletions(-) diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py index 9a04af6181a..ce35fdf467d 100644 --- a/test/torchaudio_unittest/backend/sox_io/info_test.py +++ b/test/torchaudio_unittest/backend/sox_io/info_test.py @@ -277,6 +277,7 @@ def test_htk(self): @skipIfNoSox +@skipIfNoSoxDecoder("opus") class TestInfoOpus(PytorchTestCase): @parameterized.expand( list( @@ -290,6 +291,8 @@ class TestInfoOpus(PytorchTestCase): ) def test_opus(self, bitrate, num_channels, compression_level): """`sox_io_backend.info` can check opus file correcty""" + import torchaudio + torchaudio.utils.sox_utils.set_verbosity(6) path = get_asset_path("io", f"{bitrate}_{compression_level}_{num_channels}ch.opus") info = sox_io_backend.info(path) assert info.sample_rate 
== 48000 diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 7908646438a..009041fc66d 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -7,33 +7,6 @@ from .common import AudioMetaData -# Note: need to comply TorchScript syntax -- need annotation and no f-string -def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData: - raise RuntimeError("Failed to fetch metadata from {}".format(filepath)) - - -# Note: need to comply TorchScript syntax -- need annotation and no f-string -def _fail_load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[torch.Tensor, int]: - raise RuntimeError("Failed to load audio from {}".format(filepath)) - - -if torchaudio._extension._FFMPEG_EXT is not None: - import torchaudio.io._compat as _compat - - _fallback_info = _compat.info_audio - _fallback_load = _compat.load_audio -else: - _fallback_info = _fail_info - _fallback_load = _fail_load - - @torchaudio._extension.fail_if_no_sox def info( filepath: str, @@ -58,9 +31,7 @@ def info( raise RuntimeError("sox_io backend does not support file-like object.") filepath = os.fspath(filepath) sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format) - if sinfo is not None: - return AudioMetaData(*sinfo) - return _fallback_info(filepath, format) + return AudioMetaData(*sinfo) @torchaudio._extension.fail_if_no_sox @@ -153,12 +124,9 @@ def load( if hasattr(filepath, "read"): raise RuntimeError("sox_io backend does not support file-like object.") filepath = os.fspath(filepath) - ret = torch.ops.torchaudio.sox_io_load_audio_file( + return torch.ops.torchaudio.sox_io_load_audio_file( filepath, frame_offset, num_frames, normalize, channels_first, format ) - if ret is not None: - return ret - return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format) 
@torchaudio._extension.fail_if_no_sox diff --git a/torchaudio/csrc/sox/effects.cpp b/torchaudio/csrc/sox/effects.cpp index a159663a109..2b4d851d95d 100644 --- a/torchaudio/csrc/sox/effects.cpp +++ b/torchaudio/csrc/sox/effects.cpp @@ -89,7 +89,7 @@ auto apply_effects_file( c10::optional normalize, c10::optional channels_first, const c10::optional& format) - -> c10::optional> { + -> std::tuple { // Open input file SoxFormat sf(sox_open_read( path.c_str(), @@ -97,10 +97,7 @@ auto apply_effects_file( /*encoding=*/nullptr, /*filetype=*/format.has_value() ? format.value().c_str() : nullptr)); - if (static_cast(sf) == nullptr || - sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - return {}; - } + validate_input_file(sf, path); const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision); diff --git a/torchaudio/csrc/sox/effects.h b/torchaudio/csrc/sox/effects.h index 70e59f887f0..a4974adf461 100644 --- a/torchaudio/csrc/sox/effects.h +++ b/torchaudio/csrc/sox/effects.h @@ -22,7 +22,7 @@ auto apply_effects_file( c10::optional normalize, c10::optional channels_first, const c10::optional& format) - -> c10::optional>; + -> std::tuple; } // namespace torchaudio::sox diff --git a/torchaudio/csrc/sox/io.cpp b/torchaudio/csrc/sox/io.cpp index b8aac89372c..f69b5c3d7c0 100644 --- a/torchaudio/csrc/sox/io.cpp +++ b/torchaudio/csrc/sox/io.cpp @@ -8,7 +8,7 @@ using namespace torch::indexing; namespace torchaudio::sox { -c10::optional get_info_file( +std::tuple get_info_file( const std::string& path, const c10::optional& format) { SoxFormat sf(sox_open_read( @@ -17,12 +17,9 @@ c10::optional get_info_file( /*encoding=*/nullptr, /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - if (static_cast(sf) == nullptr || - sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - return {}; - } + validate_input_file(sf, path); - return std::forward_as_tuple( + return std::make_tuple( static_cast(sf->signal.rate), static_cast(sf->signal.length / sf->signal.channels), static_cast(sf->signal.channels), @@ -58,7 +55,7 @@ std::vector> get_effects( return effects; } -c10::optional> load_audio_file( +std::tuple load_audio_file( const std::string& path, const c10::optional& frame_offset, const c10::optional& num_frames, diff --git a/torchaudio/csrc/sox/io.h b/torchaudio/csrc/sox/io.h index 7ef84e48ad3..cf18bca5ed1 100644 --- a/torchaudio/csrc/sox/io.h +++ b/torchaudio/csrc/sox/io.h @@ -11,14 +11,11 @@ auto get_effects( const c10::optional& num_frames) -> std::vector>; -using MetaDataTuple = - std::tuple; - -c10::optional get_info_file( +std::tuple get_info_file( const std::string& path, const c10::optional& format); -c10::optional> load_audio_file( +std::tuple load_audio_file( const std::string& path, const c10::optional& frame_offset, const c10::optional& num_frames, diff --git a/torchaudio/csrc/sox/pybind/effects.cpp b/torchaudio/csrc/sox/pybind/effects.cpp index 9b9e04bb05f..d2d44fba9e6 100644 --- a/torchaudio/csrc/sox/pybind/effects.cpp +++ b/torchaudio/csrc/sox/pybind/effects.cpp @@ -30,8 +30,7 @@ auto apply_effects_fileobj( const std::vector>& effects, c10::optional normalize, c10::optional channels_first, - c10::optional format) - -> c10::optional> { + c10::optional format) -> std::tuple { // Prepare the buffer used throughout the lifecycle of SoxEffectChain. 
// // For certain format (such as FLAC), libsox keeps reading the content at @@ -112,7 +111,7 @@ auto apply_effects_fileobj( normalize.value_or(true), channels_first_); - return std::forward_as_tuple( + return std::make_tuple( tensor, static_cast(chain.getOutputSampleRate())); } diff --git a/torchaudio/csrc/sox/pybind/effects.h b/torchaudio/csrc/sox/pybind/effects.h index 1cdcef33307..375395e8c74 100644 --- a/torchaudio/csrc/sox/pybind/effects.h +++ b/torchaudio/csrc/sox/pybind/effects.h @@ -10,8 +10,7 @@ auto apply_effects_fileobj( const std::vector>& effects, c10::optional normalize, c10::optional channels_first, - c10::optional format) - -> c10::optional>; + c10::optional format) -> std::tuple; } // namespace torchaudio::sox diff --git a/torchaudio/csrc/sox/pybind/io.cpp b/torchaudio/csrc/sox/pybind/io.cpp index 0ccf8416a64..c885d16f0b8 100644 --- a/torchaudio/csrc/sox/pybind/io.cpp +++ b/torchaudio/csrc/sox/pybind/io.cpp @@ -10,7 +10,7 @@ namespace torchaudio::sox { auto get_info_fileobj(py::object fileobj, c10::optional format) - -> c10::optional { + -> std::tuple { // Prepare in-memory file object // When libsox opens a file, it also reads the header. 
// When opening a file there are two functions that might touch FILE* (and the @@ -63,7 +63,7 @@ auto get_info_fileobj(py::object fileobj, c10::optional format) return c10::optional{}; } - return std::forward_as_tuple( + return std::make_tuple( static_cast(sf->signal.rate), static_cast(sf->signal.length / sf->signal.channels), static_cast(sf->signal.channels), @@ -77,8 +77,7 @@ auto load_audio_fileobj( c10::optional num_frames, c10::optional normalize, c10::optional channels_first, - c10::optional format) - -> c10::optional> { + c10::optional format) -> std::tuple { auto effects = get_effects(frame_offset, num_frames); return apply_effects_fileobj( std::move(fileobj), diff --git a/torchaudio/csrc/sox/pybind/io.h b/torchaudio/csrc/sox/pybind/io.h index 02d874c350d..ccc760547e2 100644 --- a/torchaudio/csrc/sox/pybind/io.h +++ b/torchaudio/csrc/sox/pybind/io.h @@ -5,11 +5,8 @@ namespace torchaudio::sox { -using MetaDataTuple = - std::tuple; - auto get_info_fileobj(py::object fileobj, c10::optional format) - -> c10::optional; + -> std::tuple; auto load_audio_fileobj( py::object fileobj, @@ -17,8 +14,7 @@ auto load_audio_fileobj( c10::optional num_frames, c10::optional normalize, c10::optional channels_first, - c10::optional format) - -> c10::optional>; + c10::optional format) -> std::tuple; void save_audio_fileobj( py::object fileobj, diff --git a/torchaudio/csrc/sox/utils.h b/torchaudio/csrc/sox/utils.h index b1183659999..281b32b6103 100644 --- a/torchaudio/csrc/sox/utils.h +++ b/torchaudio/csrc/sox/utils.h @@ -51,6 +51,10 @@ struct SoxFormat { sox_format_t* fd_; }; +/// +/// Verify that input file is found, has known encoding, and not empty +void validate_input_file(const SoxFormat& sf, const std::string& path); + /// /// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32 void validate_input_tensor(const torch::Tensor&); diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py index c343680b650..e7f8be74089 
100644 --- a/torchaudio/sox_effects/sox_effects.py +++ b/torchaudio/sox_effects/sox_effects.py @@ -269,7 +269,4 @@ def apply_effects_file( "Please use torchaudio.io.AudioEffector." ) path = os.fspath(path) - ret = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format) - if ret is not None: - return ret - raise RuntimeError("Failed to load audio from {}".format(path)) + return torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)