Skip to content

Commit

Permalink
Remove ffmpeg fallback from sox_io backend
Browse files Browse the repository at this point in the history
In #2419, we added ffmpeg as a fallback for the sox_io backend.
That was a workaround for the issue caused by the removal of libmad.

Now that we have introduced the `backend` argument to I/O functions,
and libsox integration has moved to dynamic binding, where users can
use a libsox build with libmad integration, we no longer need the workaround.

This commit is based on reverting #2416 (fd7ace1).
  • Loading branch information
mthrok committed Jul 28, 2023
1 parent 7368e33 commit 2121368
Show file tree
Hide file tree
Showing 18 changed files with 20 additions and 767 deletions.
19 changes: 0 additions & 19 deletions test/torchaudio_unittest/backend/dispatcher/sox/load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,25 +318,6 @@ def test_sox(self, frame_offset, num_frames, channels_first, normalize):

self._test(torch.ops.torchaudio.sox_io_load_audio_file, frame_offset, num_frames, channels_first, normalize)

@nested_params(
[0, 1, 10, 100, 1000],
[-1, 1, 10, 100, 1000],
[True, False],
[True, False],
)
def test_ffmpeg(self, frame_offset, num_frames, channels_first, normalize):
"""The combination of properly changes the output tensor"""
from torchaudio.io._compat import load_audio, load_audio_fileobj

self._test(load_audio, frame_offset, num_frames, channels_first, normalize)

# test file-like obj
def func(path, *args):
with open(path, "rb") as fileobj:
return load_audio_fileobj(fileobj, *args)

self._test(func, frame_offset, num_frames, channels_first, normalize)


@skipIfNoSox
@skipIfNoExec("sox")
Expand Down
7 changes: 3 additions & 4 deletions test/torchaudio_unittest/backend/sox_io/info_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def test_htk(self):


@skipIfNoSox
@skipIfNoSoxDecoder("opus")
class TestInfoOpus(PytorchTestCase):
@parameterized.expand(
list(
Expand Down Expand Up @@ -304,17 +305,15 @@ class TestLoadWithoutExtension(PytorchTestCase):
def test_mp3(self):
"""MP3 file without extension can be loaded
Originally, we added `format` argument for this case, but now we use FFmpeg
for MP3 decoding, which works even without `format` argument.
https://github.com/pytorch/audio/issues/1040
The file was generated with the following command
ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
"""
path = get_asset_path("mp3_without_ext")
sinfo = sox_io_backend.info(path)
sinfo = sox_io_backend.info(path, format="mp3")
assert sinfo.sample_rate == 16000
assert sinfo.num_frames == 80000
assert sinfo.num_frames == 81216
assert sinfo.num_channels == 1
assert sinfo.bits_per_sample == 0 # bit_per_sample is irrelevant for compressed formats
assert sinfo.encoding == "MP3"
Expand Down
23 changes: 1 addition & 22 deletions test/torchaudio_unittest/backend/sox_io/load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,40 +315,19 @@ def test_sox(self, frame_offset, num_frames, channels_first, normalize):

self._test(torch.ops.torchaudio.sox_io_load_audio_file, frame_offset, num_frames, channels_first, normalize)

@nested_params(
[0, 1, 10, 100, 1000],
[-1, 1, 10, 100, 1000],
[True, False],
[True, False],
)
def test_ffmpeg(self, frame_offset, num_frames, channels_first, normalize):
"""The combination of properly changes the output tensor"""
from torchaudio.io._compat import load_audio, load_audio_fileobj

self._test(load_audio, frame_offset, num_frames, channels_first, normalize)

# test file-like obj
def func(path, *args):
with open(path, "rb") as fileobj:
return load_audio_fileobj(fileobj, *args)

self._test(func, frame_offset, num_frames, channels_first, normalize)


@skipIfNoSox
class TestLoadWithoutExtension(PytorchTestCase):
def test_mp3(self):
"""MP3 file without extension can be loaded
Originally, we added `format` argument for this case, but now we use FFmpeg
for MP3 decoding, which works even without `format` argument.
https://github.com/pytorch/audio/issues/1040
The file was generated with the following command
ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
"""
path = get_asset_path("mp3_without_ext")
_, sr = sox_io_backend.load(path)
_, sr = sox_io_backend.load(path, format="mp3")
assert sr == 16000


Expand Down
36 changes: 2 additions & 34 deletions torchaudio/backend/sox_io_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,6 @@
from .common import AudioMetaData


# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
raise RuntimeError("Failed to fetch metadata from {}".format(filepath))


# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
filepath: str,
frame_offset: int = 0,
num_frames: int = -1,
normalize: bool = True,
channels_first: bool = True,
format: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
raise RuntimeError("Failed to load audio from {}".format(filepath))


if torchaudio._extension._FFMPEG_EXT is not None:
import torchaudio.io._compat as _compat

_fallback_info = _compat.info_audio
_fallback_load = _compat.load_audio
else:
_fallback_info = _fail_info
_fallback_load = _fail_load


@torchaudio._extension.fail_if_no_sox
def info(
filepath: str,
Expand All @@ -58,9 +31,7 @@ def info(
raise RuntimeError("sox_io backend does not support file-like object.")
filepath = os.fspath(filepath)
sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
if sinfo is not None:
return AudioMetaData(*sinfo)
return _fallback_info(filepath, format)
return AudioMetaData(*sinfo)


@torchaudio._extension.fail_if_no_sox
Expand Down Expand Up @@ -153,12 +124,9 @@ def load(
if hasattr(filepath, "read"):
raise RuntimeError("sox_io backend does not support file-like object.")
filepath = os.fspath(filepath)
ret = torch.ops.torchaudio.sox_io_load_audio_file(
return torch.ops.torchaudio.sox_io_load_audio_file(
filepath, frame_offset, num_frames, normalize, channels_first, format
)
if ret is not None:
return ret
return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)


@torchaudio._extension.fail_if_no_sox
Expand Down
7 changes: 2 additions & 5 deletions torchaudio/csrc/sox/effects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,15 @@ auto apply_effects_file(
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
const c10::optional<std::string>& format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
-> std::tuple<torch::Tensor, int64_t> {
// Open input file
SoxFormat sf(sox_open_read(
path.c_str(),
/*signal=*/nullptr,
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
validate_input_file(sf, path);

const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

Expand Down
2 changes: 1 addition & 1 deletion torchaudio/csrc/sox/effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ auto apply_effects_file(
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
const c10::optional<std::string>& format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
-> std::tuple<torch::Tensor, int64_t>;

} // namespace torchaudio::sox

Expand Down
11 changes: 4 additions & 7 deletions torchaudio/csrc/sox/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using namespace torch::indexing;

namespace torchaudio::sox {

c10::optional<MetaDataTuple> get_info_file(
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
const std::string& path,
const c10::optional<std::string>& format) {
SoxFormat sf(sox_open_read(
Expand All @@ -17,12 +17,9 @@ c10::optional<MetaDataTuple> get_info_file(
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
validate_input_file(sf, path);

return std::forward_as_tuple(
return std::make_tuple(
static_cast<int64_t>(sf->signal.rate),
static_cast<int64_t>(sf->signal.length / sf->signal.channels),
static_cast<int64_t>(sf->signal.channels),
Expand Down Expand Up @@ -58,7 +55,7 @@ std::vector<std::vector<std::string>> get_effects(
return effects;
}

c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
std::tuple<torch::Tensor, int64_t> load_audio_file(
const std::string& path,
const c10::optional<int64_t>& frame_offset,
const c10::optional<int64_t>& num_frames,
Expand Down
7 changes: 2 additions & 5 deletions torchaudio/csrc/sox/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@ auto get_effects(
const c10::optional<int64_t>& num_frames)
-> std::vector<std::vector<std::string>>;

using MetaDataTuple =
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

c10::optional<MetaDataTuple> get_info_file(
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
const std::string& path,
const c10::optional<std::string>& format);

c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
std::tuple<torch::Tensor, int64_t> load_audio_file(
const std::string& path,
const c10::optional<int64_t>& frame_offset,
const c10::optional<int64_t>& num_frames,
Expand Down
119 changes: 0 additions & 119 deletions torchaudio/csrc/sox/pybind/effects.cpp

This file was deleted.

18 changes: 0 additions & 18 deletions torchaudio/csrc/sox/pybind/effects.h

This file was deleted.

Loading

0 comments on commit 2121368

Please sign in to comment.