Skip to content

Commit

Permalink
Remove ffmpeg fallback from sox_io backend
Browse files Browse the repository at this point in the history
In #2419, we added ffmpeg as fallback for sox_io backend.
That was a workaround for the issue caused by the removal of libmad.

Now that we have introduced the `backend` argument to the I/O functions,
and the libsox integration has moved to dynamic binding, where users can
use a libsox build with libmad integration, we no longer need the workaround.

This commit is based on reverting #2416 (fd7ace1).
  • Loading branch information
mthrok committed Jul 28, 2023
1 parent 7368e33 commit b52a3f4
Show file tree
Hide file tree
Showing 12 changed files with 27 additions and 71 deletions.
3 changes: 3 additions & 0 deletions test/torchaudio_unittest/backend/sox_io/info_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def test_htk(self):


@skipIfNoSox
@skipIfNoSoxDecoder("opus")
class TestInfoOpus(PytorchTestCase):
@parameterized.expand(
list(
Expand All @@ -290,6 +291,8 @@ class TestInfoOpus(PytorchTestCase):
)
def test_opus(self, bitrate, num_channels, compression_level):
"""`sox_io_backend.info` can check opus file correcty"""
import torchaudio
torchaudio.utils.sox_utils.set_verbosity(6)
path = get_asset_path("io", f"{bitrate}_{compression_level}_{num_channels}ch.opus")
info = sox_io_backend.info(path)
assert info.sample_rate == 48000
Expand Down
36 changes: 2 additions & 34 deletions torchaudio/backend/sox_io_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,6 @@
from .common import AudioMetaData


# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
raise RuntimeError("Failed to fetch metadata from {}".format(filepath))


# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
filepath: str,
frame_offset: int = 0,
num_frames: int = -1,
normalize: bool = True,
channels_first: bool = True,
format: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
raise RuntimeError("Failed to load audio from {}".format(filepath))


if torchaudio._extension._FFMPEG_EXT is not None:
import torchaudio.io._compat as _compat

_fallback_info = _compat.info_audio
_fallback_load = _compat.load_audio
else:
_fallback_info = _fail_info
_fallback_load = _fail_load


@torchaudio._extension.fail_if_no_sox
def info(
filepath: str,
Expand All @@ -58,9 +31,7 @@ def info(
raise RuntimeError("sox_io backend does not support file-like object.")
filepath = os.fspath(filepath)
sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
if sinfo is not None:
return AudioMetaData(*sinfo)
return _fallback_info(filepath, format)
return AudioMetaData(*sinfo)


@torchaudio._extension.fail_if_no_sox
Expand Down Expand Up @@ -153,12 +124,9 @@ def load(
if hasattr(filepath, "read"):
raise RuntimeError("sox_io backend does not support file-like object.")
filepath = os.fspath(filepath)
ret = torch.ops.torchaudio.sox_io_load_audio_file(
return torch.ops.torchaudio.sox_io_load_audio_file(
filepath, frame_offset, num_frames, normalize, channels_first, format
)
if ret is not None:
return ret
return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)


@torchaudio._extension.fail_if_no_sox
Expand Down
7 changes: 2 additions & 5 deletions torchaudio/csrc/sox/effects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,15 @@ auto apply_effects_file(
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
const c10::optional<std::string>& format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
-> std::tuple<torch::Tensor, int64_t> {
// Open input file
SoxFormat sf(sox_open_read(
path.c_str(),
/*signal=*/nullptr,
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
validate_input_file(sf, path);

const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

Expand Down
2 changes: 1 addition & 1 deletion torchaudio/csrc/sox/effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ auto apply_effects_file(
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
const c10::optional<std::string>& format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
-> std::tuple<torch::Tensor, int64_t>;

} // namespace torchaudio::sox

Expand Down
11 changes: 4 additions & 7 deletions torchaudio/csrc/sox/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using namespace torch::indexing;

namespace torchaudio::sox {

c10::optional<MetaDataTuple> get_info_file(
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
const std::string& path,
const c10::optional<std::string>& format) {
SoxFormat sf(sox_open_read(
Expand All @@ -17,12 +17,9 @@ c10::optional<MetaDataTuple> get_info_file(
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
validate_input_file(sf, path);

return std::forward_as_tuple(
return std::make_tuple(
static_cast<int64_t>(sf->signal.rate),
static_cast<int64_t>(sf->signal.length / sf->signal.channels),
static_cast<int64_t>(sf->signal.channels),
Expand Down Expand Up @@ -58,7 +55,7 @@ std::vector<std::vector<std::string>> get_effects(
return effects;
}

c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
std::tuple<torch::Tensor, int64_t> load_audio_file(
const std::string& path,
const c10::optional<int64_t>& frame_offset,
const c10::optional<int64_t>& num_frames,
Expand Down
7 changes: 2 additions & 5 deletions torchaudio/csrc/sox/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@ auto get_effects(
const c10::optional<int64_t>& num_frames)
-> std::vector<std::vector<std::string>>;

using MetaDataTuple =
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

c10::optional<MetaDataTuple> get_info_file(
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
const std::string& path,
const c10::optional<std::string>& format);

c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
std::tuple<torch::Tensor, int64_t> load_audio_file(
const std::string& path,
const c10::optional<int64_t>& frame_offset,
const c10::optional<int64_t>& num_frames,
Expand Down
5 changes: 2 additions & 3 deletions torchaudio/csrc/sox/pybind/effects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ auto apply_effects_fileobj(
const std::vector<std::vector<std::string>>& effects,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t> {
// Prepare the buffer used throughout the lifecycle of SoxEffectChain.
//
// For certain format (such as FLAC), libsox keeps reading the content at
Expand Down Expand Up @@ -112,7 +111,7 @@ auto apply_effects_fileobj(
normalize.value_or(true),
channels_first_);

return std::forward_as_tuple(
return std::make_tuple(
tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
}

Expand Down
3 changes: 1 addition & 2 deletions torchaudio/csrc/sox/pybind/effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ auto apply_effects_fileobj(
const std::vector<std::vector<std::string>>& effects,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t>;

} // namespace torchaudio::sox

Expand Down
7 changes: 3 additions & 4 deletions torchaudio/csrc/sox/pybind/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
namespace torchaudio::sox {

auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
-> c10::optional<MetaDataTuple> {
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
// Prepare in-memory file object
// When libsox opens a file, it also reads the header.
// When opening a file there are two functions that might touch FILE* (and the
Expand Down Expand Up @@ -63,7 +63,7 @@ auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
return c10::optional<MetaDataTuple>{};
}

return std::forward_as_tuple(
return std::make_tuple(
static_cast<int64_t>(sf->signal.rate),
static_cast<int64_t>(sf->signal.length / sf->signal.channels),
static_cast<int64_t>(sf->signal.channels),
Expand All @@ -77,8 +77,7 @@ auto load_audio_fileobj(
c10::optional<int64_t> num_frames,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t> {
auto effects = get_effects(frame_offset, num_frames);
return apply_effects_fileobj(
std::move(fileobj),
Expand Down
8 changes: 2 additions & 6 deletions torchaudio/csrc/sox/pybind/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,16 @@

namespace torchaudio::sox {

using MetaDataTuple =
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
-> c10::optional<MetaDataTuple>;
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

auto load_audio_fileobj(
py::object fileobj,
c10::optional<int64_t> frame_offset,
c10::optional<int64_t> num_frames,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t>;

void save_audio_fileobj(
py::object fileobj,
Expand Down
4 changes: 4 additions & 0 deletions torchaudio/csrc/sox/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ struct SoxFormat {
sox_format_t* fd_;
};

///
/// Verify that input file is found, has known encoding, and is not empty
void validate_input_file(const SoxFormat& sf, const std::string& path);

///
/// Verify that input Tensor is 2D, CPU and either uint8, int16, int32 or float32
void validate_input_tensor(const torch::Tensor&);
Expand Down
5 changes: 1 addition & 4 deletions torchaudio/sox_effects/sox_effects.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,4 @@ def apply_effects_file(
"Please use torchaudio.io.AudioEffector."
)
path = os.fspath(path)
ret = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)
if ret is not None:
return ret
raise RuntimeError("Failed to load audio from {}".format(path))
return torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)

0 comments on commit b52a3f4

Please sign in to comment.