Extend torchaudio support to 2.1.x (#3)

* Update README.md * Update README.md * Update README.md * Update README.md * minor fixes for 4.1.0a1 (facebookresearch#552) * minor fixes for 4.1.0a1 print out the exception when calling callback ensures all threads can be stopped when interrupting separation add release data for 4.0.1 * Fix model_idx_in_bag always zero * fix linter * Fix can't separate empty audio * Calls callback when skipping empty audio * Add description for aborting * Does not ignore callback exception * Fix linter * Does not ignore exception * Disable torchaudio 2.2+ * Uses epsilon to deal with empty audio * Reraises exception in callback * Ensure the pool stops when encountering exception * Update windows.md for latest instructions * Minor documentation updates (facebookresearch#565) * Minor documentation updates * Update readme * Update api.md * Fix segment defined in bag can't override model * merge from adefossez/demucs * Update README.md * Extend torchaudio support to 2.1.x * Use correct import statement * Calculate FFT on CPU also when device is XPU (Intel GPU) --------- Co-authored-by: Alexandre Défossez <[email protected]> Co-authored-by: William Dye <[email protected]>
adefossez · Jan 12, 2024 · b3398e4 · b3398e4
1 parent a4decbd
commit b3398e4
Show file tree

Hide file tree

Showing 11 changed files with 39 additions and 14 deletions.
diff --git a/demucs/api.py b/demucs/api.py
@@ -22,6 +22,7 @@
 
 import subprocess
 
+from . import audio_legacy
 import torch as th
 import torchaudio as ta
 

diff --git a/demucs/audio.py b/demucs/audio.py
@@ -10,6 +10,7 @@
 import lameenc
 import julius
 import numpy as np
+from . import audio_legacy
 import torch
 import torchaudio as ta
 import typing as tp

diff --git a/demucs/audio_legacy.py b/demucs/audio_legacy.py
@@ -0,0 +1,17 @@
+# This file is to extend support for torchaudio 2.1
+
+import importlib
+import os
+import sys
+import warnings
+
+if not "torchaudio" in sys.modules:
+    os.environ["TORCHAUDIO_USE_BACKEND_DISPATCHER"] = "0"
+elif os.getenv("TORCHAUDIO_USE_BACKEND_DISPATCHER", default="1") == "1":
+    if sys.modules["torchaudio"].__version__ >= "2.1":
+        os.environ["TORCHAUDIO_USE_BACKEND_DISPATCHER"] = "0"
+        importlib.reload(sys.modules["torchaudio"])
+        warnings.warn(
+            "TORCHAUDIO_USE_BACKEND_DISPATCHER is set to 0 and torchaudio is reloaded.",
+            ImportWarning,
+        )
diff --git a/demucs/hdemucs.py b/demucs/hdemucs.py
@@ -776,16 +776,18 @@ def forward(self, mix):
         # demucs issue #435 ##432
         # NOTE: in this case z already is on cpu
         # TODO: remove this when mps supports complex numbers
-        x_is_mps = x.device.type == "mps"
-        if x_is_mps:
+        x_is_mps_xpu = x.device.type in ["mps", "xpu"]
+        x_device = x.device
+        if x_is_mps_xpu:
             x = x.cpu()
 
         zout = self._mask(z, x)
         x = self._ispec(zout, length)
 
         # back to mps device
-        if x_is_mps:
-            x = x.to('mps')
+        if x_is_mps_xpu:
+            x = x.to(x_device)
+
 
         if self.hybrid:
             xt = xt.view(B, S, -1, length)

diff --git a/demucs/htdemucs.py b/demucs/htdemucs.py
@@ -629,8 +629,9 @@ def forward(self, mix):
         # demucs issue #435 ##432
         # NOTE: in this case z already is on cpu
         # TODO: remove this when mps supports complex numbers
-        x_is_mps = x.device.type == "mps"
-        if x_is_mps:
+        x_is_mps_xpu = x.device.type in ["mps", "xpu"]
+        x_device = x.device
+        if x_is_mps_xpu:
             x = x.cpu()
 
         zout = self._mask(z, x)
@@ -643,8 +644,8 @@ def forward(self, mix):
             x = self._ispec(zout, length)
 
         # back to mps device
-        if x_is_mps:
-            x = x.to("mps")
+        if x_is_mps_xpu:
+            x = x.to(x_device)
 
         if self.use_train_segment:
             if self.training:

diff --git a/demucs/repitch.py b/demucs/repitch.py
@@ -9,6 +9,7 @@
 import subprocess as sp
 import tempfile
 
+from . import audio_legacy
 import torch
 import torchaudio as ta
 

diff --git a/demucs/spec.py b/demucs/spec.py
@@ -11,8 +11,8 @@
 def spectro(x, n_fft=512, hop_length=None, pad=0):
     *other, length = x.shape
     x = x.reshape(-1, length)
-    is_mps = x.device.type == 'mps'
-    if is_mps:
+    is_mps_xpu = x.device.type in ['mps', 'xpu']
+    if is_mps_xpu:
         x = x.cpu()
     z = th.stft(x,
                 n_fft * (1 + pad),
@@ -32,8 +32,8 @@ def ispectro(z, hop_length=None, length=None, pad=0):
     n_fft = 2 * freqs - 2
     z = z.view(-1, freqs, frames)
     win_length = n_fft // (1 + pad)
-    is_mps = z.device.type == 'mps'
-    if is_mps:
+    is_mps_xpu = z.device.type in ['mps', 'xpu']
+    if is_mps_xpu:
         z = z.cpu()
     x = th.istft(z,
                  n_fft,

diff --git a/demucs/train.py b/demucs/train.py
@@ -15,6 +15,7 @@
 import hydra
 from hydra.core.global_hydra import GlobalHydra
 from omegaconf import OmegaConf
+from . import audio_legacy
 import torch
 from torch import nn
 import torchaudio

diff --git a/demucs/wav.py b/demucs/wav.py
@@ -15,6 +15,7 @@
 
 import musdb
 import julius
+from . import audio_legacy
 import torch as th
 from torch import distributed
 import torchaudio as ta

diff --git a/requirements.txt b/requirements.txt
@@ -13,7 +13,7 @@ openunmix
 pyyaml
 submitit
 torch>=1.8.1
-torchaudio>=0.8,<2.1
+torchaudio>=0.8,<2.2
 tqdm
 treetable
 soundfile>=0.10.3;sys_platform=="win32"
diff --git a/requirements_minimal.txt b/requirements_minimal.txt
@@ -6,5 +6,5 @@ lameenc>=1.2
 openunmix
 pyyaml
 torch>=1.8.1
-torchaudio>=0.8,<2.1
+torchaudio>=0.8,<2.2
 tqdm