From 5d44a30c610d77c0a452ed2d3cfb46c8c9ac67db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Nekvinda?= Date: Mon, 18 Mar 2024 06:45:43 +0000 Subject: [PATCH 1/2] Fixing recording move to memory. --- lhotse/audio/recording.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lhotse/audio/recording.py b/lhotse/audio/recording.py index 0c3ea2ebc..3136603e9 100644 --- a/lhotse/audio/recording.py +++ b/lhotse/audio/recording.py @@ -30,6 +30,7 @@ Pathlike, Seconds, SetContainingAnything, + SmartOpen, asdict_nonull, compute_num_samples, fastcopy, @@ -299,14 +300,16 @@ def _aslist(x): and (offset is None or isclose(offset, 0.0)) and (duration is None or isclose(duration, self.duration)) ): - memory_sources = [ - AudioSource( - type="memory", - channels=old_source.channels, - source=open(old_source.source, "rb").read(), + memory_sources = [] + for old_source in self.sources: + with SmartOpen.open(old_source.source, "rb") as f: + source = f.read() + memory_sources.append(AudioSource( + type="memory", + channels=old_source.channels, + source=source, + ) ) - for old_source in self.sources - ] return fastcopy(self, sources=memory_sources) # Case #2: user specified some subset of the recording, decode audio, From a0acf52797c8118fda40d54538d46341d8c11aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Nekvinda?= Date: Tue, 19 Mar 2024 10:06:41 +0100 Subject: [PATCH 2/2] Check format for None --- lhotse/audio/recording.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lhotse/audio/recording.py b/lhotse/audio/recording.py index 3136603e9..4b6de4e74 100644 --- a/lhotse/audio/recording.py +++ b/lhotse/audio/recording.py @@ -295,10 +295,10 @@ def _aslist(x): return x # Case #1: no opts specified, read audio without decoding and move it in memory. - if all(opt is None for opt in (channels, offset, duration)) or ( + if format is None and (all(opt is None for opt in (channels, offset, duration)) or ( (channels is None or _aslist(channels) == self.channel_ids) and (offset is None or isclose(offset, 0.0)) - and (duration is None or isclose(duration, self.duration)) + and (duration is None or isclose(duration, self.duration))) ): memory_sources = [] for old_source in self.sources: