From 1f210f6da92098bcc7efc8970b058e184adbfbc6 Mon Sep 17 00:00:00 2001 From: straylight-annex-portal Date: Fri, 24 Nov 2023 16:28:15 +0900 Subject: [PATCH 1/4] wip --- examples/crosstalk-benchmark.rs | 321 ++++++++++++++++++ .../crosstalk-benchmark.json5 | 31 ++ 2 files changed, 352 insertions(+) create mode 100644 examples/crosstalk-benchmark.rs create mode 100644 examples/recording-configs/crosstalk-benchmark.json5 diff --git a/examples/crosstalk-benchmark.rs b/examples/crosstalk-benchmark.rs new file mode 100644 index 0000000..522690b --- /dev/null +++ b/examples/crosstalk-benchmark.rs @@ -0,0 +1,321 @@ +/// An example binary to help evaluate webrtc audio processing pipeline, in particular its echo +/// canceller. You can use it to record a sample with your audio setup, and you can run the +/// pipeline repeatedly using the sampled audio, to test different configurations of the pipeline. +/// +/// # Record a sample +/// +/// Play back a pre-recorded audio stream from your speakers, while recording the microphone +/// input as a WAV file. +/// +/// ``` +/// $ cargo run --example recording --features bundled --features derive_serde -- --config-file \ +/// examples/recording-configs/record-sample.json5 +/// ``` +/// +/// # Run the pipeline with the sample +/// +/// Run the audio processing pipeline with the recorded capture and render frames. You can then +/// analyze the capture-processed.wav to understand the effect produced by the pipeline. +/// +/// ``` +/// $ cargo run --example recording --features bundled --features derive_serde -- --config-file \ +/// examples/recording-configs/record-pipeline.json5 +/// ``` +use failure::{format_err, Error}; +use hound::{WavIntoSamples, WavReader, WavWriter}; +use portaudio::StreamCallbackResult; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use std::{ + fs::{self, File}, + io::{BufReader, BufWriter}, + path::{Path, PathBuf}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread, + time::Duration, +}; +use structopt::StructOpt; +use webrtc_audio_processing::*; + +const AUDIO_SAMPLE_RATE: u32 = 48_000; +const AUDIO_INTERLEAVED: bool = true; + +#[derive(Debug, StructOpt)] +struct Args { + /// Configuration file that stores JSON serialization of [`Option`] struct. + #[structopt(short, long)] + pub config_file: PathBuf, +} + +#[derive(Deserialize, Serialize, Default, Clone, Debug)] +struct CaptureOptions { + /// Name of the audio capture device. + device_name: String, + /// The number of audio capture channels. + num_channels: u16, + /// If specified, it reads the capture stream from the WAV file instead of the device. + source_path: Option, + /// If specified, it writes the capture stream to the WAV file before applying the processing. + preprocess_sink_path: Option, + /// If specified, it writes the capture stream to the WAV file after applying the processing. + postprocess_sink_path: Option, +} + +#[derive(Deserialize, Serialize, Default, Clone, Debug)] +struct RenderOptions { + /// Name of the audio playback device. + device_name: String, + /// The number of audio playback channels. + num_channels: u16, + /// If specified, it plays back the audio stream from the WAV file. Otherwise, a stream of + /// zeros are sent to the audio device. + source_path: Option, + /// If true, the output is muted. + #[serde(default)] + mute: bool, +} + +#[derive(Deserialize, Serialize, Default, Clone, Debug)] +struct PlaybackOptions { + /// Played from the tonari speakers as if comming from the far end. + far_end: RenderOptions, + /// Played from a testing speaker placed *in front of* tonari to simulate a local signal source like a person. + near_end: RenderOptions, +} + +#[derive(Deserialize, Serialize, Default, Clone, Debug)] +struct Options { + /// Options for audio capture / recording. + capture: CaptureOptions, + /// Options for audio render / playback. + playback: PlaybackOptions, + /// Configurations of the audio processing pipeline. + config: Config, +} + +fn match_device( + pa: &portaudio::PortAudio, + device_name: Regex, +) -> Result { + for device in (pa.devices()?).flatten() { + if device_name.is_match(device.1.name) { + return Ok(device.0); + } + } + Err(format_err!("Audio device matching \"{}\" not found.", device_name)) +} + +fn create_input_stream_settings( + pa: &portaudio::PortAudio, + opt: &CaptureOptions, +) -> Result, Error> { + let input_device = match_device(pa, Regex::new(&opt.device_name)?)?; + let input_device_info = &pa.device_info(input_device)?; + let input_params = portaudio::StreamParameters::::new( + input_device, + opt.num_channels as i32, + AUDIO_INTERLEAVED, + input_device_info.default_low_input_latency, + ); + + Ok(portaudio::InputStreamSettings::new( + input_params, + f64::from(AUDIO_SAMPLE_RATE), + NUM_SAMPLES_PER_FRAME as u32, + )) +} + +fn create_output_stream_settings( + pa: &portaudio::PortAudio, + opt: &RenderOptions, +) -> Result, Error> { + let output_device_far_end = match_device(pa, Regex::new(&opt.device_name)?)?; + let output_device_info = &pa.device_info(output_device_far_end)?; + let output_params = portaudio::StreamParameters::::new( + output_device_far_end, + opt.num_channels as i32, + AUDIO_INTERLEAVED, + output_device_info.default_low_output_latency, + ); + + Ok(portaudio::OutputStreamSettings::new( + output_params, + f64::from(AUDIO_SAMPLE_RATE), + NUM_SAMPLES_PER_FRAME as u32, + )) +} + +fn open_wav_writer(path: &Path, channels: u16) -> Result>, Error> { + let sink = hound::WavWriter::>::create( + path, + hound::WavSpec { + channels, + sample_rate: AUDIO_SAMPLE_RATE, + bits_per_sample: 32, + sample_format: hound::SampleFormat::Float, + }, + )?; + + Ok(sink) +} + +fn open_wav_reader(path: &Path) -> Result, f32>, Error> { + let reader = WavReader::>::open(path)?; + Ok(reader.into_samples()) +} + +// The destination array is an interleaved audio stream. +// Returns false if there are no more entries to read from the source. +fn copy_stream(source: &mut WavIntoSamples, f32>, dest: &mut [f32]) -> bool { + let mut dest_iter = dest.iter_mut(); + 'outer: for sample in source { + for channel in &sample { + *dest_iter.next().unwrap() = *channel; + if dest_iter.len() == 0 { + break 'outer; + } + } + } + + let source_eof = dest_iter.len() > 0; + + // Zero-fill the remainder of the destination array if we finish consuming + // the source. + for sample in dest_iter { + *sample = 0.0; + } + + !source_eof +} + +fn create_output_callback( + mut source: WavIntoSamples, f32>, + mut processor: Processor, + running: Arc, +) -> impl FnMut(portaudio::OutputStreamCallbackArgs) -> StreamCallbackResult + 'static { + move |portaudio::OutputStreamCallbackArgs { buffer, frames, .. }| { + assert_eq!(frames, NUM_SAMPLES_PER_FRAME as usize); + + let mut should_continue = true; + + if !copy_stream(&mut source, buffer) { + should_continue = false; + } + + processor.process_render_frame(buffer).unwrap(); + + // if mute { + // buffer.iter_mut().for_each(|m| *m = 0.0) + // } + + if should_continue { + portaudio::Continue + } else { + running.store(false, Ordering::SeqCst); + portaudio::Complete + } + } +} + +fn main() -> Result<(), Error> { + let args = Args::from_args(); + let opt: Options = json5::from_str(&fs::read_to_string(&args.config_file)?)?; + + let pa = portaudio::PortAudio::new()?; + + let mut processor = Processor::new(&InitializationConfig { + num_capture_channels: opt.capture.num_channels as i32, + num_render_channels: opt.playback.far_end.num_channels as i32, + ..Default::default() + })?; + + processor.set_config(opt.config.clone()); + + let running = Arc::new(AtomicBool::new(true)); + + let mut capture_preprocess_sink = if let Some(path) = &opt.capture.preprocess_sink_path { + Some(open_wav_writer(path, opt.capture.num_channels)?) + } else { + None + }; + let mut capture_postprocess_sink = if let Some(path) = &opt.capture.postprocess_sink_path { + Some(open_wav_writer(path, opt.capture.num_channels)?) + } else { + None + }; + let mut far_end_source = if let Some(path) = &opt.playback.far_end.source_path { + Some(open_wav_reader(path)?) + } else { + None + }; + let mut near_end_source = if let Some(path) = &opt.playback.near_end.source_path { + Some(open_wav_reader(path)?) + } else { + None + }; + + let input_stream_settings = create_input_stream_settings(&pa, &opt.capture)?; + // Allocate buffers outside the performance-sensitive audio loop. + let mut input_mut = + vec![0f32; NUM_SAMPLES_PER_FRAME as usize * opt.capture.num_channels as usize]; + let mut input_stream = pa.open_non_blocking_stream( + input_stream_settings, + move |portaudio::InputStreamCallbackArgs { buffer, frames, .. }| { + assert_eq!(frames, NUM_SAMPLES_PER_FRAME as usize); + + input_mut.copy_from_slice(buffer); + + if let Some(sink) = &mut capture_preprocess_sink { + for sample in &input_mut { + sink.write_sample(*sample).unwrap(); + } + } + + processor.process_capture_frame(&mut input_mut).unwrap(); + + if let Some(sink) = &mut capture_postprocess_sink { + for sample in &input_mut { + sink.write_sample(*sample).unwrap(); + } + } + + portaudio::Continue + }, + )?; + + let running = running.clone(); + + let far_end_stream_settings = create_output_stream_settings(&pa, &opt.playback.far_end)?; + let mut far_end_stream = pa.open_non_blocking_stream( + far_end_stream_settings, + create_output_callback(far_end_source.unwrap(), processor.clone(), running.clone()), + )?; + + let near_end_stream_settings = create_output_stream_settings(&pa, &opt.playback.near_end)?; + let mut near_end_stream = pa.open_non_blocking_stream( + near_end_stream_settings, + create_output_callback(near_end_source.unwrap(), processor.clone(), running.clone()), + )?; + + input_stream.start()?; + far_end_stream.start()?; + near_end_stream.start()?; + + ctrlc::set_handler({ + let running = running.clone(); + move || { + running.store(false, Ordering::SeqCst); + } + })?; + + while running.load(Ordering::SeqCst) { + thread::sleep(Duration::from_millis(10)); + } + + println!("{:#?}", processor.get_stats()); + + Ok(()) +} diff --git a/examples/recording-configs/crosstalk-benchmark.json5 b/examples/recording-configs/crosstalk-benchmark.json5 new file mode 100644 index 0000000..2ba5795 --- /dev/null +++ b/examples/recording-configs/crosstalk-benchmark.json5 @@ -0,0 +1,31 @@ +{ + capture: { + device_name: "UR44", + num_channels: 1, + preprocess_sink_path: "capture.wav", + postprocess_sink_path: "capture-processed.wav", + }, + playback: { + far_end: { + device_name: "UR44", + num_channels: 1, + source_path: "examples/captures/pure-speech-m22/2023-11-24_11-57-35_mixed_egress_mono.wav", + }, + near_end: { + device_name: " NFJ USB Audio", + num_channels: 1, + source_path: "examples/captures/pure-speech-m22/2023-11-24_11-57-35_raw_near_end.wav", + }, + }, + config: { + enable_transient_suppressor: false, + enable_high_pass_filter: true, + echo_cancellation: { + enabled: true, + suppression_level: "Moderate", + enable_extended_filter: false, + enable_delay_agnostic: false, + stream_delay_ms: 20, + }, + }, +} From ab1e1c3ab47332a454252a04f31c4d6fa4c6a9dd Mon Sep 17 00:00:00 2001 From: straylight-annex-portal Date: Fri, 24 Nov 2023 17:18:11 +0900 Subject: [PATCH 2/4] Add crosstalk-benchmark example --- .../crosstalk-benchmark.json5 | 4 +- examples/crosstalk-benchmark.rs | 97 ++++++------------- 2 files changed, 31 insertions(+), 70 deletions(-) rename examples/{recording-configs => }/crosstalk-benchmark.json5 (91%) diff --git a/examples/recording-configs/crosstalk-benchmark.json5 b/examples/crosstalk-benchmark.json5 similarity index 91% rename from examples/recording-configs/crosstalk-benchmark.json5 rename to examples/crosstalk-benchmark.json5 index 2ba5795..efaf427 100644 --- a/examples/recording-configs/crosstalk-benchmark.json5 +++ b/examples/crosstalk-benchmark.json5 @@ -12,9 +12,9 @@ source_path: "examples/captures/pure-speech-m22/2023-11-24_11-57-35_mixed_egress_mono.wav", }, near_end: { - device_name: " NFJ USB Audio", + device_name: "NFJ USB Audio", num_channels: 1, - source_path: "examples/captures/pure-speech-m22/2023-11-24_11-57-35_raw_near_end.wav", + source_path: "examples/captures/pure-speech-m22/2023-11-24_11-57-35_raw_near_end_mono.wav", }, }, config: { diff --git a/examples/crosstalk-benchmark.rs b/examples/crosstalk-benchmark.rs index 522690b..7e66f07 100644 --- a/examples/crosstalk-benchmark.rs +++ b/examples/crosstalk-benchmark.rs @@ -1,25 +1,8 @@ -/// An example binary to help evaluate webrtc audio processing pipeline, in particular its echo -/// canceller. You can use it to record a sample with your audio setup, and you can run the -/// pipeline repeatedly using the sampled audio, to test different configurations of the pipeline. -/// -/// # Record a sample -/// -/// Play back a pre-recorded audio stream from your speakers, while recording the microphone -/// input as a WAV file. -/// -/// ``` -/// $ cargo run --example recording --features bundled --features derive_serde -- --config-file \ -/// examples/recording-configs/record-sample.json5 -/// ``` -/// -/// # Run the pipeline with the sample -/// -/// Run the audio processing pipeline with the recorded capture and render frames. You can then -/// analyze the capture-processed.wav to understand the effect produced by the pipeline. +/// An example binary to help evaluate webrtc audio processing pipeline in a crosstalk scenario. /// /// ``` -/// $ cargo run --example recording --features bundled --features derive_serde -- --config-file \ -/// examples/recording-configs/record-pipeline.json5 +/// $ cargo run --example crosstalk-benchmark --features derive_serde -- \ +/// --config-file examples/crosstalk-benchmark.json5 /// ``` use failure::{format_err, Error}; use hound::{WavIntoSamples, WavReader, WavWriter}; @@ -56,8 +39,6 @@ struct CaptureOptions { device_name: String, /// The number of audio capture channels. num_channels: u16, - /// If specified, it reads the capture stream from the WAV file instead of the device. - source_path: Option, /// If specified, it writes the capture stream to the WAV file before applying the processing. preprocess_sink_path: Option, /// If specified, it writes the capture stream to the WAV file after applying the processing. @@ -72,17 +53,14 @@ struct RenderOptions { num_channels: u16, /// If specified, it plays back the audio stream from the WAV file. Otherwise, a stream of /// zeros are sent to the audio device. - source_path: Option, - /// If true, the output is muted. - #[serde(default)] - mute: bool, + source_path: PathBuf, } #[derive(Deserialize, Serialize, Default, Clone, Debug)] struct PlaybackOptions { - /// Played from the tonari speakers as if comming from the far end. + /// Played from the tonari speakers as if coming from the far end. far_end: RenderOptions, - /// Played from a testing speaker placed *in front of* tonari to simulate a local signal source like a person. + /// Played from a testing speaker placed *in front of* tonari to simulate a local sound source like a person. near_end: RenderOptions, } @@ -199,18 +177,10 @@ fn create_output_callback( move |portaudio::OutputStreamCallbackArgs { buffer, frames, .. }| { assert_eq!(frames, NUM_SAMPLES_PER_FRAME as usize); - let mut should_continue = true; - - if !copy_stream(&mut source, buffer) { - should_continue = false; - } + let should_continue = copy_stream(&mut source, buffer); processor.process_render_frame(buffer).unwrap(); - // if mute { - // buffer.iter_mut().for_each(|m| *m = 0.0) - // } - if should_continue { portaudio::Continue } else { @@ -236,33 +206,26 @@ fn main() -> Result<(), Error> { let running = Arc::new(AtomicBool::new(true)); - let mut capture_preprocess_sink = if let Some(path) = &opt.capture.preprocess_sink_path { - Some(open_wav_writer(path, opt.capture.num_channels)?) - } else { - None - }; - let mut capture_postprocess_sink = if let Some(path) = &opt.capture.postprocess_sink_path { - Some(open_wav_writer(path, opt.capture.num_channels)?) - } else { - None - }; - let mut far_end_source = if let Some(path) = &opt.playback.far_end.source_path { - Some(open_wav_reader(path)?) - } else { - None - }; - let mut near_end_source = if let Some(path) = &opt.playback.near_end.source_path { - Some(open_wav_reader(path)?) - } else { - None - }; + let mut capture_preprocess_sink = opt + .capture + .preprocess_sink_path + .as_ref() + .map(|path| open_wav_writer(path, opt.capture.num_channels)) + .transpose()?; + let mut capture_postprocess_sink = opt + .capture + .postprocess_sink_path + .as_ref() + .map(|path| open_wav_writer(path, opt.capture.num_channels)) + .transpose()?; + let far_end_source = open_wav_reader(&opt.playback.far_end.source_path)?; + let near_end_source = open_wav_reader(&opt.playback.near_end.source_path)?; let input_stream_settings = create_input_stream_settings(&pa, &opt.capture)?; - // Allocate buffers outside the performance-sensitive audio loop. - let mut input_mut = - vec![0f32; NUM_SAMPLES_PER_FRAME as usize * opt.capture.num_channels as usize]; - let mut input_stream = pa.open_non_blocking_stream( - input_stream_settings, + let mut input_stream = pa.open_non_blocking_stream(input_stream_settings, { + let mut processor = processor.clone(); + let mut input_mut = + vec![0f32; NUM_SAMPLES_PER_FRAME as usize * opt.capture.num_channels as usize]; move |portaudio::InputStreamCallbackArgs { buffer, frames, .. }| { assert_eq!(frames, NUM_SAMPLES_PER_FRAME as usize); @@ -283,21 +246,19 @@ fn main() -> Result<(), Error> { } portaudio::Continue - }, - )?; - - let running = running.clone(); + } + })?; let far_end_stream_settings = create_output_stream_settings(&pa, &opt.playback.far_end)?; let mut far_end_stream = pa.open_non_blocking_stream( far_end_stream_settings, - create_output_callback(far_end_source.unwrap(), processor.clone(), running.clone()), + create_output_callback(far_end_source, processor.clone(), running.clone()), )?; let near_end_stream_settings = create_output_stream_settings(&pa, &opt.playback.near_end)?; let mut near_end_stream = pa.open_non_blocking_stream( near_end_stream_settings, - create_output_callback(near_end_source.unwrap(), processor.clone(), running.clone()), + create_output_callback(near_end_source, processor.clone(), running.clone()), )?; input_stream.start()?; From bcf7c01d3b4350b1e4b5120be1cdd6fea1cfa6b3 Mon Sep 17 00:00:00 2001 From: Jen Tak Date: Tue, 5 Dec 2023 21:40:10 +0800 Subject: [PATCH 3/4] Improve crosstalk example description --- examples/crosstalk-benchmark.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/crosstalk-benchmark.rs b/examples/crosstalk-benchmark.rs index 7e66f07..fa653dc 100644 --- a/examples/crosstalk-benchmark.rs +++ b/examples/crosstalk-benchmark.rs @@ -1,5 +1,9 @@ /// An example binary to help evaluate webrtc audio processing pipeline in a crosstalk scenario. /// +/// It plays one track from tonari built-in speakers, another track from an external speaker +/// that is to be placed in front of tonari and then it records the mixed result and individual +/// processing steps done on it. +/// /// ``` /// $ cargo run --example crosstalk-benchmark --features derive_serde -- \ /// --config-file examples/crosstalk-benchmark.json5 From fc3d381f2af463ce1136e1ac8a26e54018b19544 Mon Sep 17 00:00:00 2001 From: Jen Tak Date: Tue, 5 Dec 2023 21:40:54 +0800 Subject: [PATCH 4/4] Move config file to config directory --- examples/crosstalk-benchmark.rs | 2 +- examples/{ => recording-configs}/crosstalk-benchmark.json5 | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/{ => recording-configs}/crosstalk-benchmark.json5 (100%) diff --git a/examples/crosstalk-benchmark.rs b/examples/crosstalk-benchmark.rs index fa653dc..48303f5 100644 --- a/examples/crosstalk-benchmark.rs +++ b/examples/crosstalk-benchmark.rs @@ -6,7 +6,7 @@ /// /// ``` /// $ cargo run --example crosstalk-benchmark --features derive_serde -- \ -/// --config-file examples/crosstalk-benchmark.json5 +/// --config-file examples/recording-configs/crosstalk-benchmark.json5 /// ``` use failure::{format_err, Error}; use hound::{WavIntoSamples, WavReader, WavWriter}; diff --git a/examples/crosstalk-benchmark.json5 b/examples/recording-configs/crosstalk-benchmark.json5 similarity index 100% rename from examples/crosstalk-benchmark.json5 rename to examples/recording-configs/crosstalk-benchmark.json5