Skip to content

Commit

Permalink
Hacky fixups
Browse files Browse the repository at this point in the history
There were multiple bugs with the buffer copying and interpretation.
Turns out that the microphone input was NOT lying: it really is giving
us 4 channels of S16_LE, contra what the downstream ALSA endpoint has
(S32_LE).  That seems like a topology bug?

But the reference input WAS lying.  It was claiming 4 channels but
providing only two, leading to underruns.

Combined with buffer copying bugs, the end result was that we were
moving the right amount of bytes but in the wrong format.

I think this is right now, but it needs a bunch of cleanup still.
Commit so I have a reference to fall back on.
  • Loading branch information
andyross committed Dec 14, 2023
1 parent feb3d4b commit b9b99e7
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 47 deletions.
1 change: 0 additions & 1 deletion app/boards/intel_adsp_ace15_mtpm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,5 @@ CONFIG_PROBE_DMA_MAX=2
CONFIG_KCPS_DYNAMIC_CLOCK_CONTROL=y
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING=y
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS=2
CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS=32

CONFIG_COMP_STUBS=y
173 changes: 127 additions & 46 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ struct google_rtc_audio_processing_comp_data {
int aec_reference_source;
int raw_microphone_source;
struct comp_buffer *ref_comp_buffer;
int ref_frame_bytes;
int out_frame_bytes;
int ref_framesz;
int cap_framesz;
};

#if CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS == 16
Expand Down Expand Up @@ -591,6 +591,8 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
int out_chan = sink_get_channels(sinks[0]);
int out_rate = sink_get_rate(sinks[0]);

ref_chan = 2;

/* Too many channels is a soft failure, AEC treats only the first N */
if (ref_chan > REF_CHAN_MAX)
comp_warn(dev, "Too many ref channels: %d, truncating to %d",
Expand All @@ -604,6 +606,16 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
ret = -EINVAL;
}

/* But workaround: the ref source on MTL claims 4 channels,
* but only provides two. Use our kconfig-derived vlaue
* instead, but warn about it.
*/
if (ref_chan != REF_CHAN_MAX) {
comp_err(dev, "Incorrect ref channels %d, setting %d\n",
ref_chan, REF_CHAN_MAX);
ref_chan = REF_CHAN_MAX;
}

cd->num_aec_reference_channels = MIN(ref_chan, REF_CHAN_MAX);
cd->num_capture_channels = MIN(mic_chan, MIC_CHAN_MAX);

Expand All @@ -624,8 +636,7 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
comp_err(dev, "Mic stream must be %d bit samples\n",
CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS);

// FIXME: squash for now, the streams lie. See below.
//ret = -EINVAL;
ret = -EINVAL;
}

if (mic_fmt != out_fmt) {
Expand All @@ -636,10 +647,10 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
// FIXME: the streams have a bad format on MTL, so we can't
// use this API. Compute by hand.
//
//cd->ref_frame_bytes = source_get_frame_bytes(sources[cd->aec_reference_source]);
//cd->out_frame_bytes = sink_get_frame_bytes(sinks[0]);
cd->ref_frame_bytes = sizeof(mic_sample_t) * source_get_channels(sources[cd->aec_reference_source]);
cd->out_frame_bytes = cd->ref_frame_bytes;
//cd->ref_framesz = source_get_frame_bytes(sources[cd->aec_reference_source]);
//cd->cap_framesz = sink_get_frame_bytes(sinks[0]);
cd->ref_framesz = sizeof(ref_sample_t) * ref_chan;
cd->cap_framesz = sizeof(mic_sample_t) * mic_chan;

ret = GoogleRtcAudioProcessingSetStreamFormats(cd->state, mic_rate,
cd->num_capture_channels,
Expand Down Expand Up @@ -684,6 +695,9 @@ static int google_rtc_audio_processing_reset(struct processing_module *mod)
/* FunctionMostlyExistsToKeepLineLengthsUnderControl */
static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd)
{
static int tlast;
int t0 = k_cycle_get_32();

/* Note that reference input and mic output share the same
* buffer for efficiency
*/
Expand All @@ -693,86 +707,106 @@ static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd)
(const float **)cd->raw_mic_buffers,
cd->refout_buffers);
cd->buffered_frames = 0;

int t1 = k_cycle_get_32();
//printk("A took %d, dt %d\n", k_cyc_to_us_floor32(t1 - t0), k_cyc_to_us_floor32(t0 - tlast));
tlast = t0;
}

static void mic_in_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0)
{
size_t chan = MIN(MIC_CHAN_MAX, source_get_channels(src));
size_t chan = source_get_channels(src);
//size_t chan = MIC_CHAN_MAX;
size_t samples = frames * chan;
size_t bytes = samples * sizeof(mic_sample_t);
const mic_sample_t *buf, *bufstart, *bufend;
float *dst[MIC_CHAN_MAX];
int i, c, err;
size_t bufsz;

for (i = 0; i < chan; i++)
__ASSERT(chan == 4, "");
__ASSERT(sizeof(mic_sample_t) == 2, "");

assert(MIC_CHAN_MAX <= chan); /* we truncate only */
__ASSERT(frames + frame0 <= NUM_FRAMES, "frames %d frame0 %d", frames, frame0);

for (i = 0; i < MIC_CHAN_MAX; i++)
dst[i] = &dst_bufs[i][frame0];

err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
__ASSERT(err == 0, "err not zero: %d, bytes %d", err, bytes);
bufend = &bufstart[bufsz];

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
for (c = 0; c < MIC_CHAN_MAX; c++)
*dst[c]++ = mic_to_float(*buf++);
if (buf >= bufend)
buf = bufstart;
}
buf += chan - MIC_CHAN_MAX; /* skip unused channels */
if (buf >= bufend)
buf = bufstart;
}
source_release_data(src, bytes);
}

/* Nearly verbatim except for types. Needs macro/inlining attention */
static void ref_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0)
{
size_t chan = MIN(REF_CHAN_MAX, source_get_channels(src));
// FIXME: MTL passes 4 channels, not 2
//size_t chan = source_get_channels(src);
size_t chan = REF_CHAN_MAX;
size_t samples = frames * chan;
size_t bytes = samples * sizeof(ref_sample_t);
const ref_sample_t *buf, *bufstart, *bufend;
float *dst[REF_CHAN_MAX];
int i, c, err;
size_t bufsz;

for (i = 0; i < chan; i++)
__ASSERT(chan == 2, "");
__ASSERT(sizeof(ref_sample_t) == 2, "");

for (i = 0; i < REF_CHAN_MAX; i++)
dst[i] = &dst_bufs[i][frame0];

err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
for (c = 0; c < chan; c++)
*dst[c]++ = ref_to_float(*buf++);
if (buf >= bufend)
buf = bufstart;
}
if (buf >= bufend)
buf = bufstart;
}
source_release_data(src, bytes);
}

static void mic_out_copy(struct sof_sink *sink, int frames, float **src_bufs)
{
size_t chan = MIN(MIC_CHAN_MAX, sink_get_channels(sink));
size_t chan = sink_get_channels(sink);
size_t samples = frames * chan;
size_t bytes = samples * sizeof(mic_sample_t);
mic_sample_t *buf, *bufstart, *bufend;
int i, c, err;
size_t bufsz;
float *src[MIC_CHAN_MAX];

for (i = 0; i < chan; i++)
__ASSERT(chan == 4, "");
__ASSERT(sizeof(mic_sample_t) == 2, "");

for (i = 0; i < MIC_CHAN_MAX; i++)
src[i] = src_bufs[i];

//printk("sink %d free %d\n", bytes, sink_get_free_size(sink));
err = sink_get_buffer(sink, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
for (c = 0; c < MIC_CHAN_MAX; c++)
*buf++ = float_to_mic(*src[c]++);
if (buf >= bufend)
buf = bufstart;
}
for (/**/; c < chan; c++)
*buf++ = 0; /* clear unused channels */
if (buf >= bufend)
buf = bufstart;
}
sink_commit_buffer(sink, bytes);
}
Expand Down Expand Up @@ -803,36 +837,52 @@ static int mod_process(struct processing_module *mod, struct sof_source **source
if (!ref_ok)
bzero(refoutbuf, sizeof(refoutbuf));

int fmic = source_get_data_frames_available(mic);
int fref = source_get_data_frames_available(ref);
// FIXME: SOF lies about these values
//int fmic = source_get_data_frames_available(mic);
//int fref = source_get_data_frames_available(ref);
int fmic = source_get_data_available(mic) / cd->cap_framesz;
int fref = source_get_data_available(ref) / cd->ref_framesz;
int frames = ref_ok ? MIN(fmic, fref) : fmic;
int n, frames_rem;

#if 0
static int tlast;
int now = k_cycle_get_32();
printk("F%d (m %db r %db) dt %d\n", frames, source_get_data_available(mic), source_get_data_available(ref), k_cyc_to_us_floor32(now - tlast));
tlast = now;
if (fmic < 300) return 0; //DEBUG
#endif

/* If fref > fmic (common at pipeline startup if
* playback was already active), we should consume the early
* samples so AEC compares the most recent values.
*/
if (ref_ok && fref > fmic)
source_release_data(ref, (fref - fmic) * cd->ref_frame_bytes);
source_release_data(ref, (fref - fmic) * cd->ref_framesz);

for (frames_rem = frames; frames_rem; frames_rem -= n) {
n = MIN(frames_rem, cd->num_frames - cd->buffered_frames);

int si = cd->buffered_frames * source_get_channels(mic);

mic_in_copy(mic, n, cd->raw_mic_buffers, si);
mic_in_copy(mic, n, cd->raw_mic_buffers, cd->buffered_frames);

if (ref_ok)
ref_copy(ref, n, cd->refout_buffers, si);
ref_copy(ref, n, cd->refout_buffers, cd->buffered_frames);

cd->buffered_frames += n;

__ASSERT(cd->buffered_frames <= NUM_FRAMES, "bf %d", cd->buffered_frames);
if (cd->buffered_frames >= cd->num_frames) {
execute_aec(cd);
mic_out_copy(out, n, cd->refout_buffers);
__ASSERT(cd->buffered_frames == 0, "bf %d", cd->buffered_frames);
mic_out_copy(out, cd->num_frames, cd->refout_buffers);

/* Safety valve. */
if (sink_get_free_size(out) < cd->num_frames * cd->cap_framesz) {
//comp_warn(mod->dev, "Two AEC's in one process()\n");
break;
}
}
}

return 0;
}

Expand All @@ -841,28 +891,59 @@ static bool mod_is_ready_to_process(struct processing_module *mod,
struct sof_sink **sinks, int num_of_sinks)
{
struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
#if 0
static int count, cyc, last_cyc;
cyc = k_cycle_get_32();
if((count++ % 100) == 0)
printk("RDY CPU%d us %d\n", arch_curr_cpu()->id, k_cyc_to_us_floor32(cyc-last_cyc));
last_cyc = cyc;
#endif

/* AEC produces its output in a single 10ms chunk, so we need
* at least that much space in the output buffer. We're
* otherwise happy to process any amount of input; it's
* accumulated in a relatively cheap copy, so frontload that
* to the extent possible.
*/
return sink_get_free_size(sinks[0]) >= cd->num_frames * cd->cap_framesz;
#if 0
struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
struct sof_source *mic = sources[cd->raw_microphone_source];
struct sof_source *ref = sources[cd->aec_reference_source];
struct sof_sink *out = sinks[0];
bool ref_ok = ref_stream_active(cd);

/* this should source_get_min_available(ref_stream)!!!
* Currently the topology sets IBS incorrectly
*/
if (ref_ok && (source_get_data_available(ref)
< cd->num_frames * cd->ref_frame_bytes))
static int ref_need, mic_need, out_need;

if (!ref_need) {
ref_need = cd->num_frames * cd->ref_framesz;
mic_need = source_get_min_available(mic);
out_need = cd->num_frames * cd->cap_framesz;
}

static int count, cyc, last_cyc;
cyc = k_cycle_get_32();
if((count++ % 1000) == 0)
printk("RDY CPU%d us %d\n", arch_curr_cpu()->id, k_cyc_to_us_floor32(cyc-last_cyc));
last_cyc = cyc;

int nref = source_get_data_available(ref);
int nmic = source_get_data_available(mic);
int nout = sink_get_free_size(out);

#if 0
if (!ref_ok || !nref)
return false;

if (source_get_data_available(mic) < source_get_min_available(mic))
if (nmic == 0)
return false;
#endif

/* Output comes out all at once, the output sink much have
* space for the full block
*/
if (sink_get_free_size(out) < cd->num_frames * cd->out_frame_bytes)
if (nout < out_need)
return false;

return true;
#endif
}

static struct module_interface google_rtc_audio_processing_interface = {
Expand Down

0 comments on commit b9b99e7

Please sign in to comment.