Skip to content

Commit 39ace34

Browse files
gbernatxintel authored and lgirdwood committed
Audio: Volume: Fix problem with Hifi4
This patch fixes an issue with gain and peak volume processing for the s16 sample format at sample rates where the period is not a multiple of four samples, as the SIMD processing assumed. At the 44.1 kHz rate the period is normally 44 frames, but every 10th period is 45 frames. The value of 45 was not handled correctly, and a glitch appeared in the output when this happened. The fixed code adds sample-by-sample volume processing for the remaining samples (up to 3). The earlier workaround (87571f3 "audio: volume: disable HIFI4 optimizations") is reverted. Signed-off-by: Grzegorz Bernat <grzegorzx.bernat@intel.com> Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent 298687c commit 39ace34

3 files changed

Lines changed: 92 additions & 17 deletions

File tree

src/audio/volume/volume_hifi3_with_peakvol.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ LOG_MODULE_DECLARE(volume_hifi3, CONFIG_SOF_LOG_LEVEL);
2121

2222
#include "volume.h"
2323

24-
// Hifi4 is disabled, see bug https://github.com/thesofproject/sof/issues/9213
25-
// Hifi5 is not there yet.
26-
#if SOF_USE_HIFI(3, VOLUME) || SOF_USE_HIFI(4, VOLUME) || SOF_USE_HIFI(5, VOLUME)
24+
#if SOF_USE_HIFI(3, VOLUME)
2725

2826
#if CONFIG_COMP_PEAK_VOL
2927

src/audio/volume/volume_hifi4.c

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
346346
ae_f32x2 out_sample1 = AE_ZERO32();
347347
ae_f16x4 in_sample = AE_ZERO16();
348348
ae_f16x4 out_sample = AE_ZERO16();
349-
int i, n, m;
349+
int i, n, m, left;
350350
ae_f32x2 *buf;
351351
ae_f32x2 *buf_end;
352352
ae_f32x2 *vol;
@@ -381,7 +381,10 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
381381
m = audio_stream_samples_without_wrap_s16(sink, out);
382382
n = MIN(m, n);
383383
inu = AE_LA64_PP(in);
384-
for (i = 0; i < n; i += 4) {
384+
m = n >> 2;
385+
left = n & 0x03;
386+
/* Process samples in blocks of 4*/
387+
for (i = 0; i < m; i++) {
385388
/* load first two volume gain */
386389
AE_L32X2_XC(volume0, vol, inc);
387390

@@ -410,10 +413,29 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
410413

411414
/* store the output */
412415
out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample1);
413-
// AE_SA16X4_IC(out_sample, outu, out);
414416
AE_SA16X4_IP(out_sample, outu, out);
415417
}
416418
AE_SA64POS_FP(outu, out);
419+
420+
/* Process remaining samples if n is not a multiple of 4*/
421+
for (i = 0; i < left; i++) {
422+
/* load volume gain */
423+
AE_L32_XC(volume0, (ae_f32 *)vol, sizeof(ae_f32));
424+
#if COMP_VOLUME_Q8_16
425+
/* Q8.16 to Q9.23 */
426+
volume0 = AE_SLAI32S(volume0, 7);
427+
#endif
428+
/* Load the input sample */
429+
AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16));
430+
/* Multiply the input sample */
431+
out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample);
432+
/* Q9.23 to Q1.31 */
433+
out_sample0 = AE_SLAI32S(out_sample0, 8);
434+
/* store the output */
435+
out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample0);
436+
AE_S16_0_IP(out_sample, (ae_f16 *)out, sizeof(ae_f16));
437+
}
438+
417439
samples -= n;
418440
bsource->consumed += VOL_S16_SAMPLES_TO_BYTES(n);
419441
bsink->size += VOL_S16_SAMPLES_TO_BYTES(n);
@@ -438,7 +460,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod,
438460
struct audio_stream *source = bsource->data;
439461
struct audio_stream *sink = bsink->data;
440462
ae_f16x4 in_sample = AE_ZERO16();
441-
int i, n, m;
463+
int i, n, m, left;
442464
ae_valign inu = AE_ZALIGN64();
443465
ae_valign outu = AE_ZALIGN64();
444466
ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source)
@@ -456,12 +478,23 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod,
456478
m = audio_stream_samples_without_wrap_s16(sink, out);
457479
n = MIN(m, n);
458480
inu = AE_LA64_PP(in);
459-
for (i = 0; i < n; i += 4) {
481+
m = n >> 2;
482+
left = n & 0x03;
483+
for (i = 0; i < m; i++) {
460484
/* Load the input sample */
461485
AE_LA16X4_IP(in_sample, inu, in);
462486
AE_SA16X4_IP(in_sample, outu, out);
463487
}
464488
AE_SA64POS_FP(outu, out);
489+
490+
/* Process remaining samples if n is not a multiple of 4*/
491+
for (i = 0; i < left; i++) {
492+
/* Load the input sample */
493+
AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16));
494+
/* store the output */
495+
AE_S16_0_IP(in_sample, (ae_f16 *)out, sizeof(ae_f16));
496+
}
497+
465498
samples -= n;
466499
in = audio_stream_wrap(source, in);
467500
out = audio_stream_wrap(sink, out);

src/audio/volume/volume_hifi4_with_peakvol.c

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ LOG_MODULE_DECLARE(volume_hifi4, CONFIG_SOF_LOG_LEVEL);
2121

2222
#include "volume.h"
2323

24-
// disabled and replaced by HIFI3, see issue https://github.com/thesofproject/sof/issues/9213
25-
#if 0 // SOF_USE_HIFI(4, VOLUME) || SOF_USE_HIFI(5, VOLUME)
24+
#if SOF_USE_HIFI(4, VOLUME) || SOF_USE_HIFI(5, VOLUME)
2625

2726
#if CONFIG_COMP_PEAK_VOL
2827
#include <xtensa/tie/xt_hifi4.h>
@@ -394,7 +393,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
394393
ae_f32x2 out_sample1 = AE_ZERO32();
395394
ae_f16x4 in_sample = AE_ZERO16();
396395
ae_f16x4 out_sample = AE_ZERO16();
397-
int i, n, m;
396+
int i, n, m, left;
398397
ae_f32x2 *buf;
399398
ae_f32x2 *buf_end;
400399
ae_f32x2 *vol;
@@ -404,6 +403,8 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
404403
+ bsource->consumed);
405404
ae_f16x4 *out = (ae_f16x4 *)audio_stream_wrap(sink, (char *)audio_stream_get_wptr(sink)
406405
+ bsink->size);
406+
ae_f16 *in1;
407+
ae_f16 *out1;
407408
const int channels_count = audio_stream_get_channels(sink);
408409
const int inc = sizeof(ae_f32x2);
409410
int samples = channels_count * frames;
@@ -435,7 +436,9 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
435436
m = audio_stream_samples_without_wrap_s16(sink, out);
436437
n = MIN(m, n);
437438
inu = AE_LA64_PP(in);
438-
for (i = 0; i < n; i += 4) {
439+
m = n >> 2;
440+
left = n & 0x03;
441+
for (i = 0; i < m; i++) {
439442
/* load first two volume gain */
440443
AE_L32X2_XC(volume0, vol, inc);
441444

@@ -474,6 +477,30 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu
474477
AE_SA16X4_IP(out_sample, outu, out);
475478
}
476479
AE_SA64POS_FP(outu, out);
480+
481+
/* Process remaining samples if n is not a multiple of 4*/
482+
for (i = 0; i < left; i++) {
483+
/* load first volume gain */
484+
AE_L32_XC(volume0, (ae_f32 *)vol, sizeof(ae_f32));
485+
#if COMP_VOLUME_Q8_16
486+
/* Q8.16 to Q9.23 */
487+
volume0 = AE_SLAI32S(volume0, 7);
488+
#endif
489+
/* Load the input sample */
490+
AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16));
491+
/* calculate the peak volume*/
492+
AE_L32_XC1(temp, (ae_f32 *)peakvol, 0);
493+
temp = AE_MAXABS32S(AE_SEXT32X2D16_32(in_sample), temp);
494+
AE_S32_L_XC1(temp, (ae_f32 *)peakvol, sizeof(ae_f32));
495+
/* Multiply the input sample */
496+
out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample);
497+
/* Q9.23 to Q1.31 */
498+
out_sample0 = AE_SLAI32S(out_sample0, 8);
499+
/* store the output */
500+
out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample0);
501+
AE_S16_0_IP(out_sample, (ae_f16 *)out, sizeof(ae_f16));
502+
}
503+
477504
samples -= n;
478505
in = audio_stream_wrap(source, in);
479506
out = audio_stream_wrap(sink, out);
@@ -505,7 +532,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod,
505532
struct audio_stream *source = bsource->data;
506533
struct audio_stream *sink = bsink->data;
507534
ae_f16x4 in_sample = AE_ZERO16();
508-
int i, n, m;
535+
int i, n, m, left;
509536
ae_valign inu = AE_ZALIGN64();
510537
ae_valign outu = AE_ZALIGN64();
511538
ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source)
@@ -518,7 +545,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod,
518545
ae_f32x2 temp;
519546
ae_f32x2 *peakvol = (ae_f32x2 *)cd->peak_vol;
520547

521-
/* Set peakvol(which stores the peak volume data 4 times) as circular buffer */
548+
/* Set peakvol (which stores the peak volume data 4 times) as circular buffer */
522549
AE_SETCBEGIN1(cd->peak_vol);
523550
AE_SETCEND1(cd->peak_vol + channels_count * 4);
524551

@@ -528,7 +555,10 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod,
528555
m = audio_stream_samples_without_wrap_s16(sink, out);
529556
n = MIN(m, n);
530557
inu = AE_LA64_PP(in);
531-
for (i = 0; i < n; i += 4) {
558+
m = n >> 2;
559+
left = n & 0x03;
560+
/* Process samples in blocks of 4*/
561+
for (i = 0; i < m; i++) {
532562
/* Load the input sample */
533563
AE_LA16X4_IP(in_sample, inu, in);
534564
/* calculate the peak volume*/
@@ -543,12 +573,26 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod,
543573
AE_SA16X4_IP(in_sample, outu, out);
544574
}
545575
AE_SA64POS_FP(outu, out);
576+
577+
/* Process remaining samples if n is not a multiple of 4*/
578+
for (i = 0; i < left; i++) {
579+
/* Load the input sample */
580+
AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16));
581+
/* store the output */
582+
AE_S16_0_IP(in_sample, (ae_f16 *)out, sizeof(ae_f16));
583+
/* calculate the peak volume*/
584+
AE_L32_XC1(temp, (ae_f32 *)peakvol, 0);
585+
temp = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample), temp);
586+
AE_S32_L_XC1(temp, (ae_f32 *)peakvol, sizeof(ae_f32));
587+
}
588+
546589
samples -= n;
547-
in = audio_stream_wrap(source, in);
548-
out = audio_stream_wrap(sink, out);
590+
in = (ae_f16x4 *)audio_stream_wrap(source, in);
591+
out = (ae_f16x4 *)audio_stream_wrap(sink, out);
549592
bsource->consumed += VOL_S16_SAMPLES_TO_BYTES(n);
550593
bsink->size += VOL_S16_SAMPLES_TO_BYTES(n);
551594
}
595+
552596
for (i = 0; i < channels_count; i++) {
553597
m = MAX(cd->peak_vol[i], cd->peak_vol[i + channels_count]);
554598
m = MAX(m, cd->peak_vol[i + channels_count * 2]);

0 commit comments

Comments
 (0)