Math: IIR DF1: Add a simplified 4th order IIR process function

singalsu · kv2019i · commit f03b43b1e3ff · 2025-02-10T15:34:37.000+02:00
A 4th order filter built from two biquads in series is commonly used in
crossover and multiband DRC components. Omitting the outer loop for
parallel biquads and the check for null coefficients, and using a fixed
loop count of two, makes the critical code faster.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
diff --git a/src/include/sof/math/iir_df1.h b/src/include/sof/math/iir_df1.h
@@ -13,6 +13,7 @@
 #include <sof/common.h>
 
 #define IIR_DF1_NUM_STATE 4
+#define SOF_IIR_DF1_4TH_NUM_BIQUADS 2 /* Biquads in series in iir_df1_4th() */
 
 struct iir_state_df1 {
 	unsigned int biquads; /* Number of IIR 2nd order sections total */
@@ -34,8 +35,24 @@ void iir_init_delay_df1(struct iir_state_df1 *iir, int32_t **state);
 
 void iir_reset_df1(struct iir_state_df1 *iir);
 
+/**
+ * Calculate one output sample of an IIR filter consisting of biquads
+ * @param iir	IIR state with configured biquad coefficients and delay lines data
+ * @param x	Single s32 Q1.31 format input sample
+ * @return	Single s32 Q1.31 format output sample
+ */
 int32_t iir_df1(struct iir_state_df1 *iir, int32_t x);
 
+/**
+ * Calculate IIR filter consisting of biquads, special simplified version for
+ * 4th order filter with two biquads in series. Note: The iir struct members are
+ * not checked, coef and delay must be valid and set up for two biquads.
+ * @param iir	IIR state with configured biquad coefficients and delay lines data
+ * @param x	Single s32 Q1.31 format input sample
+ * @return	Single s32 Q1.31 format output sample
+ */
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x);
+
 /* Inline functions */
 #if SOF_USE_MIN_HIFI(3, FILTER)
 #include "iir_df1_hifi3.h"
diff --git a/src/math/iir_df1_generic.c b/src/math/iir_df1_generic.c
@@ -109,4 +109,58 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	int32_t *coef = iir->coef; /* Coefficients of the current section */
+	int32_t *state = iir->delay; /* Delay line of the current section */
+	int32_t sample = x; /* Input of the current section */
+	int32_t y;
+	int64_t sum;
+	int section;
+
+	/* Two biquad sections in series. Each section in coef[] is laid out
+	 * as {a2, a1, b2, b1, b0, shift, gain} and each section in delay[] as
+	 * {y(n - 2), y(n - 1), x(n - 2), x(n - 1)}.
+	 */
+	for (section = 0; section < SOF_IIR_DF1_4TH_NUM_BIQUADS; section++) {
+		/* Feedback part, Q2.30 coefficient x Q1.31 data -> Q3.61 */
+		sum = (int64_t)coef[0] * state[0]; /* a2 * y(n - 2) */
+		sum += (int64_t)coef[1] * state[1]; /* a1 * y(n - 1) */
+
+		/* Feed-forward part accumulated in the same format */
+		sum += (int64_t)coef[2] * state[2]; /* b2 * x(n - 2) */
+		sum += (int64_t)coef[3] * state[3]; /* b1 * x(n - 1) */
+		sum += (int64_t)coef[4] * sample; /* b0 * x(n) */
+
+		/* Convert Q3.61 to Q3.31 with rounding, saturate to Q1.31 */
+		y = (int32_t)sat_int32(Q_SHIFT_RND(sum, 61, 31));
+
+		/* Age the delay line, newest output and input enter */
+		state[0] = state[1];
+		state[1] = y;
+		state[2] = state[3];
+		state[3] = sample;
+
+		/* Section gain, Q2.14 x Q1.31 -> Q3.45 */
+		sum = (int64_t)coef[6] * y;
+
+		/* The output shift right parameter in coef[5] is folded
+		 * into the Q3.45 to Q3.31 conversion. The saturated
+		 * Q1.31 result is the input of the next section.
+		 */
+		sum = Q_SHIFT_RND(sum, 45 + coef[5], 31);
+		sample = sat_int32(sum);
+
+		/* Step to the coefficients and delay line of the
+		 * next section.
+		 */
+		coef += SOF_EQ_IIR_NBIQUAD;
+		state += IIR_DF1_NUM_STATE;
+	}
+
+	/* The output of the last section was left in sample */
+	return sample;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
diff --git a/src/math/iir_df1_hifi3.c b/src/math/iir_df1_hifi3.c
@@ -126,4 +126,72 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_int64 acc;
+	ae_valign coef_align;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32 in; /* Input of the current biquad, filter output after the loop */
+	ae_int32 tmp; /* Biquad output before gain and shift are applied */
+	ae_int32x2 *coefp;
+	ae_int32x2 *delayp;
+	int32_t *delay_update; /* Scalar access to the current biquad's delay line */
+	int i;
+
+	/* Two biquads in series, coef[] order is {a2, a1, b2, b1, b0, shift, gain} */
+	coefp = (ae_int32x2 *)&iir->coef[0];
+	delayp = (ae_int32x2 *)&iir->delay[0]; /* {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Compute output: The multiply instruction gives
+		 * Q2.30 x Q1.31 -> Q18.46. The accumulated sum is
+		 * shifted left by one to Q17.47 and rounded to Q1.31
+		 * before it is stored to the delay line, so the delay
+		 * line holds Q1.31 samples.
+		 */
+		coef_align = AE_LA64_PP(coefp);
+		AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
+		AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		AE_L32X2_IP(delay_y2y1, delayp, 8);
+		AE_L32X2_IP(delay_x2x1, delayp, 8);
+
+		acc = AE_MULF32R_HH(coef_a2a1, delay_y2y1); /* a2 * y(n - 2) */
+		AE_MULAF32R_LL(acc, coef_a2a1, delay_y2y1); /* a1 * y(n - 1) */
+		AE_MULAF32R_HH(acc, coef_b2b1, delay_x2x1); /* b2 * x(n - 2) */
+		AE_MULAF32R_LL(acc, coef_b2b1, delay_x2x1); /* b1 * x(n - 1) */
+		AE_MULAF32R_HH(acc, coef_b0, in); /* b0 * x(n) */
+		acc = AE_SLAI64S(acc, 1); /* Convert to Q17.47 */
+		tmp = AE_ROUND32F48SSYM(acc); /* Round to Q1.31 */
+
+		/* Update this biquad's delay line, four words back from delayp */
+		delay_update = (int32_t *)delayp - 4;
+		delay_update[0] = delay_update[1];
+		delay_update[1] = tmp;
+		delay_update[2] = delay_update[3];
+		delay_update[3] = in;
+
+		/* Apply gain Q18.14 x Q1.31 -> Q34.30 */
+		acc = AE_MULF32R_HH(gain, tmp); /* Gain */
+		acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31. The result is the
+		 * input of the next biquad.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
diff --git a/src/math/iir_df1_hifi4.c b/src/math/iir_df1_hifi4.c
@@ -119,4 +119,65 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_valign coef_align;
+	ae_valign data_r_align;
+	ae_valign data_w_align = AE_ZALIGN64();
+	ae_f64 acc;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32 in; /* Input of the current biquad, filter output after the loop */
+	ae_int32x2 *coefp = (ae_int32x2 *)iir->coef;
+	ae_int32x2 *delay_r  = (ae_int32x2 *)iir->delay;
+	ae_int32x2 *delay_w = delay_r; /* Updated states are written back in place */
+	int i;
+
+	/* Two biquads in series, coef[] order is {a2, a1, b2, b1, b0, shift, gain} */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	data_r_align = AE_LA64_PP(delay_r);
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Load the delay line of this biquad */
+		AE_LA32X2_IP(delay_y2y1, data_r_align, delay_r);
+		AE_LA32X2_IP(delay_x2x1, data_r_align, delay_r);
+
+		/* Load coefficients, b0, shift and gain with scalar loads */
+		coef_align = AE_LA64_PP(coefp);
+		AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
+		AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		acc = AE_MULF32RA_HH(coef_b0, in);		  /* acc = b0 * in */
+		AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
+		AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
+		AE_PKSR32(delay_y2y1, acc, 1);		     /* y2 = y1, y1 = acc(q1.31) */
+		delay_x2x1 = AE_SEL32_LL(delay_x2x1, in);    /* x2 = x1, x1 = in */
+
+		/* Store the updated delay line of this biquad */
+		AE_SA32X2_IP(delay_y2y1, data_w_align, delay_w);
+		AE_SA32X2_IP(delay_x2x1, data_w_align, delay_w);
+
+		/* Apply gain to the new output y1 kept in delay_y2y1 */
+		acc = AE_MULF32R_LL(gain, delay_y2y1);	/* acc = gain * y1 */
+		acc = AE_SLAI64S(acc, 17);		/* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31. The result is the
+		 * input of the next biquad.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+	AE_SA64POS_FP(data_w_align, delay_w); /* Finish the aligning stores to delay line */
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
diff --git a/src/math/iir_df1_hifi5.c b/src/math/iir_df1_hifi5.c
@@ -116,4 +116,63 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_valignx2 coef_align;
+	ae_valignx2 data_r_align;
+	ae_valignx2 data_w_align = AE_ZALIGN128();
+	ae_f64 acc;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32 in; /* Input of the current biquad, filter output after the loop */
+	ae_int32x4 *coefp = (ae_int32x4 *)iir->coef;
+	ae_int32x4 *delay_r  = (ae_int32x4 *)iir->delay;
+	ae_int32x4 *delay_w = delay_r; /* Updated states are written back in place */
+	int i;
+
+	/* Two biquads in series, coef[] order is {a2, a1, b2, b1, b0, shift, gain} */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	data_r_align = AE_LA128_PP(delay_r);
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Load the four delay line values of this biquad at once */
+		AE_LA32X2X2_IP(delay_y2y1, delay_x2x1, data_r_align, delay_r);
+
+		/* Load coefficients, b0, shift and gain with scalar loads */
+		coef_align = AE_LA128_PP(coefp);
+		AE_LA32X2X2_IP(coef_a2a1, coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		acc = AE_MULF32RA_HH(coef_b0, in);		  /* acc = b0 * in */
+		AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
+		AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
+		AE_PKSR32(delay_y2y1, acc, 1);		     /* y2 = y1, y1 = acc(q1.31) */
+		delay_x2x1 = AE_SEL32_LL(delay_x2x1, in);   /* x2 = x1, x1 = in */
+
+		/* Store the updated delay line of this biquad */
+		AE_SA32X2X2_IP(delay_y2y1, delay_x2x1, data_w_align, delay_w);
+
+		/* Apply gain to the new output y1 kept in delay_y2y1 */
+		acc = AE_MULF32R_LL(gain, delay_y2y1);	/* acc = gain * y1 */
+		acc = AE_SLAI64S(acc, 17);		/* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31. The result is the
+		 * input of the next biquad.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+
+	AE_SA128POS_FP(data_w_align, delay_w); /* Finish the aligning stores to delay line */
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif