Skip to content

Commit f03b43b

Browse files
singalsukv2019i
authored and committed
Math: IIR DF1: Add a simplified 4th order IIR process function
The 4th order filter with two biquads in series is commonly used in crossover and multiband DRC components. Omitting the outer loop for parallel biquads and the check for null coefficients, and using a fixed loop count of two, makes the critical code faster. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent b5e8e45 commit f03b43b

5 files changed

Lines changed: 259 additions & 0 deletions

File tree

src/include/sof/math/iir_df1.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <sof/common.h>
1414

1515
#define IIR_DF1_NUM_STATE 4
16+
#define SOF_IIR_DF1_4TH_NUM_BIQUADS 2
1617

1718
struct iir_state_df1 {
1819
unsigned int biquads; /* Number of IIR 2nd order sections total */
@@ -34,8 +35,24 @@ void iir_init_delay_df1(struct iir_state_df1 *iir, int32_t **state);
3435

3536
void iir_reset_df1(struct iir_state_df1 *iir);
3637

38+
/**
39+
* Calculate IIR filter consisting of biquads
40+
* @param iir IIR state with configured biquad coefficients and delay lines data
41+
* @param x Single s32 Q1.31 format input sample
42+
* @return Single s32 Q1.31 format output sample
43+
*/
3744
int32_t iir_df1(struct iir_state_df1 *iir, int32_t x);
3845

46+
/**
47+
* Calculate IIR filter consisting of biquads, special simplified version for
48+
* 4th order filter with two biquads in series. Note: There are no checks for
49+
* iir struct members.
50+
* @param iir IIR state with configured biquad coefficients and delay lines data
51+
* @param x Single s32 Q1.31 format input sample
52+
* @return Single s32 Q1.31 format output sample
53+
*/
54+
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x);
55+
3956
/* Inline functions */
4057
#if SOF_USE_MIN_HIFI(3, FILTER)
4158
#include "iir_df1_hifi3.h"

src/math/iir_df1_generic.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,58 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
109109
}
110110
EXPORT_SYMBOL(iir_df1);
111111

112+
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
113+
{
114+
int32_t in;
115+
int32_t tmp;
116+
int64_t acc;
117+
int i;
118+
int d = 0; /* Index to state */
119+
int c = 0; /* Index to coefficient a2 */
120+
int32_t *coefp = iir->coef;
121+
int32_t *delay = iir->delay;
122+
123+
/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
124+
/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
125+
in = x;
126+
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
127+
/* Compute output: Delay is Q3.61
128+
* Q2.30 x Q1.31 -> Q3.61
129+
* Shift Q3.61 to Q3.31 with rounding, saturate to Q1.31
130+
*/
131+
acc = ((int64_t)coefp[c]) * delay[d]; /* a2 * y(n - 2) */
132+
acc += ((int64_t)coefp[c + 1]) * delay[d + 1]; /* a1 * y(n - 1) */
133+
acc += ((int64_t)coefp[c + 2]) * delay[d + 2]; /* b2 * x(n - 2) */
134+
acc += ((int64_t)coefp[c + 3]) * delay[d + 3]; /* b1 * x(n - 1) */
135+
acc += ((int64_t)coefp[c + 4]) * in; /* b0 * x */
136+
tmp = (int32_t)sat_int32(Q_SHIFT_RND(acc, 61, 31));
137+
138+
/* update the delay value */
139+
delay[d] = delay[d + 1];
140+
delay[d + 1] = tmp;
141+
delay[d + 2] = delay[d + 3];
142+
delay[d + 3] = in;
143+
144+
/* Apply gain Q2.14 x Q1.31 -> Q3.45 */
145+
acc = ((int64_t)coefp[c + 6]) * tmp; /* Gain */
146+
147+
/* Apply biquad output shift right parameter
148+
* simultaneously with Q3.45 to Q3.31 conversion. Then
149+
* saturate to 32 bits Q1.31 and prepare for next
150+
* biquad.
151+
*/
152+
acc = Q_SHIFT_RND(acc, 45 + coefp[c + 5], 31);
153+
in = sat_int32(acc);
154+
155+
/* Proceed to next biquad coefficients and delay
156+
* lines.
157+
*/
158+
c += SOF_EQ_IIR_NBIQUAD;
159+
d += IIR_DF1_NUM_STATE;
160+
}
161+
/* Output of previous section is in variable in */
162+
return in;
163+
}
164+
EXPORT_SYMBOL(iir_df1_4th);
165+
112166
#endif

src/math/iir_df1_hifi3.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,4 +126,72 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
126126
}
127127
EXPORT_SYMBOL(iir_df1);
128128

129+
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
130+
{
131+
ae_int64 acc;
132+
ae_valign coef_align;
133+
ae_int32x2 coef_a2a1;
134+
ae_int32x2 coef_b2b1;
135+
ae_int32x2 coef_b0;
136+
ae_int32x2 gain;
137+
ae_int32x2 shift;
138+
ae_int32x2 delay_y2y1;
139+
ae_int32x2 delay_x2x1;
140+
ae_int32 in;
141+
ae_int32 tmp;
142+
ae_int32x2 *coefp;
143+
ae_int32x2 *delayp;
144+
int32_t *delay_update;
145+
int i;
146+
147+
/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
148+
coefp = (ae_int32x2 *)&iir->coef[0];
149+
delayp = (ae_int32x2 *)&iir->delay[0];
150+
in = x;
151+
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
152+
/* Compute output: Delay is kept Q17.47 while multiply
153+
* instruction gives Q2.30 x Q1.31 -> Q18.46. Need to
154+
* shift delay line values right by one for same align
155+
* as MAC. Store to delay line need to be shifted left
156+
* by one similarly.
157+
*/
158+
coef_align = AE_LA64_PP(coefp);
159+
AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
160+
AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
161+
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
162+
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
163+
AE_L32_IP(gain, (ae_int32 *)coefp, 4);
164+
165+
AE_L32X2_IP(delay_y2y1, delayp, 8);
166+
AE_L32X2_IP(delay_x2x1, delayp, 8);
167+
168+
acc = AE_MULF32R_HH(coef_a2a1, delay_y2y1); /* a2 * y(n - 2) */
169+
AE_MULAF32R_LL(acc, coef_a2a1, delay_y2y1); /* a1 * y(n - 1) */
170+
AE_MULAF32R_HH(acc, coef_b2b1, delay_x2x1); /* b2 * x(n - 2) */
171+
AE_MULAF32R_LL(acc, coef_b2b1, delay_x2x1); /* b1 * x(n - 1) */
172+
AE_MULAF32R_HH(acc, coef_b0, in); /* b0 * x */
173+
acc = AE_SLAI64S(acc, 1); /* Convert to Q17.47 */
174+
tmp = AE_ROUND32F48SSYM(acc); /* Round to Q1.31 */
175+
176+
/* update the state value */
177+
delay_update = (int32_t *)delayp - 4;
178+
delay_update[0] = delay_update[1];
179+
delay_update[1] = tmp;
180+
delay_update[2] = delay_update[3];
181+
delay_update[3] = in;
182+
183+
/* Apply gain Q18.14 x Q1.31 -> Q34.30 */
184+
acc = AE_MULF32R_HH(gain, tmp); /* Gain */
185+
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
186+
187+
/* Apply biquad output shift right parameter and then
188+
* round and saturate to 32 bits Q1.31.
189+
*/
190+
acc = AE_SRAA64(acc, shift);
191+
in = AE_ROUND32F48SSYM(acc);
192+
}
193+
return in;
194+
}
195+
EXPORT_SYMBOL(iir_df1_4th);
196+
129197
#endif

src/math/iir_df1_hifi4.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,4 +119,65 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
119119
}
120120
EXPORT_SYMBOL(iir_df1);
121121

122+
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
123+
{
124+
ae_valign coef_align;
125+
ae_valign data_r_align;
126+
ae_valign data_w_align = AE_ZALIGN64();
127+
ae_f64 acc;
128+
ae_int32x2 delay_y2y1;
129+
ae_int32x2 delay_x2x1;
130+
ae_int32x2 coef_a2a1;
131+
ae_int32x2 coef_b2b1;
132+
ae_int32x2 coef_b0;
133+
ae_int32x2 gain;
134+
ae_int32x2 shift;
135+
ae_int32 in;
136+
ae_int32x2 *coefp = (ae_int32x2 *)iir->coef;
137+
ae_int32x2 *delay_r = (ae_int32x2 *)iir->delay;
138+
ae_int32x2 *delay_w = delay_r;
139+
int i;
140+
141+
/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
142+
/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
143+
data_r_align = AE_LA64_PP(delay_r);
144+
in = x;
145+
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
146+
/* Load data */
147+
AE_LA32X2_IP(delay_y2y1, data_r_align, delay_r);
148+
AE_LA32X2_IP(delay_x2x1, data_r_align, delay_r);
149+
150+
/* Load coefficients */
151+
coef_align = AE_LA64_PP(coefp);
152+
AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
153+
AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
154+
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
155+
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
156+
AE_L32_IP(gain, (ae_int32 *)coefp, 4);
157+
158+
acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
159+
AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
160+
AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
161+
AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
162+
delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */
163+
164+
/* Store data */
165+
AE_SA32X2_IP(delay_y2y1, data_w_align, delay_w);
166+
AE_SA32X2_IP(delay_x2x1, data_w_align, delay_w);
167+
168+
/* Apply gain */
169+
acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
170+
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
171+
172+
/* Apply biquad output shift right parameter and then
173+
* round and saturate to 32 bits Q1.31.
174+
*/
175+
acc = AE_SRAA64(acc, shift);
176+
in = AE_ROUND32F48SSYM(acc);
177+
}
178+
AE_SA64POS_FP(data_w_align, delay_w);
179+
return in;
180+
}
181+
EXPORT_SYMBOL(iir_df1_4th);
182+
122183
#endif

src/math/iir_df1_hifi5.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,63 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
116116
}
117117
EXPORT_SYMBOL(iir_df1);
118118

119+
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
120+
{
121+
ae_valignx2 coef_align;
122+
ae_valignx2 data_r_align;
123+
ae_valignx2 data_w_align = AE_ZALIGN128();
124+
ae_f64 acc;
125+
ae_int32x2 delay_y2y1;
126+
ae_int32x2 delay_x2x1;
127+
ae_int32x2 coef_a2a1;
128+
ae_int32x2 coef_b2b1;
129+
ae_int32x2 coef_b0;
130+
ae_int32x2 gain;
131+
ae_int32x2 shift;
132+
ae_int32 in;
133+
ae_int32x4 *coefp = (ae_int32x4 *)iir->coef;
134+
ae_int32x4 *delay_r = (ae_int32x4 *)iir->delay;
135+
ae_int32x4 *delay_w = delay_r;
136+
int i;
137+
138+
/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
139+
/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
140+
data_r_align = AE_LA128_PP(delay_r);
141+
in = x;
142+
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
143+
/* Load data */
144+
AE_LA32X2X2_IP(delay_y2y1, delay_x2x1, data_r_align, delay_r);
145+
146+
/* Load coefficients */
147+
coef_align = AE_LA128_PP(coefp);
148+
AE_LA32X2X2_IP(coef_a2a1, coef_b2b1, coef_align, coefp);
149+
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
150+
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
151+
AE_L32_IP(gain, (ae_int32 *)coefp, 4);
152+
153+
acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
154+
AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
155+
AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
156+
AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
157+
delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */
158+
159+
/* Store data */
160+
AE_SA32X2X2_IP(delay_y2y1, delay_x2x1, data_w_align, delay_w);
161+
162+
/* Apply gain */
163+
acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
164+
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
165+
166+
/* Apply biquad output shift right parameter and then
167+
* round and saturate to 32 bits Q1.31.
168+
*/
169+
acc = AE_SRAA64(acc, shift);
170+
in = AE_ROUND32F48SSYM(acc);
171+
}
172+
173+
AE_SA128POS_FP(data_w_align, delay_w);
174+
return in;
175+
}
176+
EXPORT_SYMBOL(iir_df1_4th);
177+
119178
#endif

0 commit comments

Comments
 (0)