Initial 48t 3/2 downsample filter

Allan-xmos · Allan-xmos · commit 9dcaf2354f8b · 2026-03-11T16:10:23.000Z
diff --git a/.gitignore b/.gitignore
@@ -41,3 +41,4 @@
 doc/_build/pdf/*.pdf
 **/doc/pdf/*.pdf
 **/.vscode/*
+**/autogen/**
diff --git a/lib_src/src/fixed_factor_vpu_voice/asm/push_s32_24t.S b/lib_src/src/fixed_factor_vpu_voice/asm/push_s32_24t.S
@@ -0,0 +1,56 @@
+// Copyright 2023-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#define FUNCTION_NAME push_s32_24t
+
+#define NSTACKWORDS     (0)
+
+#define state           r0
+#define new_samp        r1
+#define indx            r2
+#define _32             r3
+
+.text
+.issue_mode dual
+.globl FUNCTION_NAME;
+.type FUNCTION_NAME,@function
+.align 16
+.cc_top FUNCTION_NAME.function,FUNCTION_NAME
+
+FUNCTION_NAME: // (int32_t * state, int32_t new_samp)
+        dualentsp NSTACKWORDS
+#if (defined(__XS3A__)) // Only available for XS3 with VPU
+
+        // r4 - r10 are not used here
+
+        // Setting up the VPU and a pointer to state[15]
+    {   ldc _32, 32                     ;   ldc r11, 0                      }
+    {   ldc indx, 15                    ;   vsetc r11                       }
+    {   ldaw r11, state[indx]           ;                                   } // r11 -> st[15 - 23]
+
+#undef indx
+#define buff            r2
+
+    {   add buff, r11, 4                ;   vldr r11[0]                     } // buff -> st[16 - 24] // vR has st[15 - 23]
+    {   sub r11, r11, _32               ;   vstr buff[0]                    } // r11  -> st[7  - 15] // buff has vR
+
+    {   add buff, r11, 4                ;   vldr r11[0]                     } // buff -> st[8  - 16] // vR has st[7  - 15]
+    {   sub r11, r11, _32               ;   vstr buff[0]                    } // r11  -> st[-1 -  7] // buff has vR
+
+    {   add buff, r11, 4                ;   vldr r11[0]                     } // buff -> st[0  -  8] // vR has st[-1 -  7]
+    {                                   ;   vstr buff[0]                    } // buff has vR
+
+        // put new_samp in state[0]
+    {                                   ;   stw new_samp, state[0]          }
+
+#endif // Only available for XS3 with VPU
+        retsp NSTACKWORDS
+
+.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
+
+#undef FUNCTION_NAME
+
diff --git a/lib_src/src/fixed_factor_vpu_voice/src_low_level.h b/lib_src/src/fixed_factor_vpu_voice/src_low_level.h
@@ -14,6 +14,16 @@
  */
 int32_t conv_s32_24t(const int32_t * samples, const int32_t * coef);
 
+/**
+ * @brief Performs VPU-optimised ring buffer shift for s32 type integers
+ * 
+ * @param state     State that keep previous samples
+ * @param new_samp  New sample to put in the state
+ * @note state has to hold 24 int32_t values
+ * @note state has to be 8 byte aligned
+ */
+void push_s32_24t(int32_t * state, int32_t new_samp);
+
 /**
  * @brief Perforns VPU-optimised FIR filtering for s32 type integers
  * 
diff --git a/lib_src/src/fixed_factor_vpu_voice/src_poly.h b/lib_src/src/fixed_factor_vpu_voice/src_poly.h
@@ -151,4 +151,29 @@ static inline void src_rat_3_2_96t_us(int32_t samp_in[2], int32_t samp_out[3], c
 
 /**@}*/ // END: addtogroup src_rat_3_2_96t_us
 
+
+/**
+ * \addtogroup src_rat_2_3_48t_ds src_rat_2_3_48t_ds
+ *
+ * The public API for using SRC.
+ * @{
+ */
+
+/**
+ * @brief Performs VPU-optimised 48 taps polyphase rational factor 2/3 downsampling
+ * 
+ * @param samp_in       Values to be downsampled
+ * @param samp_out      Downsampled output
+ * @param coefs_ds      Two-phase FIR coefficients array with [2][24] dimensions
+ * @param state_ds      FIR state array with 24 elements in it
+ */
+static inline void src_rat_2_3_48t_ds(int32_t samp_in[3], int32_t samp_out[2], const int32_t coefs_ds[2][24], int32_t state_ds[24])
+{
+    push_s32_24t(state_ds, samp_in[0]);
+    samp_out[0] = fir_s32_24t(state_ds, coefs_ds[0], samp_in[1]) * 2;
+    samp_out[1] = fir_s32_24t(state_ds, coefs_ds[1], samp_in[2]) * 2;
+}
+
+/**@}*/ // END: addtogroup src_rat_2_3_48t_ds
+
 #endif // _SRC_POLY_VPU_H_
diff --git a/lib_src/src/fixed_factor_vpu_voice/src_rat_fir_48t_ds_coefs.c b/lib_src/src/fixed_factor_vpu_voice/src_rat_fir_48t_ds_coefs.c
@@ -0,0 +1,28 @@
+// Copyright 2023-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+/*********************************/
+/* AUTOGENERATED. DO NOT MODIFY! */
+/*********************************/
+
+// Use src_rat_fir_gen.py script to regenerate this file
+// python src_rat_fir_gen.py -gc True -nt 48
+
+#include "src_rat_fir_48t_ds_coefs.h"
+#include <stdint.h>
+
+/** q30 coefficients to use for the 48 -> 32 kHz polyphase rational factor 48t downsampling */
+const int32_t ALIGNMENT(8) src_rat_fir_48t_ds_coefs[SRC_RAT_FIR_48T_DS_NUM_PHASES][SRC_RAT_FIR_48T_DS_TAPS_PER_PHASE] = {
+    {
+          -836279,     -1511853,      4633855,     -3331393,     -5205286,     14094115,
+         -9447157,    -13998500,     38329634,    -27797034,    -47620793,    226309753,
+        342129244,     66484670,    -71415051,     23608611,     15631718,    -23045498,
+          8619065,      5719490,     -8365728,      2961045,      1793971,     -2267583,
+    },
+    {
+         -2267583,      1793971,      2961045,     -8365728,      5719490,      8619065,
+        -23045498,     15631718,     23608611,    -71415051,     66484670,    342129244,
+        226309753,    -47620793,    -27797034,     38329634,    -13998500,     -9447157,
+         14094115,     -5205286,     -3331393,      4633855,     -1511853,      -836279,
+    },
+};
diff --git a/lib_src/src/fixed_factor_vpu_voice/src_rat_fir_48t_ds_coefs.h b/lib_src/src/fixed_factor_vpu_voice/src_rat_fir_48t_ds_coefs.h
@@ -0,0 +1,31 @@
+// Copyright 2023-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+/*********************************/
+/* AUTOGENERATED. DO NOT MODIFY! */
+/*********************************/
+
+// Use src_rat_fir_gen.py script to regenerate this file
+// python src_rat_fir_gen.py -gc True -nt 48
+
+#ifndef _SRC_RAT_48T_DS_COEFS_H_
+#define _SRC_RAT_48T_DS_COEFS_H_
+
+#include <stdint.h>
+
+#ifndef ALIGNMENT
+#  ifdef __xcore__
+#    define ALIGNMENT(N)  __attribute__((aligned (N)))
+#  else
+#    define ALIGNMENT(N)
+#  endif
+#endif
+
+#define SRC_RAT_FIR_48T_DS_NUM_TAPS (48)
+#define SRC_RAT_FIR_48T_DS_NUM_PHASES (2)
+#define SRC_RAT_FIR_48T_DS_TAPS_PER_PHASE (24)
+
+/** q30 coefficients to use for the 48 -> 32 kHz polyphase rational factor 48t downsampling */
+extern const int32_t src_rat_fir_48t_ds_coefs[SRC_RAT_FIR_48T_DS_NUM_PHASES][SRC_RAT_FIR_48T_DS_TAPS_PER_PHASE];
+
+#endif // _SRC_RAT_48T_DS_COEFS_H_
diff --git a/python/fixed_factor_vpu_voice/src_rat_fir_gen.py b/python/fixed_factor_vpu_voice/src_rat_fir_gen.py
@@ -220,6 +220,102 @@ def generate_c_file(output_path, mixed_taps_ds, mixed_taps_us, total_num_taps =
                                     'coefs_ds':coefs_ds,
                                     'coefs_us':coefs_us})
 
+def gen_coefs_48t_ds():
+    """
+    Generate 2-phase polyphase DS coefficients for 48t (24 taps/phase) 48->32 kHz SRC.
+    Uses the same filter design as gen_coefs but skips the 96t-specific passband check.
+
+    Returns:
+        taps[48] in float
+        poly_ds[2][24] in float
+        poly_ds_int[2][24] in int32
+    """
+    total_num_taps_ds = 48
+    num_phases_ds = 2
+    lpf = signal.firwin2(total_num_taps_ds, [0, 15000, 17000, 0.5 * fs], [1, 1, 0, 0],
+                         window=("kaiser", 3.2), fs=fs)
+    poly_ds, poly_ds_int = mix_coefs(lpf, num_phases_ds)
+    return lpf, poly_ds, poly_ds_int
+
+
+def generate_48t_ds_header_file(output_path, filename=None):
+    header_template = """\
+// Copyright 2023-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+/*********************************/
+/* AUTOGENERATED. DO NOT MODIFY! */
+/*********************************/
+
+// Use src_rat_fir_gen.py script to regenerate this file
+// python src_rat_fir_gen.py -gc True -nt 48
+
+#ifndef _SRC_RAT_48T_DS_COEFS_H_
+#define _SRC_RAT_48T_DS_COEFS_H_
+
+#include <stdint.h>
+
+#ifndef ALIGNMENT
+#  ifdef __xcore__
+#    define ALIGNMENT(N)  __attribute__((aligned (N)))
+#  else
+#    define ALIGNMENT(N)
+#  endif
+#endif
+
+#define SRC_RAT_FIR_48T_DS_NUM_TAPS (48)
+#define SRC_RAT_FIR_48T_DS_NUM_PHASES (2)
+#define SRC_RAT_FIR_48T_DS_TAPS_PER_PHASE (24)
+
+/** q30 coefficients to use for the 48 -> 32 kHz polyphase rational factor 48t downsampling */
+extern const int32_t src_rat_fir_48t_ds_coefs[SRC_RAT_FIR_48T_DS_NUM_PHASES][SRC_RAT_FIR_48T_DS_TAPS_PER_PHASE];
+
+#endif // _SRC_RAT_48T_DS_COEFS_H_
+"""
+    if filename is None:
+        filename = "src_rat_fir_48t_ds_coefs.h"
+    header_path = Path(output_path) / filename
+    with open(header_path, "w") as header_file:
+        header_file.write(header_template)
+
+
+def generate_48t_ds_c_file(output_path, mixed_taps_ds_48t):
+    tph_ds = 24
+    num_phases_ds = 2
+    c_template = """\
+// Copyright 2023-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+/*********************************/
+/* AUTOGENERATED. DO NOT MODIFY! */
+/*********************************/
+
+// Use src_rat_fir_gen.py script to regenerate this file
+// python src_rat_fir_gen.py -gc True -nt 48
+
+#include "src_rat_fir_48t_ds_coefs.h"
+#include <stdint.h>
+
+/** q30 coefficients to use for the 48 -> 32 kHz polyphase rational factor 48t downsampling */
+const int32_t ALIGNMENT(8) src_rat_fir_48t_ds_coefs[SRC_RAT_FIR_48T_DS_NUM_PHASES][SRC_RAT_FIR_48T_DS_TAPS_PER_PHASE] = {
+%(coefs_ds)s
+};
+"""
+    coefs_ds = ''
+    for phase in range(num_phases_ds):
+        coefs_ds += '    {\n    '
+        for tap in range(tph_ds):
+            coefs_ds += ' ' + str(mixed_taps_ds_48t[phase][tap]).rjust(12) + ','
+            if (((tap + 1) % 6) == 0):
+                coefs_ds += '\n    '
+        coefs_ds += '},\n'
+
+    filename = "src_rat_fir_48t_ds_coefs.c"
+    c_path = Path(output_path) / filename
+    with open(c_path, "w") as c_file:
+        c_file.write(c_template % {'coefs_ds': coefs_ds})
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("Generate FIR coefficiens for a 48 - 32 kHz polyphase SRC")
     parser.add_argument('--output_dir','-o', help='output path for filter files')
diff --git a/tests/sim_tests/CMakeLists.txt b/tests/sim_tests/CMakeLists.txt
@@ -12,3 +12,4 @@ add_subdirectory(unity_gain_voice_test)
 add_subdirectory(us3_voice_test)
 add_subdirectory(vpu_ff3_test)
 add_subdirectory(vpu_rat_test)
+add_subdirectory(vpu_rat_48t_test)
diff --git a/tests/sim_tests/test_voice_vpu_rat.py b/tests/sim_tests/test_voice_vpu_rat.py
@@ -149,6 +149,46 @@ def test_src_vpu_rat_prepare():
     build_c(poly_ds_int, poly_us_int)
 
 
+def build_c_48t(poly_ds_48t_int):
+    coeffs_path = Path(__file__).resolve().parent / "vpu_rat_48t_test" / "autogen"
+    coeffs_path.mkdir(exist_ok=True, parents=True)
+    gf.generate_48t_ds_header_file(coeffs_path)
+    gf.generate_48t_ds_c_file(coeffs_path, poly_ds_48t_int)
+    # Note: The autogenerated files are not compiled as part of the application. The copies of these committed in lib_src are compiled instead
+    build_firmware_xcommon_cmake(Path(__file__).parent / "vpu_rat_48t_test")
+
+
+def run_c_48t(fc_ex):
+    file_path = Path(__file__).parent
+    testname = "vpu_rat_48t_test"
+    xe = file_path / testname / "bin" / f"{testname}.xe"
+    app = f"xsim {xe}"
+    subprocess.check_output(app.split())
+
+    sig_bin = Path("sig_c_32k.bin")
+    assert sig_bin.is_file(), "could not find output bin"
+    sig32k_int = np.fromfile(sig_bin, dtype=np.int32)
+
+    thdn, freq = THDN_and_freq(sig32k_int.astype(np.float64), 32000)
+    print(f"C 32k THDN: {thdn}, fc: {freq}")
+    assert_thdn_and_fc(thdn, freq, -60, fc_ex)
+
+    sig_bin = Path("sig_c_48k.bin")
+    assert sig_bin.is_file(), "could not find output bin"
+    sig48k_int = np.fromfile(sig_bin, dtype=np.int32)
+
+    thdn, freq = THDN_and_freq(sig48k_int.astype(np.float64), 48000)
+    print(f"C 48k THDN: {thdn}, fc: {freq}")
+    assert_thdn_and_fc(thdn, freq, -50, fc_ex)
+
+
+@pytest.mark.prepare
+def test_src_vpu_rat_48t_prepare():
+    print(f"Preparing rat 48t test")
+    _, _, poly_ds_48t_int = gf.gen_coefs_48t_ds()  # 2 phases x 24 taps, uses push_s32_24t and fir_s32_24t
+    build_c_48t(poly_ds_48t_int)
+
+
 @pytest.mark.parametrize(
     "test_freq", [
         100, 14000
@@ -166,3 +206,36 @@ def test_src_vpu_rat(test_freq):
         sig32k = downsample(sig_fl, taps, poly_ds, test_freq)
         upsample(sig32k, taps, poly_us, test_freq)
         run_c(test_freq)
+
+
+@pytest.mark.parametrize(
+    "test_freq", [
+        100, 14000
+        ]
+)
+@pytest.mark.main
+def test_src_vpu_rat_48t(test_freq):
+    """Test src_rat_2_3_48t_ds (exercises push_s32_24t and fir_s32_24t).
+    Downsamples 48k->32k using 48t DS filter; upsamples back 32k->48k using existing 96t US filter.
+    """
+    print(f"Testing 48t rat DS {test_freq} Hz sinewave")
+    total_num_taps_us = 8 * 2 * 3 * 2  # 96 taps for US (reuse existing 96t design)
+    taps_ds, poly_ds_48t, poly_ds_48t_int = gf.gen_coefs_48t_ds()  # 2 phases x 24 taps per phase
+    taps_us, _, _, poly_us, poly_us_int = gf.gen_coefs(total_num_taps_us)
+
+    working_dir = Path(tempfile.mkdtemp(prefix="test_src_vpu_rat_48t", dir=Path(__file__).parent))
+    with tmp_dir(working_dir):
+        sig_fl, sig_int = get_sig(test_freq)
+
+        # Python-side downsampling check with bounds appropriate for a 48t filter
+        sig32k = np.zeros(len(sig_fl) * 2 // 3)
+        buff = np.zeros(len(sig_fl) * 2)
+        buff[0::2] = sig_fl
+        buff = signal.convolve(buff, taps_ds, "same", "direct") * 2
+        sig32k = buff[0::3]
+        thdn, freq = THDN_and_freq(sig32k.astype(np.float64), 32000)
+        print(f"PY 32k THDN: {thdn}, fc: {freq}")
+        assert_thdn_and_fc(thdn, freq, -60, test_freq)
+
+        upsample(sig32k, taps_us, poly_us, test_freq)
+        run_c_48t(test_freq)
diff --git a/tests/sim_tests/vpu_rat_48t_test/CMakeLists.txt b/tests/sim_tests/vpu_rat_48t_test/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 3.21)
+include($ENV{XMOS_CMAKE_PATH}/xcommon.cmake)
+
+if(NOT BUILD_NATIVE)
+project(vpu_rat_48t_test)
+
+set(APP_HW_TARGET XK-EVK-XU316)
+
+set(APP_PCA_ENABLE ON)
+
+set(APP_COMPILER_FLAGS      "-g"
+                            "-O3"
+                            "-mno-dual-issue"
+)
+
+include(${CMAKE_CURRENT_LIST_DIR}/../../../examples/deps.cmake)
+
+set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../../../../)
+
+XMOS_REGISTER_APP()
+endif()
diff --git a/tests/sim_tests/vpu_rat_48t_test/src/fileio_app.c b/tests/sim_tests/vpu_rat_48t_test/src/fileio_app.c
diff --git a/tests/utils/src_test_utils.py b/tests/utils/src_test_utils.py