Skip to content

Commit 54d83c2

Browse files
authored
Merge pull request #113 from ed-xmos/feature/asrc_wrapper
ASRC Task
2 parents adceee4 + 0f6fa19 commit 54d83c2

22 files changed

Lines changed: 1556 additions & 15 deletions

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ lib_src change log
1010
* ADDED: Support for XCommon CMake build system
1111
* FIXED: Function pointer annotation avoid stack corruption when using
1212
multiple instances of SSRC or ASRC.
13+
* ADDED: ASRC task wrapper to simplify integration of ASRC blocks
1314

1415
* Changes to dependencies:
1516

CMakeLists.txt

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL XCORE_XS3A) OR (${CMAKE_SYSTEM_PROCESSOR}
3232
lib_src/src/fixed_factor_of_3_voice/*.c
3333
lib_src/src/multirate_hifi/*.c
3434
)
35+
file(GLOB LIB_C_SOURCES_FIFO lib_src/src/*.c
36+
)
3537
file(GLOB_RECURSE LIB_XC_SOURCES lib_src/src/fixed_factor_of_3/*.xc
3638
lib_src/src/fixed_factor_of_3_voice/*.xc
3739
lib_src/src/multirate_hifi/*.xc
@@ -40,9 +42,10 @@ if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL XCORE_XS3A) OR (${CMAKE_SYSTEM_PROCESSOR}
4042
lib_src/src/fixed_factor_of_3_voice/*.S
4143
lib_src/src/multirate_hifi/*.S
4244
)
43-
45+
4446
## Create library target
4547
add_library(lib_src STATIC EXCLUDE_FROM_ALL ${LIB_C_SOURCES}
48+
${LIB_C_SOURCES_FIFO}
4649
${LIB_C_SOURCES_XS3}
4750
${LIB_ASM_SOURCES}
4851
${LIB_XC_SOURCES_XS3}
@@ -65,15 +68,17 @@ if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL XCORE_XS3A) OR (${CMAKE_SYSTEM_PROCESSOR}
6568
${ADDITIONAL_INCLUDE_DIRS_XS3}
6669
)
6770

71+
target_compile_options(lib_src
72+
PRIVATE
73+
-O3
74+
-g
75+
)
76+
6877
target_link_libraries(lib_src
6978
PUBLIC
7079
# defined in fwk_core
7180
framework_core_legacy_compat
7281
)
73-
74-
target_compile_options(lib_src
75-
PRIVATE
76-
-O3
77-
-g
78-
)
82+
83+
7984
endif()

Jenkinsfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ pipeline {
112112
stage('Get repo') {
113113
steps {
114114
runningOn(env.NODE_NAME)
115-
sh "mkdir ${REPO}"
116115
// source checks require the directory
117116
// name to be the same as the repo name
118117
dir("${REPO}") {
@@ -151,10 +150,16 @@ pipeline {
151150
sh "pip install -e ${WORKSPACE}/xtagctl"
152151
withXTAG(["XCORE-AI-EXPLORER"]) { adapterIDs ->
153152
sh "xtagctl reset ${adapterIDs[0]}"
153+
// Do asynch FIFO test
154154
dir("tests/asynchronous_fifo_asrc_test") {
155155
sh "xmake -j"
156156
sh "xrun --xscope --adapter-id " + adapterIDs[0] + " bin/asynchronous_fifo_asrc_test.xe"
157157
}
158+
// ASRC Task tests
159+
dir("tests") {
160+
localRunPytest('-k "asrc_task" -vv')
161+
archiveArtifacts artifacts: "*.png", allowEmptyArchive: true
162+
}
158163
}
159164
}
160165
}

doc/Doxyfile.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
PROJECT_NAME = lib_src
44
PROJECT_BRIEF = "Synchronous and Asynchronous Rate Conversion Documentation"
55

6-
INPUT = ../lib_src/api ../lib_src/src ../lib_src/src/fixed_factor_vpu_voice
6+
INPUT = ../lib_src/api ../lib_src/src ../lib_src/src/fixed_factor_vpu_voice ../lib_src/src/asrc_task
77

8-
PREDEFINED = C_API= EXTERN_C= UNSAFE=
8+
PREDEFINED = C_API= EXTERN_C= UNSAFE= __DOXYGEN__=1
99

doc/asrc_task/asrc_task.rst

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
ASRC Task
2+
---------
3+
4+
Introduction
5+
............
6+
7+
The ASRC library provides a function call that operates on blocks of samples whereas typical XMOS audio IO libraries provide streaming audio one sample at a time. The ASRC task wraps up the core ASRC function with all of the other lower level APIs (e.g. FIFO) and required logic. It provides a simple-to-use and generic ASRC conversion block suitable for integration into practical designs. It is fully re-entrant, permitting multiple instances within a project supporting multiple (or bi-directional) sample rates and audio clock domain bridges.
8+
9+
Operation
10+
.........
11+
12+
The ASRC task handles bridging between two asynchronous audio sources. It has an input side and output side. The input samples are provided over a channel allowing the source to be placed on a different XCORE tile if needed. The output side sample interface is via an asynchronous FIFO meaning the consumer must reside on the same XCORE tile as the ASRC.
13+
14+
.. figure:: images/ASRC_block_threads.png
15+
:scale: 80 %
16+
:alt: ASRC Thread Usage
17+
18+
19+
Both input and output interfaces must specify the nominal sample rate required and additionally the input must specify a channel count. The output channel count will be set to the same as the input channel count automatically once the ASRC has automatically configured itself. A timestamp indicating the time of the production of the last input sample and the consumption of the first output sample must also be supplied which allows the ASRC to calculate the rate and phase difference. Each time either the input or output nominal sample rate or the channel count changes the ASRC subsystem automatically re-configures itself and restarts with the new settings.
20+
21+
The ASRC Task supports the following nominal sample rates for input and output:
22+
23+
- 44.1 kHz
24+
- 48 kHz
25+
- 88.2 kHz
26+
- 96 kHz
27+
- 176.4 kHz
28+
- 192 kHz
29+
30+
Because the required compute for multi-channel systems may exceed the performance limit of a single thread, the ASRC subsystem is able to make use of multiple threads in parallel to achieve the required conversion within the sample time period. It uses a dynamic fork and join architecture to share the ASRC workload across multiple threads each time a batch of samples is processed. The threads must all reside on the same tile as the ASRC task due to them sharing input and output buffers. The workload and buffer partitioning is dynamically computed by the ASRC task at stream startup and is constrained by the user at compile time to set maximum limits of both channel count and worker threads.
31+
32+
The number of threads required depends on the channel count and the sample rates. Higher sample rates require more MIPS. The amount of thread MHz (and consequently how many threads) required can be *roughly* calculated using the following formulae:
33+
34+
- Total thread MHz required for xcore.ai systems = 0.15 * Max channel count * (Max SR input kHz + Max SR output kHz)
35+
- Total thread MHz required for XCORE-200 systems = 0.3 * Max channel count * (Max SR input kHz + Max SR output kHz)
36+
37+
The difference in performance requirement between the two architectures is due to xcore.ai supporting a Vector Processing Unit (VPU) which allows acceleration of the internal filters used by the ASRC. For example:
38+
39+
- A two channel system supporting up to 192kHz input and output will require about (0.15 * (192 + 192) * 2) ~= 115 thread MHz. This means a single thread (assuming no more than 5 active threads on an xcore.ai device with a 600MHz clock) will likely be capable of handling this stream.
40+
41+
- An eight channel system consisting of either 44.1kHz or 48kHz input with maximum output rate of 192kHz will require about (0.15 * (48 + 192) * 8) ~= 288 thread MHz. This can adequately be provided by four threads (assuming up to 8 active threads on an xcore.ai device with a 600MHz clock).
42+
43+
In reality the amount of thread MHz needed will be lower than the above formulae suggest since subsequent ASRC channels after the first can share some of the calculations. This results in about a 10% reduction in the performance requirement per additional channel per worker thread. Increasing the input frame size in the ASRC task may also reduce the MHz requirement by a few percent at the cost of larger buffers and a slight latency increase.
44+
45+
.. warning::
46+
Exceeding the available processing time, through the chosen combination of channel count, input/output rates, number of worker threads and device clock speed, may result in choppy audio at best, or a blocked ASRC task if the overrun is persistent.
47+
48+
It is strongly recommended that you test the system for your desired channel count and input and output sample rates. An optional timing calculation and check is provided in the ASRC to allow characterisation at run-time which can be found in the `asrc_task.c` source code.
49+
50+
The low level ASRC processing function call API accepts a minimum input frame size of four whereas most XMOS audio interfaces provide a single sample period frame. The ASRC subsystem integrates a serial-to-block and back-to-serial conversion to support this. The input side works by stealing cycles from the ASRC using an interrupt and notifies the main ASRC loop using a single channel end when a complete frame of double-buffered samples is available to process. The ASRC output side is handled by the asynchronous FIFO which supports a block `put` with single sample `get` and thus provides de-serialisation intrinsically.
51+
52+
53+
API & Usage
54+
...........
55+
56+
The ASRC Task is a forever-loop task for which various data structures must be declared and passed in. The following items must be passed in:
57+
58+
- A pointer to an instance of the `asrc_in_out_t` structure which contains buffers, stream information and ASRC task state.
59+
- A pointer to the FIFO used at the output side of the ASRC task.
60+
- The length of the FIFO passed in above.
61+
62+
63+
In addition the following two functions may be declared in a user `C` file (note XC does not handle function pointers):
64+
65+
- The callback function from ASRC task which receives samples over a channel from the producer.
66+
- A callback initialisation function which registers the callback function into the `asrc_in_out_t` struct
67+
68+
If these are not defined, then a default receive implementation will be used which is matched with the send_asrc_input_samples_default() function on the user's producer side. This should be sufficient for typical usage.
69+
70+
An example of calling the ASRC task from an ``XC`` main function is provided below. Note the use of `unsafe`, which permits the compiler to allow shared memory structures to be accessed by more than one thread::
71+
72+
chan c_producer;
73+
74+
// FIFO and ASRC I/O declaration. Unsafe to allow producer and consumer to access it from XC
75+
#define FIFO_LENGTH (SRC_MAX_NUM_SAMPS_OUT * 3) // Half full is target so *2 is nominal size but we need wiggle room at startup
76+
int64_t array[ASYNCHRONOUS_FIFO_INT64_ELEMENTS(FIFO_LENGTH, MAX_ASRC_CHANNELS_TOTAL)];
77+
78+
unsafe{
79+
// IO struct for ASRC must be passed to both asrc_proc and consumer
80+
asrc_in_out_t asrc_io = {{{0}}};
81+
asrc_in_out_t * unsafe asrc_io_ptr = &asrc_io;
82+
asynchronous_fifo_t * unsafe fifo = (asynchronous_fifo_t *)array;
83+
setup_asrc_io_custom_callback(asrc_io_ptr); // Optional user rx function
84+
85+
par
86+
{
87+
producer(c_producer);
88+
asrc_task(c_producer, asrc_io_ptr, fifo, FIFO_LENGTH);
89+
consumer(asrc_io_ptr, fifo);
90+
91+
}
92+
} // unsafe region
93+
94+
95+
An example of the user-defined `C` function for receiving the input samples is shown below along with the user callback registration function. The `receive_asrc_input_samples()` function must be as short as possible because it steals cycles from the ASRC task operation. Because this function is not called until the first channel word is received from the producer, the `chanend_in_word()` operations will happen straight away and not block::
96+
97+
#include "asrc_task.h"
98+
99+
ASRC_TASK_ISR_CALLBACK_ATTR // This is required for proper stack usage calculation by the compiler.
100+
unsigned receive_asrc_input_samples(chanend_t c_producer, asrc_in_out_t *asrc_io, unsigned *new_input_rate){
101+
static unsigned asrc_in_counter = 0;
102+
103+
// Receive stream info from producer
104+
*new_input_rate = chanend_in_word(c_producer);
105+
asrc_io->input_timestamp = chanend_in_word(c_producer);
106+
asrc_io->input_channel_count = chanend_in_word(c_producer);
107+
108+
// Pack into array properly LRLRLRLR or 123412341234 etc.
109+
for(int i = 0; i < asrc_io->input_channel_count; i++){
110+
int idx = i + asrc_io->input_channel_count * asrc_in_counter;
111+
asrc_io->input_samples[asrc_io->input_write_idx][idx] = chanend_in_word(c_producer);
112+
}
113+
114+
// Keep track of frame block to ASRC task
115+
if(++asrc_in_counter == SRC_N_IN_SAMPLES){
116+
asrc_in_counter = 0;
117+
}
118+
119+
return asrc_in_counter;
120+
}
121+
122+
// Register the above function for ASRC task
123+
void setup_asrc_io_custom_callback(asrc_in_out_t *asrc_io){
124+
init_asrc_io_callback(asrc_io, receive_asrc_input_samples);
125+
}
126+
127+
128+
Note that the producing side of the above transaction must match the channel protocol. For this example, the producer must send the following items across the channel in order:
129+
130+
- The nominal input sample rate.
131+
- The input time stamp of the last sample received.
132+
- The input channel count of the current frame.
133+
- The samples from 0..n.
134+
135+
Because a `streaming` channel is used the back-pressure on the producer side will be very low because the channel outputs will be buffered and the receive callback will always respond to the received words.
136+
137+
This callback function helps bridge between `sample based` systems and the block-based nature of the underlying ASRC functions without consuming an extra thread.
138+
139+
The API for ASRC task is shown below:
140+
141+
.. doxygengroup:: src_asrc_task
142+
:content-only:
143+

0 commit comments

Comments
 (0)