# Source: sof-test/tools/capture-test.py at f717aa3e373520464a4c3794d61fb4685e5be82e (thesofproject/sof-test on GitHub)
#!/usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2024 Google LLC
# Author: Andy Ross <andyross@google.com>
import os
import re
import sys
import time
import struct
import random
import argparse
import ctypes as C

# Long-form usage text.  parse_opts() passes it to argparse as the parser
# description (with RawDescriptionHelpFormatter, so the line breaks below
# are shown verbatim by --help).
HELP_TEXT="""
Simple dependency-free ALSA test rig for PCM capture analysis.

Just drop this script on a test device to run it.  No tools to build,
no dependencies to install.  Confirmed to run on Python 3.8+ with
nothing more than the core libraries and a working libasound.so.2
visible to the runtime linker.

When run without arguments, the tool will record from the capture
device for the specified duration, then emit the resulting samples
back out the playback device without processing (except potentially to
convert the sample format from s32_le to s16_le if needed, and to
discard any channels beyond those supported by the playback device).

Passing --chirp-test enables a playback-to-capture latency detector:
the tool will emit a short ~6 kHz wave packet via ALSA's mmap
interface (which allows measuring and correcting for the buffer
latency from the userspace process) and simultaneously loop on short
reads from the capture device looking for the moment it arrives.

Passing --echo-test enables a capture-while-playback test.  The script
will play a specified .wav file ("noise.wav" by default) for the
specified duration, while simultaneously capturing, and report the
"power" (in essentially arbitrary units, but it's linear with actual
signal energy assuming the sample space is itself linear) of the
captured data to stdout at the end of the test.

The tool supports a "--disable-rtnr" for the specific case of RTNR
noise reduction, which has been observed to suppress the chirp test
occasionally.  It does not otherwise change the runtime configuration
of the ALSA device and expects it to be configured by the user for the
specific case under test.
"""

def parse_opts(argv=None):
    """
    Parse the command line and publish the result as the module-level
    "opts" namespace that the rest of the script reads.

    argv: optional list of argument strings; None (the default) makes
    argparse read sys.argv[1:] as before.

    Besides the raw arguments, the namespace gets three derived values:
    capchan falls back to chan when not given, capmap is converted from
    a digit string to a list of channel indices, and base_test is True
    when neither --chirp-test nor --echo-test was requested.

    Inconsistent capture settings are rejected here via ap.error() (usage
    message on stderr, exit status 2) rather than surfacing later as a
    struct/Index error or mis-scaled samples in the conversion code.

    Returns the same namespace that was stored in "opts".
    """
    global opts # pylint: disable=global-statement
    ap = argparse.ArgumentParser(description=HELP_TEXT,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    ap.add_argument("--disable-rtnr", action="store_true", help="Disable RTNR noise reduction")
    ap.add_argument("-c", "--card", type=int, default=0, help="ALSA card index")
    ap.add_argument("--pcm", type=int, default=16, help="Output ALSA PCM index")
    ap.add_argument("--cap", type=int, default=18, help="Capture ALSA PCM index")
    ap.add_argument("--rate", type=int, default=48000, help="Sample rate")
    ap.add_argument("--chan", type=int, default=2, help="Output channel count")
    ap.add_argument("--bufsz", type=int, default=2048, help="Buffer size in frames")
    ap.add_argument("--capchan", type=int,
                    help="Capture channel count (if different from output)")
    ap.add_argument("--capbits", type=int, default=16, help="Capture sample bits (16 or 32)")
    ap.add_argument("--capmap", type=str, default="01",
                    help="Capture channel map (as string, e.g. '23' to select 3rd/4th elems)")
    ap.add_argument("--noise", default="noise.wav",
                    help="WAV file containing 'noise' for capture")
    ap.add_argument("--duration", type=int, default=3, help="Capture duration (seconds)")
    ap.add_argument("--chirpcyc", type=int, default=120, help="Repetitions of chirp waveform")
    ap.add_argument("--chirp-test", action="store_true",
                    help="Test latency with synthesized audio")
    ap.add_argument("--echo-test", action="store_true", help="Test simultaneous capture/playback")

    parsed = ap.parse_args(argv)
    if not parsed.capchan:
        parsed.capchan = parsed.chan

    # Any other width would be captured as 16 bit but scaled as if it
    # were capbits wide, silently producing wrong sample values.
    if parsed.capbits not in (16, 32):
        ap.error(f"--capbits must be 16 or 32 (got {parsed.capbits})")

    try:
        parsed.capmap = [int(x) for x in parsed.capmap]
    except ValueError:
        ap.error(f"--capmap must be a string of digits (got '{parsed.capmap}')")

    # The conversion code emits exactly one output channel per map entry
    # and indexes each captured frame with the map values.
    if len(parsed.capmap) != parsed.chan:
        ap.error(f"--capmap needs exactly {parsed.chan} entries "
                 f"(one per output channel), got {len(parsed.capmap)}")
    if any(idx >= parsed.capchan for idx in parsed.capmap):
        ap.error(f"--capmap entries must be below the capture channel "
                 f"count ({parsed.capchan})")

    parsed.base_test = not (parsed.chirp_test or parsed.echo_test)
    opts = parsed
    return opts

class ALSA:
    """
    Tiny ctypes stub.  Wraps the alsa API such that errno returns (at
    least ones that look like an errno) become OSErrors and don't need
    to be checked.  Includes a generalized alloc() that wraps all the
    _sizeof() predicates and allocates from the (safe/collected) python
    heap.  Provides a simple spot for putting (manually-derived)
    constants.  The ALSA C API is mostly-structless and quite simple, so
    this tends to work well without a lot of ctypes use except for an
    occasional constructed integer or byref() pointer.

    Any attribute that is not defined on the class is looked up as a
    symbol in libasound and returned as a callable:
      * names ending in "_name" return the C string decoded as UTF-8;
      * everything else returns the integer result after err_wrap().
    """
    PCM_STREAM_PLAYBACK = 0
    PCM_STREAM_CAPTURE = 1
    PCM_FORMAT_S16_LE = 2
    PCM_FORMAT_S32_LE = 10
    PCM_ACCESS_MMAP_INTERLEAVED = 0
    PCM_ACCESS_RW_INTERLEAVED = 3

    def __init__(self):
        self.lib = C.cdll.LoadLibrary("libasound.so.2")

    def __getattr__(self, name):
        # Only reached for names normal lookup did not find.  Go through
        # __dict__ so that a missing "lib" (e.g. an instance created
        # without __init__) is a plain AttributeError instead of this
        # method calling itself until the recursion limit is hit.
        lib = self.__dict__.get("lib")
        if lib is None:
            raise AttributeError(name)
        fn = getattr(lib, name)
        if name.endswith("_name"): # These return strings!
            fn.restype = C.c_char_p
            return lambda *args: fn(*args).decode("utf-8")
        return lambda *args: ALSA.err_wrap(fn(*args))

    @staticmethod
    def err_wrap(ret):
        """
        Return ret unchanged unless it looks like a negated errno (strictly
        between -200 and 0), in which case raise OSError carrying both
        the errno and its message, so callers can inspect e.errno.
        """
        if -200 < ret < 0:
            raise OSError(-ret, os.strerror(-ret))
        return ret

    def alloc(self, typ):
        """
        Allocate a zero-filled byte buffer sized by libasound's
        snd_<typ>_sizeof() function, for use as an opaque ALSA object.
        """
        return (C.c_byte * getattr(self.lib, f"snd_{typ}_sizeof")())()

    # pylint: disable=too-few-public-methods
    class pcm_channel_area_t(C.Structure):
        """ctypes layout for the channel area filled in by snd_pcm_mmap_begin()."""
        _fields_ = [("addr", C.c_ulong), ("first", C.c_int), ("step", C.c_int)]

def pcm_init_stream(pcm, rate, chans, fmt, access):
    """
    Apply hardware parameters to an already-opened PCM handle.

    pcm:    handle filled in by snd_pcm_open()
    rate:   sample rate passed to snd_pcm_hw_params_set_rate()
    chans:  channel count
    fmt:    one of the ALSA.PCM_FORMAT_* constants
    access: one of the ALSA.PCM_ACCESS_* constants

    The ring buffer size always comes from opts.bufsz.  With
    opts.verbose set, the parameter space is dumped to stdout before it
    is committed.  Any ALSA failure surfaces as the OSError raised by
    the alsa wrapper.
    """
    hwp = alsa.alloc("pcm_hw_params")
    alsa.snd_pcm_hw_params_any(pcm, hwp)
    alsa.snd_pcm_hw_params_set_format(pcm, hwp, fmt)
    alsa.snd_pcm_hw_params_set_channels(pcm, hwp, chans)
    alsa.snd_pcm_hw_params_set_rate(pcm, hwp, rate, 0)
    alsa.snd_pcm_hw_params_set_access(pcm, hwp, access)
    alsa.snd_pcm_hw_params_set_buffer_size(pcm, hwp, opts.bufsz)
    if opts.verbose:
        print("Set hw_params:")
        out = C.c_ulong(0)
        alsa.snd_output_buffer_open(C.byref(out))
        try:
            alsa.snd_pcm_hw_params_dump(hwp, out)
            buf = C.c_ulong(0)
            # snd_output_buffer_string() reports the number of valid
            # bytes; bound the copy with it instead of scanning for a
            # NUL terminator the buffer is not guaranteed to have.
            size = alsa.snd_output_buffer_string(out, C.byref(buf))
            text = C.string_at(buf.value, size).decode("ascii", errors="ignore")
        finally:
            # The output object owns the text buffer; release it only
            # after the contents have been copied into a Python string.
            alsa.snd_output_close(out)
        print(text)

    alsa.snd_pcm_hw_params(pcm, hwp)

def ctl_disable_rtnr():
    """
    Noise reduction likes to squash our chirp on capture.  Walk the list
    of controls, looking for an RTNR enable control, if one exists, and
    set it to false.  Unbelievably cumbersome API to do this: call
    elem_list once on an empty struct to get the element count, then
    allocate, then call it again.  Then for each element we can check
    the name directly, but need to allocate an "id" struct to query an
    abstract identifier, that we use with a separately-allocated "value"
    (on which we set the dynamically typed data) to send the command to
    the kernel.

    Every control whose name matches is written, not just the first.
    The control handle and the element list storage are released even
    if one of the ALSA calls raises.
    """
    dev = f"hw:{opts.card}".encode("ascii")
    rtnr_enable = re.compile(r'RTNR.*\s+rtnr_enable.*')
    # The accessors below are declared "void" in the ALSA control API, so
    # whatever ctypes reports as their return value is meaningless.  Call
    # them on the raw library handle so it is never fed to err_wrap().
    lib = alsa.lib

    ctl = C.c_ulong()
    alsa.snd_ctl_open(C.byref(ctl), dev, 0)
    try:
        elist = alsa.alloc("ctl_elem_list")
        alsa.snd_ctl_elem_list(ctl, elist)
        nelem = alsa.snd_ctl_elem_list_get_count(elist)
        alsa.snd_ctl_elem_list_alloc_space(elist, nelem)
        try:
            alsa.snd_ctl_elem_list(ctl, elist)
            for i in range(nelem):
                name = alsa.snd_ctl_elem_list_get_name(elist, i)
                if rtnr_enable.match(name):
                    print(f"Disabling control: {name}")
                    eid = alsa.alloc("ctl_elem_id")
                    val = alsa.alloc("ctl_elem_value")
                    lib.snd_ctl_elem_list_get_id(elist, i, C.byref(eid))
                    lib.snd_ctl_elem_value_set_id(val, eid)
                    lib.snd_ctl_elem_value_set_boolean(val, 0, False)
                    alsa.snd_ctl_elem_write(ctl, val)
        finally:
            # Pairs with snd_ctl_elem_list_alloc_space() above.
            lib.snd_ctl_elem_list_free_space(elist)
    finally:
        alsa.snd_ctl_close(ctl)

def pcm_play_buf(data):
    """
    Play interleaved s16 sample data on the playback PCM
    (hw:<opts.card>,<opts.pcm>) using blocking writes.

    data: bytes-like object holding opts.chan-channel 16 bit frames.  At
    most opts.rate * opts.duration frames are played; a trailing partial
    frame is ignored.  Nothing is opened or played when that leaves no
    whole frame.

    ALSA failures surface as the OSError raised by the alsa wrapper; the
    PCM handle is closed in either case.
    """
    data = bytearray(data)
    frame_bytes = 2 * opts.chan
    n = min(len(data) // frame_bytes, opts.rate * opts.duration)
    if n <= 0:
        # Also avoids from_buffer() below rejecting an empty bytearray.
        return

    # Raw address of the sample data.  It stays valid because "data" is
    # kept alive (and never resized) for the rest of this function.
    addr = C.addressof((C.c_byte * 1).from_buffer(data))
    off = 0

    pcm = C.c_long(0)
    dev = f"hw:{opts.card},{opts.pcm}".encode("ascii")
    alsa.snd_pcm_open(C.byref(pcm), dev, alsa.PCM_STREAM_PLAYBACK, 0)
    try:
        pcm_init_stream(pcm, opts.rate, opts.chan, alsa.PCM_FORMAT_S16_LE,
                        alsa.PCM_ACCESS_RW_INTERLEAVED)
        while n > 0:
            f = alsa.snd_pcm_writei(pcm, C.c_ulong(addr + off), C.c_ulong(n))
            n -= f
            # writei counts frames while "off" is a byte offset: convert,
            # or a short write would resume from the wrong position.
            off += f * frame_bytes
        alsa.snd_pcm_drain(pcm)
    finally:
        alsa.snd_pcm_close(pcm)

def pcm_play_chirp():
    """
    Play the synthesized chirp on the playback PCM
    (hw:<opts.card>,<opts.pcm>) through ALSA's mmap access mode and
    return an estimate of when it was emitted.

    Returns a time.perf_counter() value: the timestamp taken right after
    the chirp was written, plus the time (frames / opts.rate) needed to
    play out whatever was still queued in the ring buffer ahead of it.

    The statement order below is timing sensitive; in particular the
    timestamp is taken before the result of the chirp write is checked.
    """
    pcm = C.c_long(0)
    dev = f"hw:{opts.card},{opts.pcm}".encode("ascii")
    alsa.snd_pcm_open(C.byref(pcm), dev, alsa.PCM_STREAM_PLAYBACK, 0)
    pcm_init_stream(pcm, opts.rate, opts.chan, alsa.PCM_FORMAT_S16_LE,
                    alsa.PCM_ACCESS_MMAP_INTERLEAVED)

    (chirp, chirp_frames) = gen_chirp_s16le()

    # Reset the stream and queue up as much data as will fit in the
    # ring buffer
    area = alsa.pcm_channel_area_t()
    offset = C.c_ulong()
    # In/out argument for mmap_begin; the first request is opts.rate
    # frames, later ones reuse whatever the previous call stored here.
    frames = C.c_ulong(opts.rate)
    ring_frames = 0
    alsa.snd_pcm_prepare(pcm)
    alsa.snd_pcm_reset(pcm)
    while True:
        alsa.snd_pcm_avail_update(pcm)
        # Nothing is copied into the mapped area here: each region is
        # committed as-is, and the loop only counts how many frames the
        # ring accepts before a commit reports zero.
        alsa.snd_pcm_mmap_begin(pcm, C.byref(area), C.byref(offset), C.byref(frames))
        committed = alsa.snd_pcm_mmap_commit(pcm, offset, frames)
        ring_frames += committed
        if committed == 0:
            break

    # One ring buffer's worth of zero-valued s16 frames.
    silence = bytes(2 * opts.chan * ring_frames)

    # Start up the stream, spin until there is space in the buffer,
    # write the chirp.  This minimizes client-side overhead like
    # stream startup.  Then immediately take a timestamp and write
    # silence for one full cycle (to be 100% sure the buffer can't
    # wrap and chirp twice).
    alsa.snd_pcm_start(pcm)
    while alsa.snd_pcm_avail(pcm) < chirp_frames:
        pass
    # Frames still queued ahead of the chirp at the moment it is written.
    pre_buffered = ring_frames - alsa.snd_pcm_avail(pcm)
    f = alsa.snd_pcm_mmap_writei(pcm, chirp, chirp_frames)
    chirp_sent = time.perf_counter()
    assert f == chirp_frames

    # Every call passes the start of "silence" again; that is harmless
    # because the buffer is all zeros.
    n = 0
    while n < ring_frames:
        n += alsa.snd_pcm_mmap_writei(pcm, silence, ring_frames)
    alsa.snd_pcm_drain(pcm)
    alsa.snd_pcm_close(pcm)

    # Correct chirp_sent for buffered data!
    chirp_sent += pre_buffered / opts.rate
    return chirp_sent

def pcm_do_capture(duration):
    """
    Record from the capture PCM (hw:<opts.card>,<opts.cap>) for
    "duration" seconds worth of frames at opts.rate.

    Returns an array of tuples of (timestamp, bytes), no processing done
    here for performance reasons, just one heap allocation and copy.
    The timestamp is the time.perf_counter() value taken right after the
    read that produced the bytes; the bytes are raw interleaved frames
    in the capture format (s32 when opts.capbits is 32, else s16) with
    opts.capchan channels.

    ALSA failures surface as the OSError raised by the alsa wrapper; the
    PCM handle is closed in either case.
    """
    is32 = opts.capbits == 32
    fmt = alsa.PCM_FORMAT_S32_LE if is32 else alsa.PCM_FORMAT_S16_LE
    capsz = 4 if is32 else 2
    fsz = opts.capchan * capsz

    # One reusable read buffer; each block is copied out of it below.
    block_frames = opts.rate // 1000 # 1ms blocks
    buf = bytearray(fsz * block_frames)
    addr = C.c_ulong(C.addressof((C.c_byte * 1).from_buffer(buf)))
    buf_frames = C.c_ulong(block_frames)

    buflist = []
    frames_remaining = duration * opts.rate

    pcm = C.c_long(0)
    dev = f"hw:{opts.card},{opts.cap}".encode("ascii")
    alsa.snd_pcm_open(C.byref(pcm), dev, alsa.PCM_STREAM_CAPTURE, 0)
    try:
        pcm_init_stream(pcm, opts.rate, opts.capchan, fmt, alsa.PCM_ACCESS_RW_INTERLEAVED)
        while frames_remaining > 0:
            f = alsa.snd_pcm_readi(pcm, addr, buf_frames)
            t = time.perf_counter()
            frames_remaining -= f
            buflist.append((t, bytes(buf[0:f * fsz])))
    finally:
        # Release the capture device so it is not left open while the
        # caller goes on to process or play back the data.
        alsa.snd_pcm_close(pcm)
    return buflist

def gen_chirp_s16le():
    """
    A programmatically-detectable chirp/pop signal for testing latency.
    To minimize latency, we want the chirp to be low duration, high
    energy and high frequency.  This repeats an 8-sample square wave (6
    kHz at 48k sample rate).  Some devices can reproduce this well with
    as few as 8 repetitions (1.3ms), but on at least one mt8195 device
    it's unreliably audible unless repeated 128 times!  It's not caused
    by software in the DSP, more like a codec/amp feature (possibly
    related to power management, if we don't play other audio
    immediately before, it's even less reliable).

    Returns a tuple (data, frames): "data" is opts.chirpcyc repetitions
    of the waveform as interleaved little-endian s16 samples with every
    one of the opts.chan channels carrying the same value, and "frames"
    is the number of frames in "data".
    """
    half_period = 4 # frames spent at each of the two levels
    n = opts.chan * half_period
    cycle = struct.pack(f"<{2*n}h", *([-0x8000] * n + [0x7fff] * n))
    chirp = cycle * opts.chirpcyc
    # Derive the count from the buffer itself: one cycle is two half
    # periods, i.e. 8 frames, not 4.
    return (chirp, len(chirp) // (2 * opts.chan))

def cap_to_playback(buf):
    """
    Converts a byte array containing capture frames (which can have
    different sample format and channel count) to the playback format
    (always s16_le).  Also computes an "energy" value as the sum of
    absolute sample differences (in units of +/-1.0) over all result
    channels.  Returns both as a tuple.

    buf must hold a whole number of capture frames: opts.capchan samples
    each, 32 bit when opts.capbits is 32 and 16 bit otherwise.
    opts.capmap selects which captured channel feeds each of the
    opts.chan output channels.  Samples are decoded and encoded as
    little-endian regardless of the host byte order.
    """
    is32 = opts.capbits == 32
    cap = struct.Struct("<" + ('i' if is32 else 'h') * opts.capchan)
    play = struct.Struct(f"<{opts.chan}h")
    scale = 1 / (1 << (opts.capbits - 1))
    last_frame = []
    delta_sum = 0
    out_frames = []

    # NOTE: should consider low-passing the energy computation by
    # averaging ~N recent samples.  Otherwise high frequency noise can
    # dominate, which we don't really care about measuring (AEC can't
    # treat it, and it can plausibly create false positive chirp signals
    # loud enough).
    for i in range(0, len(buf), cap.size):
        raw = cap.unpack(buf[i:i+cap.size])           # Decode
        frame = [scale * raw[n] for n in opts.capmap] # Select + convert to float
        if last_frame:
            delta_sum += sum(abs(last_frame[x] - frame[x]) for x in range(opts.chan))
        last_frame = frame
        # Back to 16 bit, clamped so +1.0 cannot overflow the s16 range.
        iframe = [int(min(0x7fff, max(-0x8000, (1 << 15) * e))) for e in frame]
        out_frames.append(play.pack(*iframe))
    return (b''.join(out_frames), delta_sum)

def chirp_child(wpipe):
    """
    Capture for opts.duration seconds, then scan the recorded blocks in
    order for the first one whose energy crosses the chirp threshold.
    The timestamp of that block is written to wpipe as ASCII text;
    nothing is written when no block qualifies.
    """
    # Trigger level: a 0.1 unit swing over the 8-sample chirp waveform,
    # with energy normalized as "half-swing per sample".
    threshold = 0.1/8

    # NOTE: would be possible to do this analysis at the individual
    # sample layer for better time fidelity instead of in 1ms chunks.
    for (stamp, raw) in pcm_do_capture(opts.duration):
        (converted, energy) = cap_to_playback(raw)
        nframes = len(converted) / (2 * opts.chan)
        per_sample = energy / (nframes * opts.chan)
        if per_sample > threshold:
            os.write(wpipe, f"{stamp}".encode("ascii"))
            return

def echo_child(wpipe):
    """
    Capture for opts.duration seconds, total the energy of all recorded
    blocks and write the normalized result to wpipe as ASCII text with
    three decimals.
    """
    total = 0
    for (_stamp, raw) in pcm_do_capture(opts.duration):
        (_converted, delta) = cap_to_playback(raw)
        total += delta

    # Report "half-swing per second": purely cosmetic scaling of what
    # are essentially arbitrary numbers (a typical pop music track ends
    # up at ~few-hundred).
    normalized = total / (opts.duration * opts.chan)
    os.write(wpipe, f"{normalized:.3f}".encode("ascii"))

def chirp_test():
    """
    Forks a child process to listen for the chirp and write back a
    time.perf_counter() value (which is an invariant clock across
    processes) through a pipe.  The parent plays the chirp and prints
    the difference between the two timestamps in milliseconds.

    Exits the script with an error message (status 1) if the capture
    process fails or never reports a chirp.
    """
    (rfd, wfd) = os.pipe()
    # Anything still sitting in the stdio buffer would otherwise be
    # duplicated into the child and printed twice when it exits.
    sys.stdout.flush()
    pid = os.fork()
    if pid == 0:
        os.close(rfd)
        chirp_child(wfd)
        sys.exit(0)

    # Keep only the read end here.  With the write end still open in the
    # parent, the read below could never see end-of-file and would block
    # forever whenever the child wrote nothing.
    os.close(wfd)

    # Randomly sleep for a bit to make aliasing bugs (e.g. noise being
    # detected as a chirp) visible as unreliable output.
    time.sleep(random.randint(1000, 2000)/1000)
    chirp_sent = pcm_play_chirp()

    (_, status) = os.waitpid(pid, 0)
    msg = os.read(rfd, 9999).decode("ascii")
    os.close(rfd)
    if status != 0:
        sys.exit("Chirp test failed: capture process exited abnormally")
    if not msg:
        sys.exit("Chirp test failed: no chirp detected in captured audio")
    chirp_detected = float(msg)

    lat_ms = (chirp_detected - chirp_sent) * 1000
    print(f"Chirp latency: {lat_ms:.1f} ms")

def echo_test():
    """
    Similar to chirp test, but plays a .wav file while the child
    captures, and reports average capture energy (useful for testing mic
    gain and echo cancellation performance)

    Exits the script with an error message (status 1) if the capture
    process fails or reports nothing.
    """
    # Just slurps in the wav file and chops off the header, assuming
    # the user got the format and sampling rate correct.
    WAV_HDR_LEN = 44
    with open(opts.noise, "rb") as f:
        buf = f.read()[WAV_HDR_LEN:]

    (rfd, wfd) = os.pipe()
    # Anything still sitting in the stdio buffer would otherwise be
    # duplicated into the child and printed twice when it exits.
    sys.stdout.flush()
    pid = os.fork()
    if pid == 0:
        os.close(rfd)
        echo_child(wfd)
        sys.exit(0)

    # Keep only the read end here.  With the write end still open in the
    # parent, the read below could never see end-of-file and would block
    # forever if the child died before writing its result.
    os.close(wfd)

    pcm_play_buf(buf)

    (_, status) = os.waitpid(pid, 0)
    msg = os.read(rfd, 9999).decode("ascii")
    os.close(rfd)
    if status != 0 or not msg:
        sys.exit("Echo test failed: capture process did not report an energy value")
    print("Capture energy:", msg)

def base_test():
    """
    Default mode: record opts.duration seconds, convert every captured
    block to the playback format, play the concatenated result and
    print the accumulated energy.
    """
    chunks = []
    total = 0
    for (_stamp, raw) in pcm_do_capture(opts.duration):
        (converted, delta) = cap_to_playback(raw)
        chunks.append(converted)
        total += delta
    playback = b''.join(chunks)
    pcm_play_buf(playback)
    print(f"Energy {total}")

def main():
    """
    Script entry point: parse the command line, then run each step whose
    option flag is set, in a fixed order.
    """
    parse_opts()
    # (attribute on opts, step to run), listed in execution order.
    steps = (
        ("disable_rtnr", ctl_disable_rtnr),
        ("base_test", base_test),
        ("chirp_test", chirp_test),
        ("echo_test", echo_test),
    )
    for (flag, step) in steps:
        if getattr(opts, flag):
            step()

# Parsed command-line options.  Stays None until parse_opts() replaces it
# with the argparse namespace; the functions above read it as a global.
opts = None
# Shared libasound wrapper used by every function above.  NOTE: this is
# created at import time, so merely importing the module already loads
# libasound.so.2.
alsa = ALSA()
if __name__ == "__main__":
    main()