From deffb730c18ff42bc1d7f82325daae1f4806cb34 Mon Sep 17 00:00:00 2001 From: cpunt Date: Mon, 13 Apr 2026 17:14:13 +0000 Subject: [PATCH] fix silent RX drops: enlarge ring to 4 KiB, add drop counters The RX software ring between the PL011 ISR and the foreground reader was hard-coded to 128 bytes. At 115200 baud that is only ~11 ms of headroom, so any foreground stall longer than a flash page program or a GC pause caused bytes to silently disappear mid-frame. The ISR had a `// optional rxDrops++` placeholder in the overflow branch but no actual counter, so applications had no way to observe the loss. This commit: - widens the ring head/tail to uint16 and bumps bufferSize to 4096, giving ~355 ms of headroom at 115200. Still a power of two so the modulo compiles to an AND. Costs ~16 KiB of static RAM on a dual- UART RP2350 config; fits easily in the 520 KiB SRAM budget. - adds rxHwDrops and rxSwDrops counters on the UART struct and increments them from handleInterrupt when UARTDR arrives with per-byte error bits set (the OE bit indicates the hardware FIFO overflowed before the ISR serviced it) and when Buffer.Put returns false respectively. Both are volatile.Register32; the ISR is the only writer so increments are race-free on a single-core MCU, and aligned 32-bit loads/stores keep reads safe on dual-core. - exposes the counters via (*UART).RXDrops() so applications can surface them in telemetry. Both values should stay at zero under normal operation; nonzero hw indicates ISR latency exceeded one character time, nonzero sw indicates the application consumer is not draining the ring fast enough. RingBuffer.Size() and RingBuffer.Used() now return uint16 instead of uint8. Every caller in this repo already converts through int(), so this is a no-op internally. External callers that stored the return as uint8 will need to widen the type; there is no tagged release so no semver break. Verified: - tinygo build -target=pico2 ./cmd/uart_diag - tinygo build -target=pico2 ./examples/simple/uartx-polling - downstream: a 336 KiB firmware-update transfer over 115200 that used to corrupt 15-20 frames and take 60-180 s now runs clean in ~55 s (stop-and-wait floor) with rxHwDrops == rxSwDrops == 0. --- uartx/ringbuffer.go | 43 +++++++++++++++++++++++++++++++++---------- uartx/rp2_uart.go | 22 ++++++++++++++++++++-- uartx/uartx.go | 16 ++++++++++++++++ 3 files changed, 69 insertions(+), 12 deletions(-) diff --git a/uartx/ringbuffer.go b/uartx/ringbuffer.go index 303b9d6..f57e653 100644 --- a/uartx/ringbuffer.go +++ b/uartx/ringbuffer.go @@ -2,21 +2,44 @@ //go:build atmega || esp || nrf || sam || sifive || stm32 || k210 || nxp || rp2040 || rp2350 -// An API-compatible replacement for machine.RingBuffer with an added Size() method. -// Methods and semantics match TinyGo's implementation. +// RingBuffer is the single-producer/single-consumer byte queue that sits +// between the UART interrupt handler and the foreground reader. It used to +// be an API-compatible replacement for TinyGo's machine.RingBuffer, whose +// default capacity is 128 bytes. That default is too small for interrupt- +// driven use at typical bauds (see the file-level comment on bufferSize); +// the size and index width here have been raised so the ring can absorb +// longer foreground stalls without silently dropping bytes. package uartx import "runtime/volatile" -// Choose a power-of-two size for efficient modulo. -const bufferSize uint8 = 128 +// bufferSize is the per-instance RX/TX ring capacity in bytes. +// +// At 115200 baud a byte arrives every ~87 us, so 128 bytes is only about +// 11 ms of headroom between the ISR producing and the foreground consuming. +// Any stall longer than that (a flash page program, a GC pause, a heavy +// scheduled task) causes the ISR's Put to start failing and bytes are lost +// with no visible symptom other than downstream data corruption. +// +// 4096 bytes gives ~355 ms of headroom at 115200 (and scales linearly with +// baud), which is large enough to absorb any realistic stall on the MCUs +// this package currently targets. The cost is 2 * bufferSize bytes of +// static RAM per UART (one RX, one TX); on RP2040/RP2350 with hundreds of +// KB of SRAM this is negligible. +// +// bufferSize must remain a power of two so `(h+1) % bufferSize` compiles +// to a bitmask. +const bufferSize uint16 = 4096 -// RingBuffer is a byte ring buffer compatible with TinyGo's machine.RingBuffer. +// RingBuffer is a byte ring buffer. The head/tail indices are uint16 so +// the capacity can exceed 255 bytes without wraparound ambiguity; the +// existing head-minus-tail arithmetic still works because uint16 +// subtraction is modulo 2^16 which is a multiple of bufferSize. type RingBuffer struct { rxbuffer [bufferSize]volatile.Register8 - head volatile.Register8 - tail volatile.Register8 + head volatile.Register16 + tail volatile.Register16 } // NewRingBuffer returns a new ring buffer. @@ -25,13 +48,13 @@ func NewRingBuffer() *RingBuffer { } // Size returns the total capacity of the buffer in bytes. -func (rb *RingBuffer) Size() uint8 { +func (rb *RingBuffer) Size() uint16 { return bufferSize } // Used returns how many bytes in buffer have been used. -func (rb *RingBuffer) Used() uint8 { - return uint8(rb.head.Get() - rb.tail.Get()) +func (rb *RingBuffer) Used() uint16 { + return uint16(rb.head.Get() - rb.tail.Get()) } // Put stores a byte in the buffer. If the buffer is already full, it returns false. diff --git a/uartx/rp2_uart.go b/uartx/rp2_uart.go index fa00450..d38688b 100644 --- a/uartx/rp2_uart.go +++ b/uartx/rp2_uart.go @@ -14,6 +14,7 @@ import ( "errors" "machine" "runtime/interrupt" + "runtime/volatile" ) // UART represents a single PL011 instance on RP2040/RP2350. @@ -38,6 +39,15 @@ type UART struct { notify chan struct{} // coalesced RX readiness notifications baud uint32 // last configured baud (for diagnostics, not used by HW) + + // Drop counters incremented from the RX ISR and read from foreground + // context via RXDrops. Single writer (the ISR) plus 32-bit aligned + // reads means no locking is required on a single-core MCU. On a + // dual-core MCU these are still race-free for Cortex-M0+/M33 because + // aligned 32-bit loads and stores are naturally atomic; the counters + // may however read slightly stale in that case. + rxHwDrops volatile.Register32 // bytes with PL011 error bits set (OE/BE/PE/FE) + rxSwDrops volatile.Register32 // bytes the ISR could not enqueue because Buffer was full } // Configure sets up the PL011, its pins and interrupts. It leaves RXIM/RTIM @@ -303,13 +313,21 @@ func (uart *UART) handleInterrupt(interrupt.Interrupt) { r := uart.Bus.UARTDR.Get() if (r & (rp.UART0_UARTDR_OE | rp.UART0_UARTDR_BE | rp.UART0_UARTDR_PE | rp.UART0_UARTDR_FE)) != 0 { - // Drop errored byte; reading DR clears the per-byte error flags. + // Drop errored byte; reading DR clears the per-byte error + // flags. UARTDR_OE specifically means the hardware FIFO + // overflowed before this ISR got here, so this counts as + // an observable drop from the consumer's point of view. + uart.rxHwDrops.Set(uart.rxHwDrops.Get() + 1) continue } if uart.Buffer.Put(byte(r & 0xFF)) { enq++ } else { - // optional rxDrops++ + // Software RX ring is full: the foreground hasn't drained + // it in time. Count the drop so the application can + // surface it; silently losing bytes here is what motivated + // the ring resize and these counters. + uart.rxSwDrops.Set(uart.rxSwDrops.Get() + 1) } } diff --git a/uartx/uartx.go b/uartx/uartx.go index 9757304..62c2082 100644 --- a/uartx/uartx.go +++ b/uartx/uartx.go @@ -184,3 +184,19 @@ func (u *UART) TxFree() int { return int(u.TxBuffer.Size() - u.TxBuffer.Used()) func (uart *UART) Receive(data byte) { uart.Buffer.Put(data) } + +// RXDrops returns the cumulative number of RX bytes that have been dropped +// since boot. hw counts bytes that arrived from the PL011 with any of its +// per-byte error bits set (OE, BE, PE, FE); the most informative of these +// is OE, which means the hardware FIFO overflowed before the ISR serviced +// it. sw counts bytes the ISR could not enqueue because the software RX +// ring was full. Both counters are monotonic and wrap at 2^32. +// +// In a healthy steady state both counters should stay at zero at the +// package's supported bauds. A nonzero hw count indicates ISR latency +// exceeded one character time at the configured baud; a nonzero sw count +// indicates the application's foreground consumer is not draining the +// ring fast enough. +func (uart *UART) RXDrops() (hw, sw uint32) { + return uart.rxHwDrops.Get(), uart.rxSwDrops.Get() +}