Skip to content

Commit 80f2de8

Browse files
authored
dynamic buffer sizes
* fixed crc for CAPCOM loopback * dynamic buffer sizes * cache offset permanent * split buffers, smarter CRC
1 parent d835e4f commit 80f2de8

1 file changed

Lines changed: 118 additions & 93 deletions

File tree

src/dmdreader.cpp

Lines changed: 118 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "dmdreader.h"
22

33
#include <array>
4+
#include <cstdint>
5+
#include <cstdlib>
46

57
#include "crc32.h"
68
#include "dmd_counter.h"
@@ -11,29 +13,6 @@
1113
#include "hardware/pio.h"
1214
#include "loopback_renderer.h"
1315
#include "spi_slave_sender.pio.h"
14-
15-
// should a CRC32 checksum be calculated and sent with each frame
16-
#define USE_CRC
17-
18-
// suppress duplicate frames (implies USE_CRC)
19-
#define SUPRESS_DUPLICATES
20-
21-
/**
22-
* Glossary
23-
*
24-
* Plane
25-
* image with one bit data per pixel. This does NOT mean it is stored with
26-
* 1bit/pixel
27-
*
28-
* Frame
29-
* image with potentially more than one bit per pixel
30-
*
31-
*/
32-
33-
#ifdef SUPRESS_DUPLICATES
34-
#define USE_CRC
35-
#endif
36-
3716
typedef struct buf32_t {
3817
uint8_t byte0;
3918
uint8_t byte1;
@@ -121,26 +100,42 @@ uint16_t source_bytesperframe;
121100
uint16_t source_lineoversampling;
122101
uint16_t source_dwordsperline;
123102
uint16_t source_mergeplanes;
103+
uint16_t offset[MAX_PLANESPERFRAME];
104+
105+
/**
 * @brief Allocate a heap buffer whose start address is aligned.
 *
 * Over-allocates with malloc() and rounds the returned address up to the next
 * multiple of the effective alignment.
 *
 * @param size      number of usable bytes the caller needs
 * @param alignment requested alignment in bytes; must be a power of two.
 *                  Values smaller than alignof(void *) are raised to it.
 * @param base_out  optional; receives the pointer returned by malloc(), or
 *                  nullptr on failure. The aligned return value may differ
 *                  from this pointer, so base_out is the value to hand to
 *                  free().
 * @return the aligned buffer start, or nullptr if the alignment is not a
 *         power of two, the padded size would overflow size_t, or malloc()
 *         fails.
 */
static uint8_t *alloc_aligned_buffer(size_t size, size_t alignment,
                                     void **base_out) {
  // Never leave the caller with a stale base pointer on a failure path.
  if (base_out) {
    *base_out = nullptr;
  }

  size_t effective_alignment =
      (alignment < alignof(void *)) ? alignof(void *) : alignment;

  // The mask-based round-up below is only correct for powers of two.
  if ((effective_alignment & (effective_alignment - 1)) != 0) {
    return nullptr;
  }

  // Worst case we have to skip (alignment - 1) bytes to reach an aligned
  // address. Reject sizes where adding that padding would wrap around and
  // silently produce a buffer that is far too small.
  size_t padding = effective_alignment - 1;
  if (size > SIZE_MAX - padding) {
    return nullptr;
  }

  void *base = malloc(size + padding);
  if (!base) {
    return nullptr;
  }

  uintptr_t raw = reinterpret_cast<uintptr_t>(base);
  uintptr_t aligned = (raw + padding) & ~static_cast<uintptr_t>(padding);

  if (base_out) {
    *base_out = base;
  }
  return reinterpret_cast<uint8_t *>(aligned);
}
124121

125122
// the buffers need to be aligned to 4 byte because we work with uint32_t
126123
// pointers later. raw data read from DMD
127-
uint8_t planebuf1[MAX_WIDTH * MAX_HEIGHT * MAX_BITSPERPIXEL *
128-
MAX_PLANESPERFRAME / 8] __attribute__((aligned(4))) = {0};
129-
uint8_t planebuf2[MAX_WIDTH * MAX_HEIGHT * MAX_BITSPERPIXEL *
130-
MAX_PLANESPERFRAME / 8] __attribute__((aligned(4))) = {0};
131-
uint8_t *currentPlaneBuffer = planebuf2;
124+
uint8_t *planebuf1;
125+
uint8_t *planebuf2;
126+
uint8_t *currentPlaneBuffer;
127+
128+
// tmp buffer for oversampling etc.
129+
uint8_t *processingbuf;
132130

133131
// processed frame (merged planes)
134-
uint8_t framebuf1[MAX_WIDTH * MAX_HEIGHT * MAX_BITSPERPIXEL / 8 *
135-
MAX_OVERSAMPLING] __attribute__((aligned(8)));
136-
uint8_t framebuf2[MAX_WIDTH * MAX_HEIGHT * MAX_BITSPERPIXEL / 8 *
137-
MAX_OVERSAMPLING] __attribute__((aligned(8)));
138-
uint8_t framebuf3[MAX_WIDTH * MAX_HEIGHT * MAX_BITSPERPIXEL / 8 *
139-
MAX_OVERSAMPLING] __attribute__((aligned(8)));
140-
uint8_t *current_framebuf = framebuf1;
141-
uint8_t *framebuf_to_send = framebuf2;
142-
143-
uint32_t frame_crc;
132+
uint8_t *framebuf1;
133+
uint8_t *framebuf2;
134+
uint8_t *framebuf3;
135+
uint8_t *current_framebuf;
136+
uint8_t *framebuf_to_send;
137+
138+
uint32_t frame_crc = 0;
144139
uint32_t crc_previous_frame = 0;
145140
bool detected_0_1_0_1 = false;
146141
bool detected_1_0_0_0 = false;
@@ -266,22 +261,15 @@ void finish_spi() { digitalWrite(SPI0_CS, LOW); }
266261
* @param pixbuf a frame to send
267262
*/
268263
bool spi_send_pix(uint8_t *pixbuf, uint32_t crc32, bool skip_when_busy) {
269-
#ifdef USE_CRC
270264
block_header_t h = {.block_type = SPI_BLOCK_PIX_CRC};
271265
block_pix_crc_header_t ph = {};
272-
#else
273-
block_header_t h = {.block_type = SPI_BLOCK_PIX};
274-
block_pix_header_t ph = {};
275-
#endif
276266

277267
// round length to 4-byte blocks
278268
h.len = (((target_bytes + 3) / 4) * 4) + sizeof(h) + sizeof(ph);
279269
ph.columns = source_width;
280270
ph.rows = source_height;
281271
ph.bitsperpixel = target_bitsperpixel;
282-
#ifdef USE_CRC
283272
ph.crc32 = crc32;
284-
#endif
285273

286274
if (skip_when_busy) {
287275
if (spi_busy()) return false;
@@ -313,17 +301,17 @@ void spi_dma_handler() {
313301
* @return uint32_t Number of clocks per second
314302
*/
315303
uint32_t count_clock(uint pin) {
  // Load the counting program into a free state machine that can reach `pin`.
  uint program_offset;
  pio_claim_free_sm_and_add_program_for_gpio_range(
      &dmd_count_signal_program, &dmd_pio, &dmd_sm, &program_offset, pin, 1,
      true);
  dmd_counter_program_init(dmd_pio, dmd_sm, program_offset, pin);

  // Let the state machine run for a 500 ms sampling window.
  pio_sm_set_enabled(dmd_pio, dmd_sm, true);
  delay(500);

  // Inject an `in x, 32` instruction and fetch the resulting word.
  pio_sm_exec(dmd_pio, dmd_sm, pio_encode_in(pio_x, 32));
  const uint32_t raw = pio_sm_get(dmd_pio, dmd_sm);

  // Stop the state machine and give program space and SM back.
  pio_sm_set_enabled(dmd_pio, dmd_sm, false);
  pio_remove_program_and_unclaim_sm(&dmd_count_signal_program, dmd_pio, dmd_sm,
                                    program_offset);

  // NOTE(review): the value is bit-inverted before use, presumably because the
  // PIO program counts downwards - confirm against dmd_counter.
  const uint32_t ticks = ~raw;

  // Sampled for half a second, so double it to get a per-second figure.
  return ticks * 2;
}
@@ -508,22 +496,17 @@ void dmd_dma_handler() {
508496
uint32_t *planebuf = (uint32_t *)currentPlaneBuffer;
509497
buf32_t *v;
510498
uint32_t res;
499+
// source_dwordsperframe is not the entire frame buffer if plane history is
500+
// used. So only the new plane data is fixed here.
511501
for (int i = 0; i < source_dwordsperframe; i++) {
512502
v = (buf32_t *)planebuf;
513503
res = (v->byte3 << 24) | (v->byte2 << 16) | (v->byte1 << 8) | (v->byte0);
514504
*planebuf = res;
515505
planebuf++;
516506
}
517507

518-
// Merge multiple planes to get the frame data.
519-
// Calculate offsets for the first pixel of each plane and cache these.
520-
uint16_t offset[MAX_PLANESPERFRAME];
521-
for (int i = 0; i < MAX_PLANESPERFRAME; i++) {
522-
offset[i] = i * source_dwordsperplane;
523-
}
524-
525508
// Get a 32bit pointer to the frame buffer to handle more pixels at once.
526-
uint32_t *framebuf = (uint32_t *)current_framebuf;
509+
uint32_t *framebuf = (uint32_t *)processingbuf;
527510

528511
bool source_shiftplanesatmerge = (source_mergeplanes == MERGEPLANES_ADDSHIFT);
529512

@@ -606,6 +589,8 @@ void dmd_dma_handler() {
606589
// Write second 8 pixel in lower 16 Bit.
607590
framebuf[out] |= v16;
608591
}
592+
} else if (2 == source_bitsperpixel && 4 == target_bitsperpixel) {
593+
// There's no system using this conversion yet, but let's have it ready
609594
}
610595
}
611596

@@ -668,13 +653,17 @@ void dmd_dma_handler() {
668653
}
669654
}
670655

671-
#ifdef USE_CRC
672-
frame_crc = crc32(0, current_framebuf, target_bytes);
673-
#endif
656+
memcpy(current_framebuf, processingbuf, loopback ? source_bytes : target_bytes);
657+
658+
frame_crc =
659+
crc32(0, current_framebuf, loopback ? source_bytes : target_bytes);
674660

675661
switch_buffers();
676662

677-
frame_received = true;
663+
if (frame_crc != crc_previous_frame) {
664+
crc_previous_frame = frame_crc;
665+
frame_received = true;
666+
}
678667
}
679668

680669
void dmdreader_error_blink(bool no_error) {
@@ -963,6 +952,49 @@ bool dmdreader_init(bool return_on_no_detection) {
963952
source_bytesperframe = source_bytesperplane * source_planesperframe;
964953
source_dwordsperline = source_width * source_bitsperpixel / 32;
965954

955+
if (!planebuf1) {
956+
size_t plane_bytes = source_bytesperplane * source_planesperframe;
957+
size_t dma_bytes = source_dwordsperframe * sizeof(uint32_t);
958+
if (dma_bytes > plane_bytes) {
959+
plane_bytes = dma_bytes;
960+
}
961+
962+
size_t processing_bytes = source_bytes * source_lineoversampling;
963+
964+
planebuf1 = alloc_aligned_buffer(plane_bytes, 4, nullptr);
965+
planebuf2 = alloc_aligned_buffer(plane_bytes, 4, nullptr);
966+
processingbuf = alloc_aligned_buffer(processing_bytes, 8, nullptr);
967+
framebuf1 = alloc_aligned_buffer(source_bytes, 8, nullptr);
968+
framebuf2 = alloc_aligned_buffer(source_bytes, 8, nullptr);
969+
size_t framebuf3_bytes = target_bytes;
970+
size_t loopback_render_bytes =
971+
source_width * source_height * 4 / 8; // 4bpp render buffer
972+
if (loopback_render_bytes > framebuf3_bytes) {
973+
framebuf3_bytes = loopback_render_bytes;
974+
}
975+
framebuf3 = alloc_aligned_buffer(framebuf3_bytes, 8, nullptr);
976+
977+
dmdreader_error_blink(planebuf1 && planebuf2 && processingbuf &&
978+
framebuf1 && framebuf2 && framebuf3);
979+
980+
memset(planebuf1, 0, plane_bytes);
981+
memset(planebuf2, 0, plane_bytes);
982+
memset(processingbuf, 0, processing_bytes);
983+
memset(framebuf1, 0, source_bytes);
984+
memset(framebuf2, 0, source_bytes);
985+
memset(framebuf3, 0, framebuf3_bytes);
986+
}
987+
988+
currentPlaneBuffer = planebuf2;
989+
current_framebuf = framebuf1;
990+
framebuf_to_send = framebuf2;
991+
992+
// Merge multiple planes to get the frame data.
993+
// Calculate offsets for the first pixel of each plane and cache these.
994+
for (int i = 0; i < MAX_PLANESPERFRAME; i++) {
995+
offset[i] = i * source_dwordsperplane;
996+
}
997+
966998
// DMA for DMD reader
967999
dmd_dma_channel = dma_claim_unused_channel(true);
9681000
dmd_dma_channel_cfg = dma_channel_get_default_config(dmd_dma_channel);
@@ -1005,10 +1037,11 @@ void dmdreader_spi_init() {
10051037
digitalWrite(SPI0_CS, LOW);
10061038

10071039
// initialize SPI slave PIO
1008-
uint offset;
1040+
uint pio_offset;
10091041
dmdreader_error_blink(pio_claim_free_sm_and_add_program_for_gpio_range(
1010-
&clocked_output_program, &spi_pio, &spi_sm, &offset, SPI_BASE, 4, true));
1011-
clocked_output_program_init(spi_pio, spi_sm, offset, SPI_BASE);
1042+
&clocked_output_program, &spi_pio, &spi_sm, &pio_offset, SPI_BASE, 4,
1043+
true));
1044+
clocked_output_program_init(spi_pio, spi_sm, pio_offset, SPI_BASE);
10121045

10131046
// DMA for SPI
10141047
spi_dma_channel = dma_claim_unused_channel(true);
@@ -1033,14 +1066,7 @@ void dmdreader_spi_init() {
10331066
bool dmdreader_spi_send() {
10341067
if (!loopback && frame_received) {
10351068
frame_received = false;
1036-
#ifdef SUPRESS_DUPLICATES
1037-
if (frame_crc != crc_previous_frame) {
1038-
spi_send_pix(framebuf_to_send, frame_crc, true);
1039-
crc_previous_frame = frame_crc;
1040-
}
1041-
#else
10421069
spi_send_pix(framebuf_to_send, frame_crc, true);
1043-
#endif
10441070

10451071
return true;
10461072
}
@@ -1056,38 +1082,37 @@ void dmdreader_loopback_init(uint8_t *buffer1, uint8_t *buffer2, Color color) {
10561082
loopback = true;
10571083
}
10581084

1059-
void dmdreader_loopback_stop() { loopback = false; }
1085+
/**
 * @brief Leave loopback mode and release framebuf3.
 *
 * Safe to call more than once: the pointer is cleared after it is freed, so a
 * repeated call ends up in free(nullptr), which is a no-op.
 */
void dmdreader_loopback_stop() {
  // Clear the flag first: dmdreader_loopback_render() only writes through
  // framebuf3 while `loopback` is set, so it must see the mode change before
  // the buffer goes away.
  loopback = false;

  // NOTE(review): framebuf3 comes from alloc_aligned_buffer(), which may
  // return an address that differs from the malloc() base. Confirm that the
  // pointer handed to free() here is the one malloc() returned.
  free(framebuf3);

  // Do not keep a dangling pointer around; this prevents a double free and
  // turns any later use into an obvious null access instead of heap
  // corruption.
  // NOTE(review): dmdreader_init() appears to allocate the buffers only while
  // planebuf1 is null, so framebuf3 is presumably not re-created after this
  // call - verify before re-entering loopback mode.
  framebuf3 = nullptr;
}
10601089

10611090
uint8_t *dmdreader_loopback_render() {
10621091
uint64_t *frame4bit = (uint64_t *)framebuf3;
10631092

10641093
if (loopback && frame_received) {
10651094
frame_received = false;
1066-
if (frame_crc != crc_previous_frame) {
1067-
if (current_renderbuf == renderbuf1) {
1068-
current_renderbuf = renderbuf2;
1069-
} else {
1070-
current_framebuf = renderbuf1;
1071-
}
1095+
if (current_renderbuf == renderbuf1) {
1096+
current_renderbuf = renderbuf2;
1097+
} else {
1098+
current_renderbuf = renderbuf1;
1099+
}
10721100

1073-
auto func =
1074-
get_optimized_converter(source_width, source_height, monochromeColor);
1075-
if (func) {
1076-
if (2 == source_bitsperpixel) {
1077-
for (uint16_t i = 0; i < source_dwords; i++) {
1078-
frame4bit[i] =
1079-
convert_2bit_to_4bit_fast(((uint32_t *)framebuf_to_send)[i]);
1080-
}
1081-
func((uint32_t *)frame4bit, current_renderbuf);
1082-
} else {
1083-
func((uint32_t *)framebuf_to_send, current_renderbuf);
1101+
auto func =
1102+
get_optimized_converter(source_width, source_height, monochromeColor);
1103+
if (func) {
1104+
if (2 == source_bitsperpixel) {
1105+
for (uint16_t i = 0; i < source_dwords; i++) {
1106+
frame4bit[i] =
1107+
convert_2bit_to_4bit_fast(((uint32_t *)framebuf_to_send)[i]);
10841108
}
1109+
func((uint32_t *)frame4bit, current_renderbuf);
1110+
} else {
1111+
func((uint32_t *)framebuf_to_send, current_renderbuf);
10851112
}
1086-
1087-
crc_previous_frame = frame_crc;
1088-
1089-
return current_renderbuf;
10901113
}
1114+
1115+
return current_renderbuf;
10911116
}
10921117

10931118
return nullptr;

0 commit comments

Comments
 (0)