From 0dea98b59be0fcbade89317ec296910b1cdd20c5 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Sat, 28 Mar 2026 07:18:18 -0700 Subject: [PATCH 1/6] examples: libmetal: amd_rpu: channel fields are used when possible Use members of channel struct when able as opposed to locals Signed-off-by: Ben Levinsky --- .../machine/remote/amd_rpu/platform_init.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/libmetal/machine/remote/amd_rpu/platform_init.c b/examples/libmetal/machine/remote/amd_rpu/platform_init.c index fd26b226..ac676810 100644 --- a/examples/libmetal/machine/remote/amd_rpu/platform_init.c +++ b/examples/libmetal/machine/remote/amd_rpu/platform_init.c @@ -388,7 +388,6 @@ static inline int ipi_irq_handler(int vect_id, void *priv) int platform_init(struct channel_s *ch) { struct metal_init_params metal_param = XLNX_PLATFORM_METAL_INIT_PARAMS; - struct metal_io_region *io = NULL; int ret; enable_caches(); @@ -430,23 +429,24 @@ int platform_init(struct channel_s *ch) } /* wipe pending interrupts */ - io = metal_device_io_region(ipi_dev, 0); - if (!io) { + ch->ipi_io = metal_device_io_region(ipi_dev, 0); + if (!ch->ipi_io) { metal_err("REMOTE: Failed to map io region for %s.\n", ipi_dev->name); } else { /* disable IPI interrupt */ - metal_io_write32(io, XIPIPSU_IDR_OFFSET, IPI_MASK); + metal_io_write32(ch->ipi_io, XIPIPSU_IDR_OFFSET, IPI_MASK); /* clear old IPI interrupt */ - metal_io_write32(io, XIPIPSU_ISR_OFFSET, IPI_MASK); + metal_io_write32(ch->ipi_io, XIPIPSU_ISR_OFFSET, IPI_MASK); } - ch->ipi_io = io; ch->ipi_mask = IPI_MASK; + if (!ch->ipi_io) + return -ENODEV; /* disable IPI interrupt */ - metal_io_write32(io, XIPIPSU_IDR_OFFSET, IPI_MASK); + metal_io_write32(ch->ipi_io, XIPIPSU_IDR_OFFSET, IPI_MASK); /* clear old IPI interrupt */ - metal_io_write32(io, XIPIPSU_ISR_OFFSET, IPI_MASK); + metal_io_write32(ch->ipi_io, XIPIPSU_ISR_OFFSET, IPI_MASK); /* Get the IPI IRQ from the opened IPI device */ ch->irq_vector_id 
= (intptr_t)ipi_dev->irq_info; /* Register IPI irq handler */ @@ -485,6 +485,8 @@ int platform_init(struct channel_s *ch) void platform_cleanup(struct channel_s *ch) { + metal_io_write32(ch->ipi_io, XIPIPSU_IDR_OFFSET, ch->ipi_mask); + metal_irq_disable(ch->irq_vector_id); metal_irq_unregister(ch->irq_vector_id); memset(&ch, 0, sizeof(ch)); From dc2f5cadc0a640ebde607cc2da1341ac46f12f82 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Sat, 28 Mar 2026 07:18:19 -0700 Subject: [PATCH 2/6] examples: libmetal: amd_rpu: order libraries alphabetically Cosmetic change for remote side - have libraries ordered alphabetically Signed-off-by: Ben Levinsky --- .../irq_shmem_demo/remote/irq_shmem_demo.c | 3 +-- .../machine/remote/amd_rpu/platform_init.c | 22 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c b/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c index 12aeea57..e6459173 100644 --- a/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c +++ b/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c @@ -12,11 +12,10 @@ * Shared-memory partitioning details are documented in machine/remote/amd_rpu/ * README.md. 
*/ +#include "common.h" #include -#include "common.h" - #define BUF_SIZE_MAX 512 #define SHUTDOWN "shutdown" diff --git a/examples/libmetal/machine/remote/amd_rpu/platform_init.c b/examples/libmetal/machine/remote/amd_rpu/platform_init.c index ac676810..4179bbea 100644 --- a/examples/libmetal/machine/remote/amd_rpu/platform_init.c +++ b/examples/libmetal/machine/remote/amd_rpu/platform_init.c @@ -4,21 +4,21 @@ * SPDX-License-Identifier: BSD-3-Clause */ -#include -#include -#include -#include -#include -#include -#include -#include +#include "common.h" -#include #include -#include #include +#include +#include -#include "common.h" +#include +#include +#include +#include +#include +#include +#include +#include #ifdef STDOUT_IS_16550 #include From 767fe1a58dc185bf02bfbdf1a767e251bcba6be1 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Sat, 28 Mar 2026 07:18:20 -0700 Subject: [PATCH 3/6] examples: libmetal: remote: accounts for non-contiguous area As the host application now uses 3 shared memory areas that can be non-contiguous, ensure that the remote side accounts for this too. 
Signed-off-by: Ben Levinsky --- .../irq_shmem_demo/remote/irq_shmem_demo.c | 64 +++------ .../machine/remote/amd_rpu/platform_init.c | 136 ++++++++++++++---- .../amd_rpu/system/freertos/amp_demo_os.h | 2 + .../amd_rpu/system/generic/amp_demo_os.h | 2 + 4 files changed, 135 insertions(+), 69 deletions(-) diff --git a/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c b/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c index e6459173..437910a1 100644 --- a/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c +++ b/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c @@ -19,41 +19,12 @@ #define BUF_SIZE_MAX 512 #define SHUTDOWN "shutdown" -/* Shared memory offsets */ -#define SHM0_DESC_OFFSET 0x0 -#define SHM1_DESC_OFFSET SHM0_DESC_SIZE -#define SHM_PAYLOAD_OFFSET (SHM0_DESC_SIZE + SHM1_DESC_SIZE) - /* Shared memory descriptors offset */ #define SHM_DESC_AVAIL_OFFSET 0x00 #define SHM_DESC_USED_OFFSET 0x04 #define SHM_DESC_ADDR_ARRAY_OFFSET 0x08 -/* Descriptor 0 (Host to Remote) resides at SHM0_DESC_OFFSET. - * Descriptor 1 (Remote to Host) resides at SHM1_DESC_OFFSET. - * The payload carveout begins at SHM_PAYLOAD_OFFSET and is split evenly - * between RX (lower half) and TX (upper half). - * - * Note that H_TO_R_ is host to remote and R_TO_H_ is vice versa. 
- */ -#define SHM_DESC_OFFSET_H_TO_R SHM0_DESC_OFFSET -#define SHM_DESC_OFFSET_R_TO_H SHM1_DESC_OFFSET -#define SHM_PAYLOAD_H_TO_R SHM_PAYLOAD_RX_OFFSET -#define SHM_PAYLOAD_R_TO_H SHM_PAYLOAD_TX_OFFSET - -#define H_TO_R_DESC_ADDR_START \ - (SHM_DESC_OFFSET_H_TO_R + SHM_DESC_ADDR_ARRAY_OFFSET) -#define H_TO_R_DESC_ADDR_END \ - (SHM_DESC_OFFSET_H_TO_R + SHM0_DESC_SIZE) -#define R_TO_H_DESC_ADDR_START \ - (SHM_DESC_OFFSET_R_TO_H + SHM_DESC_ADDR_ARRAY_OFFSET) -#define R_TO_H_DESC_ADDR_END \ - (SHM_DESC_OFFSET_R_TO_H + SHM1_DESC_SIZE) - -#define H_TO_R_PAYLOAD_START SHM_PAYLOAD_H_TO_R -#define H_TO_R_PAYLOAD_END (SHM_PAYLOAD_H_TO_R + SHM_PAYLOAD_HALF_SIZE) -#define R_TO_H_PAYLOAD_START SHM_PAYLOAD_R_TO_H -#define R_TO_H_PAYLOAD_END (SHM_PAYLOAD_R_TO_H + SHM_PAYLOAD_HALF_SIZE) +#define SHM_DESC_ADDR_START SHM_DESC_ADDR_ARRAY_OFFSET #define PKGS_TOTAL 1024 /** @@ -69,8 +40,7 @@ */ int demo(void *arg) { - unsigned long tx_data_offset, rx_data_offset, rx_used_offset; - unsigned long tx_avail_offset, rx_avail_offset; + unsigned long tx_data_offset, rx_data_offset; unsigned long tx_addr_offset, rx_addr_offset; struct channel_s ch_s = {0x0}; struct channel_s *ch = &ch_s; @@ -107,32 +77,35 @@ int demo(void *arg) } /* Set tx/rx buffer address offset */ - tx_addr_offset = R_TO_H_DESC_ADDR_START; - rx_addr_offset = H_TO_R_DESC_ADDR_START; - tx_data_offset = R_TO_H_PAYLOAD_START; + tx_addr_offset = SHM_DESC_ADDR_START; + rx_addr_offset = SHM_DESC_ADDR_START; + tx_data_offset = SHM_PAYLOAD_TX_OFFSET; metal_info("REMOTE: Wait for echo test to start.\n"); rx_count = 0; while (1) { system_suspend(ch); - rx_avail = metal_io_read32(ch->shm_io, - SHM_DESC_OFFSET_H_TO_R + + rx_avail = metal_io_read32(ch->host_to_remote_desc_io, SHM_DESC_AVAIL_OFFSET); while (rx_count != rx_avail) { uint32_t buf_phy_addr; /* Get the buffer location from the rx addr array. 
*/ - buf_phy_addr = metal_io_read32(ch->shm_io, rx_addr_offset); + buf_phy_addr = metal_io_read32(ch->host_to_remote_desc_io, + rx_addr_offset); rx_data_offset = metal_io_phys_to_offset(ch->shm_io, (metal_phys_addr_t)buf_phy_addr); if (rx_data_offset == METAL_BAD_OFFSET) { metal_err("REMOTE: [%u]failed to get rx offset: 0x%x, 0x%lx.\n", rx_count, buf_phy_addr, - metal_io_phys(ch->shm_io, rx_addr_offset)); + metal_io_phys(ch->host_to_remote_desc_io, + rx_addr_offset)); ret = -EINVAL; goto out; } rx_addr_offset += sizeof(buf_phy_addr); + if (rx_addr_offset >= SHM0_DESC_SIZE) + rx_addr_offset = SHM_DESC_ADDR_START; /* Read message header from shared memory */ ret = metal_io_block_read(ch->shm_io, rx_data_offset, lbuf, @@ -167,8 +140,8 @@ int demo(void *arg) payload = (char *)lbuf + sizeof(*msg_hdr); rx_count++; /* Increase rx used count to indicate received data was used. */ - metal_io_write32(ch->shm_io, - SHM_DESC_OFFSET_H_TO_R + SHM_DESC_USED_OFFSET, + metal_io_write32(ch->host_to_remote_desc_io, + SHM_DESC_USED_OFFSET, rx_count); /* Check if it is the shutdown message. */ @@ -198,13 +171,16 @@ int demo(void *arg) goto out; } - metal_io_write32(ch->shm_io, tx_addr_offset, buf_phy_addr); + metal_io_write32(ch->remote_to_host_desc_io, + tx_addr_offset, buf_phy_addr); tx_data_offset += sizeof(struct msg_hdr_s) + msg_hdr->len; tx_addr_offset += sizeof(uint32_t); + if (tx_addr_offset >= SHM1_DESC_SIZE) + tx_addr_offset = SHM_DESC_ADDR_START; /* Increase number of available buffers. */ - metal_io_write32(ch->shm_io, - SHM_DESC_OFFSET_R_TO_H + SHM_DESC_AVAIL_OFFSET, + metal_io_write32(ch->remote_to_host_desc_io, + SHM_DESC_AVAIL_OFFSET, rx_count); /* Kick IRQ to notify data is in shared buffer. 
*/ diff --git a/examples/libmetal/machine/remote/amd_rpu/platform_init.c b/examples/libmetal/machine/remote/amd_rpu/platform_init.c index 4179bbea..302f2cba 100644 --- a/examples/libmetal/machine/remote/amd_rpu/platform_init.c +++ b/examples/libmetal/machine/remote/amd_rpu/platform_init.c @@ -34,8 +34,6 @@ #define DEFAULT_PAGE_SHIFT (-1UL) #define DEFAULT_PAGE_MASK (-1UL) -#define SHM_TOTAL_SIZE SHM0_DESC_SIZE + SHM1_DESC_SIZE + SHM_PAYLOAD_SIZE - /* Possible to control metal log build time */ #ifndef XLNX_METAL_LOG_LEVEL #define XLNX_METAL_LOG_LEVEL METAL_LOG_INFO @@ -57,16 +55,18 @@ extern struct metal_irq_controller xlnx_irq_cntr; const metal_phys_addr_t metal_phys[] = { IPI_BASE_ADDR, /**< base IPI address */ - SHM0_DESC_BASE, /**< shared memory base address */ + SHM0_DESC_BASE, /**< host to remote descriptor base address */ + SHM1_DESC_BASE, /**< remote to host descriptor base address */ + SHM_PAYLOAD_BASE, /**< shared payload base address */ TTC_BASE_ADDR, /**< base TTC address */ }; /* - * Define the metal device table for IPI, shared memory, and TTC devices. Linux - * uses device trees, but FreeRTOS relies on libmetal structures to describe the - * peripherals. Because these devices are memory mapped, we must expose their - * regions and interrupt information. The FreeRTOS memory map is flat, so the - * virtual and physical addresses are identical. + * Define the metal device table for IPI, descriptor, payload, and TTC devices. + * Linux uses device trees, but remote relies on libmetal structures to + * describe the peripherals. Because these devices are memory mapped, we must + * expose their regions and interrupt information. The FreeRTOS memory map is + * flat, so the virtual and physical addresses are identical. 
*/ static struct metal_device metal_dev_table[] = { { @@ -90,15 +90,57 @@ static struct metal_device metal_dev_table[] = { .irq_info = (void *)IPI_IRQ_VECT_ID, }, { - /* Shared memory management device */ - .name = SHM_DEV_NAME, + /* Host to remote descriptor device */ + .name = SHM0_DESC_DEV_NAME, .bus = NULL, .num_regions = 1, .regions = { { .virt = (void *)SHM0_DESC_BASE, .physmap = &metal_phys[1], - .size = SHM_TOTAL_SIZE, + .size = SHM0_DESC_SIZE, + .page_shift = DEFAULT_PAGE_SHIFT, + .page_mask = DEFAULT_PAGE_MASK, + .mem_flags = NORM_SHARED_NCACHE | + PRIV_RW_USER_RW, + .ops = {NULL}, + } + }, + .node = {NULL}, + .irq_num = 0, + .irq_info = NULL, + }, + { + /* Remote to host descriptor device */ + .name = SHM1_DESC_DEV_NAME, + .bus = NULL, + .num_regions = 1, + .regions = { + { + .virt = (void *)SHM1_DESC_BASE, + .physmap = &metal_phys[2], + .size = SHM1_DESC_SIZE, + .page_shift = DEFAULT_PAGE_SHIFT, + .page_mask = DEFAULT_PAGE_MASK, + .mem_flags = NORM_SHARED_NCACHE | + PRIV_RW_USER_RW, + .ops = {NULL}, + } + }, + .node = {NULL}, + .irq_num = 0, + .irq_info = NULL, + }, + { + /* Shared payload device */ + .name = SHM_DEV_NAME, + .bus = NULL, + .num_regions = 1, + .regions = { + { + .virt = (void *)SHM_PAYLOAD_BASE, + .physmap = &metal_phys[3], + .size = SHM_PAYLOAD_SIZE, .page_shift = DEFAULT_PAGE_SHIFT, .page_mask = DEFAULT_PAGE_MASK, .mem_flags = NORM_SHARED_NCACHE | @@ -118,7 +160,7 @@ static struct metal_device metal_dev_table[] = { .regions = { { .virt = (void *)TTC_BASE_ADDR, - .physmap = &metal_phys[2], + .physmap = &metal_phys[4], .size = 0x1000, .page_shift = DEFAULT_PAGE_SHIFT, .page_mask = DEFAULT_PAGE_MASK, @@ -136,6 +178,8 @@ static struct metal_device metal_dev_table[] = { * Extern global variables */ struct metal_device *ipi_dev = NULL; +static struct metal_device *host_to_remote_desc_dev = NULL; +static struct metal_device *remote_to_host_desc_dev = NULL; struct metal_device *shm_dev = NULL; struct metal_device *ttc_dev = NULL; @@ -249,13 
+293,30 @@ int open_metal_devices(void) { int ret; - /* Open shared memory device */ + /* Open payload device */ ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &shm_dev); if (ret) { metal_err("REMOTE: Failed to open device %s.\n", SHM_DEV_NAME); goto out; } + /* Open descriptor devices */ + ret = metal_device_open(BUS_NAME, SHM0_DESC_DEV_NAME, + &host_to_remote_desc_dev); + if (ret) { + metal_err("REMOTE: Failed to open device %s.\n", + SHM0_DESC_DEV_NAME); + goto out; + } + + ret = metal_device_open(BUS_NAME, SHM1_DESC_DEV_NAME, + &remote_to_host_desc_dev); + if (ret) { + metal_err("REMOTE: Failed to open device %s.\n", + SHM1_DESC_DEV_NAME); + goto out; + } + /* Open IPI device */ ret = metal_device_open(BUS_NAME, IPI_DEV_NAME, &ipi_dev); if (ret) { @@ -282,10 +343,17 @@ int open_metal_devices(void) */ void close_metal_devices(void) { - /* Close shared memory device */ + /* Close payload device */ if (shm_dev) metal_device_close(shm_dev); + /* Close descriptor devices */ + if (host_to_remote_desc_dev) + metal_device_close(host_to_remote_desc_dev); + + if (remote_to_host_desc_dev) + metal_device_close(remote_to_host_desc_dev); + /* Close IPI device */ if (ipi_dev) metal_device_close(ipi_dev); @@ -459,13 +527,37 @@ int platform_init(struct channel_s *ch) * Buffer clean up. Do this at start in case a * previous run was stopped midway. 
*/ - io = metal_device_io_region(shm_dev, 0); - if (!io) + ch->host_to_remote_desc_io = metal_device_io_region(host_to_remote_desc_dev, 0); + if (!ch->host_to_remote_desc_io) { + metal_err("REMOTE: Failed to map io region for %s.\n", + host_to_remote_desc_dev->name); + return -ENODEV; + } + + ch->remote_to_host_desc_io = metal_device_io_region(remote_to_host_desc_dev, 0); + if (!ch->remote_to_host_desc_io) { + metal_err("REMOTE: Failed to map io region for %s.\n", + remote_to_host_desc_dev->name); + return -ENODEV; + } + + ch->shm_io = metal_device_io_region(shm_dev, 0); + if (!ch->shm_io) { metal_err("REMOTE: Failed to map io region for %s.\n", shm_dev->name); - else - metal_io_block_set(io, 0, 0, SHM_TOTAL_SIZE); + return -ENODEV; + } + + ret = metal_io_block_set(ch->host_to_remote_desc_io, 0, 0, SHM0_DESC_SIZE); + if (ret < 0) + return ret; + + ret = metal_io_block_set(ch->remote_to_host_desc_io, 0, 0, SHM1_DESC_SIZE); + if (ret < 0) + return ret; - ch->shm_io = io; + ret = metal_io_block_set(ch->shm_io, 0, 0, SHM_PAYLOAD_SIZE); + if (ret < 0) + return ret; /* Get TTC IO region */ ch->ttc_io = metal_device_io_region(ttc_dev, 0); @@ -474,12 +566,6 @@ int platform_init(struct channel_s *ch) return -ENODEV; } - /* Get the IPI IRQ from the opened IPI device */ - ch->irq_vector_id = (intptr_t)ipi_dev->irq_info; - - /* Register IPI irq handler */ - metal_irq_register(ch->irq_vector_id, ipi_irq_handler, ch); - return 0; } diff --git a/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h b/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h index 48a67bcb..06c4247e 100644 --- a/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h +++ b/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h @@ -19,6 +19,8 @@ #include "xil_printf.h" struct channel_s { + struct metal_io_region *host_to_remote_desc_io; /* host to remote descriptors */ + struct metal_io_region *remote_to_host_desc_io; /* remote to host 
descriptors */ struct metal_io_region *ipi_io; /* IPI metal i/o region */ struct metal_io_region *shm_io; /* Shared memory metal i/o region */ struct metal_io_region *ttc_io; /* TTC metal i/o region */ diff --git a/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h b/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h index d4d24a04..d0386350 100644 --- a/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h +++ b/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h @@ -17,6 +17,8 @@ #include "xil_printf.h" struct channel_s { + struct metal_io_region *host_to_remote_desc_io; /* host to remote descriptors */ + struct metal_io_region *remote_to_host_desc_io; /* remote to host descriptors */ struct metal_io_region *ipi_io; /* IPI metal i/o region */ struct metal_io_region *shm_io; /* Shared memory metal i/o region */ struct metal_io_region *ttc_io; /* TTC metal i/o region */ From c28b8fb9cdf3ed459d993c7df9188725b3b458ea Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Sat, 28 Mar 2026 07:18:21 -0700 Subject: [PATCH 4/6] examples: libmetal: amd_rpu: versal-net: set first 2G MPU region Set the first 2G MPU region with the same flags as the libmetal demo to ensure MPU regions don't get incorrectly created by the BSP. This will go away once overlapping region adjustment is fixed in the BSP. Signed-off-by: Ben Levinsky --- .../machine/remote/amd_rpu/platform_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/examples/libmetal/machine/remote/amd_rpu/platform_init.c b/examples/libmetal/machine/remote/amd_rpu/platform_init.c index 302f2cba..8d5b0552 100644 --- a/examples/libmetal/machine/remote/amd_rpu/platform_init.c +++ b/examples/libmetal/machine/remote/amd_rpu/platform_init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -523,6 +524,23 @@ int platform_init(struct channel_s *ch) metal_irq_enable(ch->irq_vector_id); metal_io_write32(ch->ipi_io, XIPIPSU_IER_OFFSET, 
ch->ipi_mask); +#ifdef VERSAL_NET + /* + * Temporary workaround for a Cortex-R52 MPU overlapping-region bug. When + * libmetal remaps shared-memory windows inside the default DDR region, the + * overlap adjustment can mis-split the existing DDR MPU entry. Configure the + * full DDR range with the libmetal attributes up front to avoid that path. + * + * Remove this once the R52 MPU overlapping-region handling is fixed. + * + * Note this only applies for R52 based systems - hence the ifdef. + */ + if (Xil_SetMPURegion(0x0U, 0x7FFFFFFFU, + NORM_SHARED_NCACHE | PRIV_RW_USER_RW) != XST_SUCCESS) { + metal_err("REMOTE: Failed to set MPU Region for 0x%x.\n", 0x0U); + return -EINVAL; + } +#endif /* * Buffer clean up. Do this at start in case a * previous run was stopped midway. From a6eae196c067e943a8af0b4e8c333817c48403cc Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 8 Apr 2026 07:54:21 -0700 Subject: [PATCH 5/6] examples: libmetal: amd_rpu: fix memset call in cleanup Fix bug in call to memset(), pass struct channel_s *, not struct channel_s ** Signed-off-by: Ben Levinsky --- examples/libmetal/machine/remote/amd_rpu/platform_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/libmetal/machine/remote/amd_rpu/platform_init.c b/examples/libmetal/machine/remote/amd_rpu/platform_init.c index 8d5b0552..3a19e64c 100644 --- a/examples/libmetal/machine/remote/amd_rpu/platform_init.c +++ b/examples/libmetal/machine/remote/amd_rpu/platform_init.c @@ -592,7 +592,7 @@ void platform_cleanup(struct channel_s *ch) metal_io_write32(ch->ipi_io, XIPIPSU_IDR_OFFSET, ch->ipi_mask); metal_irq_disable(ch->irq_vector_id); metal_irq_unregister(ch->irq_vector_id); - memset(&ch, 0, sizeof(ch)); + memset(ch, 0, sizeof(*ch)); /* Close libmetal devices which have been opened */ close_metal_devices(); From b735d10ee85ed9539e623ee53bcd1f1251fe42c5 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Tue, 14 Apr 2026 06:39:53 -0700 Subject: [PATCH 6/6] examples: 
libmetal: irq_shmem_demo: move channel state to common header Move the shared irq_shmem_demo channel definition into a demo-level header and keep platform-specific state behind a machine_ctx pointer. This removes the duplicated channel_s definitions from the host and remote OS headers and makes the separation between common transport state and platform-private state clearer. Also export the demo common include directory through CMake so the shared header can be included directly as irq_shmem_demo.h instead of using deep relative include paths. Signed-off-by: Ben Levinsky --- .../demos/irq_shmem_demo/CMakeLists.txt | 1 + .../irq_shmem_demo/common/irq_shmem_demo.h | 30 +++++++++++++++++++ .../irq_shmem_demo/host/irq_shmem_demo.c | 8 +++-- .../irq_shmem_demo/remote/irq_shmem_demo.c | 5 +++- .../machine/host/amd_linux_userspace/common.h | 18 ++++++----- .../host/amd_linux_userspace/platform_init.c | 8 +++-- .../machine/remote/amd_rpu/CMakeLists.txt | 4 ++- .../amd_rpu/system/freertos/amp_demo_os.h | 25 +++++++++------- .../amd_rpu/system/generic/amp_demo_os.h | 26 ++++++++-------- 9 files changed, 87 insertions(+), 38 deletions(-) create mode 100644 examples/libmetal/demos/irq_shmem_demo/common/irq_shmem_demo.h diff --git a/examples/libmetal/demos/irq_shmem_demo/CMakeLists.txt b/examples/libmetal/demos/irq_shmem_demo/CMakeLists.txt index 926f9925..7ea3ae45 100644 --- a/examples/libmetal/demos/irq_shmem_demo/CMakeLists.txt +++ b/examples/libmetal/demos/irq_shmem_demo/CMakeLists.txt @@ -1,3 +1,4 @@ # Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause +collect (PROJECT_INC_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/common") add_subdirectory(${ROLE}) diff --git a/examples/libmetal/demos/irq_shmem_demo/common/irq_shmem_demo.h b/examples/libmetal/demos/irq_shmem_demo/common/irq_shmem_demo.h new file mode 100644 index 00000000..b8524cf2 --- /dev/null +++ b/examples/libmetal/demos/irq_shmem_demo/common/irq_shmem_demo.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2026, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __IRQ_SHMEM_DEMO_H__ +#define __IRQ_SHMEM_DEMO_H__ + +#include + +#include + +/* + * Common transport state shared by the host and remote implementations of the + * IRQ shared-memory demo. Platform- or OS-private wait state is kept behind + * machine_ctx to avoid duplicating this layout in each machine header. + */ +struct channel_s { + struct metal_io_region *host_to_remote_desc_io; /* host to remote descriptors */ + struct metal_io_region *remote_to_host_desc_io; /* remote to host descriptors */ + struct metal_io_region *ipi_io; /* IPI metal i/o region */ + struct metal_io_region *shm_io; /* Shared memory metal i/o region */ + struct metal_io_region *ttc_io; /* TTC metal i/o region */ + void *machine_ctx; /* Platform- or OS-private channel state */ + uint32_t ipi_mask; /* RPU IPI mask */ + int irq_vector_id; /* IRQ number. 
*/ +}; + +#endif /* __IRQ_SHMEM_DEMO_H__ */ diff --git a/examples/libmetal/demos/irq_shmem_demo/host/irq_shmem_demo.c b/examples/libmetal/demos/irq_shmem_demo/host/irq_shmem_demo.c index 1af04996..589f8196 100644 --- a/examples/libmetal/demos/irq_shmem_demo/host/irq_shmem_demo.c +++ b/examples/libmetal/demos/irq_shmem_demo/host/irq_shmem_demo.c @@ -130,6 +130,7 @@ static inline void dump_buffer(void *buf, unsigned int len) */ static int irq_shmem_echo(struct channel_s *ch) { + struct channel_machine_ctx_s *machine = channel_machine_ctx(ch); struct metal_io_region *desc_host_to_remote = ch->host_to_remote_desc_io; struct metal_io_region *desc_remote_to_host = ch->remote_to_host_desc_io; struct metal_io_region *payload_io = ch->shm_io; @@ -241,7 +242,7 @@ static int irq_shmem_echo(struct channel_s *ch) while (i != PKGS_TOTAL) { - wait_for_notified(&ch->remote_nkicked); + wait_for_notified(&machine->remote_nkicked); rx_avail = metal_io_read32(desc_remote_to_host, rx_avail_offset); while (i != rx_avail) { @@ -376,7 +377,10 @@ static int irq_shmem_echo(struct channel_s *ch) int main(void) { - struct channel_s ch_s; + struct channel_machine_ctx_s ch_machine_s = {0}; + struct channel_s ch_s = { + .machine_ctx = &ch_machine_s, + }; int ret = 0; /* platform_init will set the OS agnostic channel information */ diff --git a/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c b/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c index 437910a1..6acbc8c3 100644 --- a/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c +++ b/examples/libmetal/demos/irq_shmem_demo/remote/irq_shmem_demo.c @@ -40,9 +40,12 @@ */ int demo(void *arg) { + struct channel_machine_ctx_s ch_machine_s = {0}; unsigned long tx_data_offset, rx_data_offset; unsigned long tx_addr_offset, rx_addr_offset; - struct channel_s ch_s = {0x0}; + struct channel_s ch_s = { + .machine_ctx = &ch_machine_s, + }; struct channel_s *ch = &ch_s; bool platform_ready = false; uint32_t rx_count, 
rx_avail; diff --git a/examples/libmetal/machine/host/amd_linux_userspace/common.h b/examples/libmetal/machine/host/amd_linux_userspace/common.h index 9b15fb5a..a73f5ae8 100644 --- a/examples/libmetal/machine/host/amd_linux_userspace/common.h +++ b/examples/libmetal/machine/host/amd_linux_userspace/common.h @@ -16,6 +16,7 @@ #include +#include "irq_shmem_demo.h" #include "config.h" /* @@ -113,17 +114,18 @@ static inline void update_stat(struct metal_stat *pst, uint64_t val) pst->st_max = val; } -struct channel_s { - struct metal_io_region *host_to_remote_desc_io; /* host to remote descriptors */ - struct metal_io_region *remote_to_host_desc_io; /* remote to host descriptors */ - struct metal_io_region *ipi_io; /* IPI metal i/o region */ - struct metal_io_region *shm_io; /* Shared memory metal i/o region */ - struct metal_io_region *ttc_io; /* TTC metal i/o region */ +struct channel_machine_ctx_s { atomic_flag remote_nkicked; /* IRQ kick flag */ - uint32_t ipi_mask; /* RPU IPI mask */ - int irq_vector_id; /* IRQ number. 
*/ }; +static inline struct channel_machine_ctx_s *channel_machine_ctx(struct channel_s *ch) +{ + metal_assert(ch); + metal_assert(ch->machine_ctx); + + return (struct channel_machine_ctx_s *)ch->machine_ctx; +} + /** * @ AMD RPU port for IRQ notification * @param[in] irq_io - IO region used for IRQ kick diff --git a/examples/libmetal/machine/host/amd_linux_userspace/platform_init.c b/examples/libmetal/machine/host/amd_linux_userspace/platform_init.c index ca075d24..8fc5acd8 100644 --- a/examples/libmetal/machine/host/amd_linux_userspace/platform_init.c +++ b/examples/libmetal/machine/host/amd_linux_userspace/platform_init.c @@ -94,6 +94,7 @@ int open_metal_devices(void) static int irq_isr(int vect_id, void *priv) { struct channel_s *ch = (struct channel_s *)priv; + struct channel_machine_ctx_s *machine = channel_machine_ctx(ch); struct metal_io_region *ipi_io = ch->ipi_io; uint32_t ipi_mask = IPI_MASK; uint64_t val = 1; @@ -105,7 +106,7 @@ static int irq_isr(int vect_id, void *priv) val = metal_io_read32(ipi_io, IPI_ISR_OFFSET); if (val & ipi_mask) { metal_io_write32(ipi_io, IPI_ISR_OFFSET, ipi_mask); - atomic_flag_clear(&ch->remote_nkicked); + atomic_flag_clear(&machine->remote_nkicked); return METAL_IRQ_HANDLED; } return METAL_IRQ_NOT_HANDLED; @@ -117,6 +118,7 @@ int platform_init(struct channel_s *ch) int ret; metal_assert(ch); + metal_assert(ch->machine_ctx); ret = metal_init(&init_param); if (ret) { @@ -125,8 +127,8 @@ int platform_init(struct channel_s *ch) } /* initialize remote_nkicked */ - ch->remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; - atomic_flag_test_and_set(&ch->remote_nkicked); + channel_machine_ctx(ch)->remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&channel_machine_ctx(ch)->remote_nkicked); ret = open_metal_devices(); if (ret) { diff --git a/examples/libmetal/machine/remote/amd_rpu/CMakeLists.txt b/examples/libmetal/machine/remote/amd_rpu/CMakeLists.txt index 14333abf..0e0b3ae5 100644 --- 
a/examples/libmetal/machine/remote/amd_rpu/CMakeLists.txt +++ b/examples/libmetal/machine/remote/amd_rpu/CMakeLists.txt @@ -17,8 +17,9 @@ add_subdirectory (system) set (_elf_name ${DEMO}) set (source_dirs ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/system/${PROJECT_SYSTEM}) +collector_list (_inc_dirs PROJECT_INC_DIRS) link_directories(${source_dirs}) -include_directories(${source_dirs}) +include_directories(${source_dirs} ${_inc_dirs}) collector_list (_deps PROJECT_LIB_DEPS) @@ -60,5 +61,6 @@ target_link_libraries(${_elf_name}.elf PRIVATE -Wl,--start-group ${_deps} -Wl,--end-group) target_compile_definitions(${_elf_name}.elf PUBLIC ${USER_COMPILE_DEFINITIONS}) +target_include_directories(${_elf_name}.elf PUBLIC ${_inc_dirs}) target_include_directories(${_elf_name}.elf PUBLIC ${USER_INCLUDE_DIRECTORIES}) install (TARGETS ${_elf_name}.elf RUNTIME DESTINATION bin) diff --git a/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h b/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h index 06c4247e..76f2764b 100644 --- a/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h +++ b/examples/libmetal/machine/remote/amd_rpu/system/freertos/amp_demo_os.h @@ -14,21 +14,23 @@ #include #include +#include "irq_shmem_demo.h" #include "portmacro.h" #include "xil_printf.h" -struct channel_s { - struct metal_io_region *host_to_remote_desc_io; /* host to remote descriptors */ - struct metal_io_region *remote_to_host_desc_io; /* remote to host descriptors */ - struct metal_io_region *ipi_io; /* IPI metal i/o region */ - struct metal_io_region *shm_io; /* Shared memory metal i/o region */ - struct metal_io_region *ttc_io; /* TTC metal i/o region */ - uint32_t ipi_mask; /* RPU IPI mask */ +struct channel_machine_ctx_s { TaskHandle_t task; /* Demo task handle used for suspend/resume. */ - int irq_vector_id; /* IRQ number. 
*/ }; +static inline struct channel_machine_ctx_s *channel_machine_ctx(struct channel_s *ch) +{ + metal_assert(ch); + metal_assert(ch->machine_ctx); + + return (struct channel_machine_ctx_s *)ch->machine_ctx; +} + /** * @brief amp_os_init() - set OS specific information in the channel * @@ -42,7 +44,7 @@ static inline int amp_os_init(struct channel_s *ch, void *arg) metal_assert(ch); metal_assert(task); - ch->task = task; + channel_machine_ctx(ch)->task = task; return 0; } @@ -71,11 +73,12 @@ static inline void system_suspend(struct channel_s *ch) static inline void system_resume(struct channel_s *ch) { BaseType_t yield_required; + struct channel_machine_ctx_s *machine = channel_machine_ctx(ch); metal_assert(ch); - metal_assert(ch->task); + metal_assert(machine->task); - yield_required = xTaskResumeFromISR(ch->task); + yield_required = xTaskResumeFromISR(machine->task); portYIELD_FROM_ISR(yield_required); } diff --git a/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h b/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h index d0386350..352648c4 100644 --- a/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h +++ b/examples/libmetal/machine/remote/amd_rpu/system/generic/amp_demo_os.h @@ -14,19 +14,21 @@ #include #include +#include "irq_shmem_demo.h" #include "xil_printf.h" -struct channel_s { - struct metal_io_region *host_to_remote_desc_io; /* host to remote descriptors */ - struct metal_io_region *remote_to_host_desc_io; /* remote to host descriptors */ - struct metal_io_region *ipi_io; /* IPI metal i/o region */ - struct metal_io_region *shm_io; /* Shared memory metal i/o region */ - struct metal_io_region *ttc_io; /* TTC metal i/o region */ - uint32_t ipi_mask; /* RPU IPI mask */ - int irq_vector_id; /* IRQ number. */ +struct channel_machine_ctx_s { atomic_flag irq_pending; /* Lightweight wait primitive. 
*/ }; +static inline struct channel_machine_ctx_s *channel_machine_ctx(struct channel_s *ch) +{ + metal_assert(ch); + metal_assert(ch->machine_ctx); + + return (struct channel_machine_ctx_s *)ch->machine_ctx; +} + /** * @brief amp_os_init() - initialize bare-metal rendezvous primitives * @@ -42,10 +44,10 @@ static inline int amp_os_init(struct channel_s *ch, void *arg) metal_assert(ch); - ch->irq_pending = (atomic_flag)ATOMIC_FLAG_INIT; + channel_machine_ctx(ch)->irq_pending = (atomic_flag)ATOMIC_FLAG_INIT; /* Start in the "waiting" state until the first interrupt arrives. */ - (void)atomic_flag_test_and_set(&ch->irq_pending); + (void)atomic_flag_test_and_set(&channel_machine_ctx(ch)->irq_pending); return 0; } @@ -62,7 +64,7 @@ static inline void system_suspend(struct channel_s *ch) { metal_assert(ch); - while (atomic_flag_test_and_set(&ch->irq_pending)) { + while (atomic_flag_test_and_set(&channel_machine_ctx(ch)->irq_pending)) { metal_asm volatile("wfi"); } } @@ -75,7 +77,7 @@ static inline void system_suspend(struct channel_s *ch) static inline void system_resume(struct channel_s *ch) { metal_assert(ch); - atomic_flag_clear(&ch->irq_pending); + atomic_flag_clear(&channel_machine_ctx(ch)->irq_pending); } #endif /* __AMP_DEMO_OS_H__ */