From 9724be258cd91339b4aa31349ab3a919b3676eb5 Mon Sep 17 00:00:00 2001 From: aottaviano Date: Wed, 6 Apr 2022 11:58:52 +0200 Subject: [PATCH] pulp-runtime: Add ARCHI_HAS_DMA_DEMUX property to mchan * If the cluster core demux and peripheral demux have a direct connection to the dma, allow the cluster cores to use this connection. --- include/hal/dma/mchan_v7.h | 222 +++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) diff --git a/include/hal/dma/mchan_v7.h b/include/hal/dma/mchan_v7.h index 7f07f81..d95dee5 100644 --- a/include/hal/dma/mchan_v7.h +++ b/include/hal/dma/mchan_v7.h @@ -267,9 +267,17 @@ static inline unsigned int plp_dma_status(); /// @cond IMPLEM #if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__) +#ifdef ARCHI_HAS_DMA_DEMUX +#define DMA_WRITE_DEMUX(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)ARCHI_MCHAN_DEMUX_ADDR, (offset)) +#define DMA_READ_DEMUX(offset) __builtin_pulp_OffsetedRead((int *)ARCHI_MCHAN_DEMUX_ADDR, (offset)) +#endif // ARCHI_HAS_DMA_DEMUX #define DMA_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)ARCHI_MCHAN_EXT_ADDR, (offset)) #define DMA_READ(offset) __builtin_pulp_OffsetedRead((int *)ARCHI_MCHAN_EXT_ADDR, (offset)) #else +#ifdef ARCHI_HAS_DMA_DEMUX +#define DMA_WRITE_DEMUX(value, offset) pulp_write32(ARCHI_MCHAN_DEMUX_ADDR + (offset), (value)) +#define DMA_READ_DEMUX(offset) pulp_read32(ARCHI_MCHAN_DEMUX_ADDR + (offset)) /* offset only: same arity as DMA_READ and the builtin DMA_READ_DEMUX */ +#endif // ARCHI_HAS_DMA_DEMUX #define DMA_WRITE(value, offset) pulp_write32(ARCHI_MCHAN_EXT_ADDR + (offset), (value)) #define DMA_READ(offset) pulp_read32(ARCHI_MCHAN_EXT_ADDR + (offset)) #endif @@ -278,10 +286,26 @@ static inline int plp_dma_counter_alloc() { return DMA_READ(MCHAN_CMD_OFFSET); } +static inline int plp_cl_dma_counter_alloc() { +#ifdef ARCHI_HAS_DMA_DEMUX + return DMA_READ_DEMUX(MCHAN_CMD_OFFSET); +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_counter_alloc(); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline void 
plp_dma_counter_free(int counter) { DMA_WRITE(1<>32), MCHAN_CMD_OFFSET); +#else + DMA_WRITE_DEMUX(extAddr, MCHAN_CMD_OFFSET); +#endif +#else // ARCHI_HAS_DMA_DEMUX + plp_dma_cmd_push(cmd, locAddr, extAddr); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline void plp_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { plp_dma_cmd_push(cmd, locAddr, extAddr); DMA_WRITE(length, MCHAN_CMD_OFFSET); DMA_WRITE(stride, MCHAN_CMD_OFFSET); } +static inline void plp_cl_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { +#ifdef ARCHI_HAS_DMA_DEMUX + plp_cl_dma_cmd_push(cmd, locAddr, extAddr); + DMA_WRITE_DEMUX(length, MCHAN_CMD_OFFSET); + DMA_WRITE_DEMUX(stride, MCHAN_CMD_OFFSET); +#else // ARCHI_HAS_DMA_DEMUX + plp_dma_cmd_push_2d(cmd, locAddr, extAddr, stride, length); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); @@ -323,6 +375,17 @@ static inline int plp_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned sho return counter; } +static inline int plp_cl_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push(cmd, loc, ext); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_memcpy(ext, loc, size, ext2loc); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = 
plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); @@ -330,6 +393,17 @@ static inline int plp_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned sh return counter; } +static inline int plp_cl_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push(cmd, loc, ext); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_l1ToExt(ext, loc, size); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); @@ -337,6 +411,17 @@ static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned sh return counter; } +static inline int plp_cl_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push(cmd, loc, ext); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_extToL1(loc, ext, size); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); @@ -344,6 +429,17 @@ static inline int plp_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned return counter; } +static inline int plp_cl_dma_memcpy_irq(mchan_ext_t 
ext, unsigned int loc, unsigned short size, int ext2loc) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push(cmd, loc, ext); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_memcpy_irq(ext, loc, size, ext2loc); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); @@ -351,6 +447,17 @@ static inline int plp_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigne return counter; } +static inline int plp_cl_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push(cmd, loc, ext); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_l1ToExt_irq(ext, loc, size); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); @@ -358,17 +465,47 @@ static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigne return counter; } +static inline int plp_cl_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, 
PLP_DMA_SHARED); + plp_cl_dma_cmd_push(cmd, loc, ext); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_extToL1_irq(loc, ext, size); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline void plp_dma_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); } +static inline void plp_cl_dma_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); +#else // ARCHI_HAS_DMA_DEMUX + plp_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { unsigned int counter = plp_dma_counter_alloc(); plp_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); return counter; } +static inline int plp_cl_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + plp_cl_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_memcpy_2d(ext, loc, size, stride, length, ext2loc); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { unsigned int counter = plp_dma_counter_alloc(); unsigned int 
cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); @@ -376,6 +513,17 @@ static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned return counter; } +static inline int plp_cl_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_l1ToExt_2d(ext, loc, size, stride, length); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); @@ -383,6 +531,17 @@ static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned return counter; } +static inline int plp_cl_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_extToL1_2d(loc, ext, size, stride, length); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { unsigned int counter = plp_dma_counter_alloc(); unsigned int 
cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); @@ -390,6 +549,17 @@ static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsig return counter; } +static inline int plp_cl_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_memcpy_2d_irq(ext, loc, size, stride, length, ext2loc); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { unsigned int counter = plp_dma_counter_alloc(); unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); @@ -397,6 +567,17 @@ static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsi return counter; } +static inline int plp_cl_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_l1ToExt_2d_irq(ext, loc, size, stride, length); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { unsigned int counter = plp_dma_counter_alloc(); 
unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); @@ -404,6 +585,17 @@ static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsi return counter; } +static inline int plp_cl_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { +#ifdef ARCHI_HAS_DMA_DEMUX + unsigned int counter = plp_cl_dma_counter_alloc(); + unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); + plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + return counter; +#else // ARCHI_HAS_DMA_DEMUX + return plp_dma_extToL1_2d_irq(loc, ext, size, stride, length); +#endif // ARCHI_HAS_DMA_DEMUX +} + static inline void plp_dma_barrier() { while(DMA_READ(MCHAN_STATUS_OFFSET) & 0xFFFF) { eu_evt_maskWaitAndClr(1<