[WIP] add pre-alpha iDMA drivers and #defines

This commit is contained in:
Georg Rutishauser 2024-06-27 17:43:53 +02:00
parent 2a505d42ae
commit 667eb3af97
7 changed files with 682 additions and 8 deletions

View file

@ -118,11 +118,13 @@
#define ARCHI_EU_DEMUX_OFFSET ( 0x00000 )
#define ARCHI_MCHAN_DEMUX_OFFSET ( 0x00400 )
#define ARCHI_IDMA_DEMUX_OFFSET ( 0x00400 )
#define ARCHI_DEMUX_PERIPHERALS_ADDR ( ARCHI_CLUSTER_ADDR + ARCHI_DEMUX_PERIPHERALS_OFFSET )
#define ARCHI_EU_DEMUX_ADDR ( ARCHI_DEMUX_PERIPHERALS_ADDR + ARCHI_EU_DEMUX_OFFSET )
#define ARCHI_MCHAN_DEMUX_ADDR ( ARCHI_DEMUX_PERIPHERALS_ADDR + ARCHI_MCHAN_DEMUX_OFFSET )
#define ARCHI_IDMA_DEMUX_ADDR ( ARCHI_DEMUX_PERIPHERALS_ADDR + ARCHI_IDMA_DEMUX_OFFSET )
#endif

View file

@ -79,6 +79,8 @@
#define RISCV_VERSION 4
#define MCHAN_VERSION 7
#define PADS_VERSION 2
#define IDMA_VERSION 2
/*
@ -91,6 +93,7 @@
#define ARCHI_CLUSTER_NB_PE 8
#endif
#define ARCHI_NB_CLUSTER 1
#define ARCHI_HAS_DMA_DEMUX 1
/*

View file

@ -37,7 +37,8 @@
#include "archi/chips/pulp_cluster/memory_map.h"
#include "archi/chips/pulp_cluster/apb_soc.h"
#include "archi/stdout/stdout_v3.h"
#include "archi/dma/mchan_v7.h"
//#include "archi/dma/mchan_v7.h"
#include "archi/dma/idma_v2.h"
#include "archi/udma/cpi/udma_cpi_v1.h"
#include "archi/udma/i2c/udma_i2c_v2.h"

304
include/archi/dma/idma_v2.h Normal file
View file

@ -0,0 +1,304 @@
// Generated register defines for idma_reg32_3d
// Copyright information found in source file:
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensing information found in source file:
//
// SPDX-License-Identifier: SHL-0.51
#ifndef _IDMA_REG32_3D_REG_DEFS_
#define _IDMA_REG32_3D_REG_DEFS_
#ifdef __cplusplus
extern "C" {
#endif
// Number of dimensions available
#define IDMA_REG32_3D_PARAM_NUM_DIMS 3
// Register width
#define IDMA_REG32_3D_PARAM_REG_WIDTH 32
// Configuration Register for DMA settings
#define IDMA_REG32_3D_CONF_REG_OFFSET 0x0
#define IDMA_REG32_3D_CONF_DECOUPLE_AW_BIT 0
#define IDMA_REG32_3D_CONF_DECOUPLE_RW_BIT 1
#define IDMA_REG32_3D_CONF_SRC_REDUCE_LEN_BIT 2
#define IDMA_REG32_3D_CONF_DST_REDUCE_LEN_BIT 3
#define IDMA_REG32_3D_CONF_SRC_MAX_LLEN_MASK 0x7
#define IDMA_REG32_3D_CONF_SRC_MAX_LLEN_OFFSET 4
#define IDMA_REG32_3D_CONF_SRC_MAX_LLEN_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_SRC_MAX_LLEN_MASK, .index = IDMA_REG32_3D_CONF_SRC_MAX_LLEN_OFFSET })
#define IDMA_REG32_3D_CONF_DST_MAX_LLEN_MASK 0x7
#define IDMA_REG32_3D_CONF_DST_MAX_LLEN_OFFSET 7
#define IDMA_REG32_3D_CONF_DST_MAX_LLEN_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_DST_MAX_LLEN_MASK, .index = IDMA_REG32_3D_CONF_DST_MAX_LLEN_OFFSET })
#define IDMA_REG32_3D_CONF_ENABLE_ND_MASK 0x3
#define IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET 10
#define IDMA_REG32_3D_CONF_ENABLE_ND_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_ENABLE_ND_MASK, .index = IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET })
#define IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK 0x7
#define IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET 12
#define IDMA_REG32_3D_CONF_SRC_PROTOCOL_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK, .index = IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET })
#define IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK 0x7
#define IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET 15
#define IDMA_REG32_3D_CONF_DST_PROTOCOL_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK, .index = IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET })
// DMA Status (common parameters)
#define IDMA_REG32_3D_STATUS_BUSY_FIELD_WIDTH 10
#define IDMA_REG32_3D_STATUS_BUSY_FIELDS_PER_REG 3
#define IDMA_REG32_3D_STATUS_MULTIREG_COUNT 16
// DMA Status
#define IDMA_REG32_3D_STATUS_0_REG_OFFSET 0x4
#define IDMA_REG32_3D_STATUS_0_BUSY_0_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_0_BUSY_0_OFFSET 0
#define IDMA_REG32_3D_STATUS_0_BUSY_0_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_0_BUSY_0_MASK, .index = IDMA_REG32_3D_STATUS_0_BUSY_0_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_1_REG_OFFSET 0x8
#define IDMA_REG32_3D_STATUS_1_BUSY_1_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_1_BUSY_1_OFFSET 0
#define IDMA_REG32_3D_STATUS_1_BUSY_1_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_1_BUSY_1_MASK, .index = IDMA_REG32_3D_STATUS_1_BUSY_1_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_2_REG_OFFSET 0xc
#define IDMA_REG32_3D_STATUS_2_BUSY_2_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_2_BUSY_2_OFFSET 0
#define IDMA_REG32_3D_STATUS_2_BUSY_2_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_2_BUSY_2_MASK, .index = IDMA_REG32_3D_STATUS_2_BUSY_2_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_3_REG_OFFSET 0x10
#define IDMA_REG32_3D_STATUS_3_BUSY_3_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_3_BUSY_3_OFFSET 0
#define IDMA_REG32_3D_STATUS_3_BUSY_3_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_3_BUSY_3_MASK, .index = IDMA_REG32_3D_STATUS_3_BUSY_3_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_4_REG_OFFSET 0x14
#define IDMA_REG32_3D_STATUS_4_BUSY_4_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_4_BUSY_4_OFFSET 0
#define IDMA_REG32_3D_STATUS_4_BUSY_4_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_4_BUSY_4_MASK, .index = IDMA_REG32_3D_STATUS_4_BUSY_4_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_5_REG_OFFSET 0x18
#define IDMA_REG32_3D_STATUS_5_BUSY_5_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_5_BUSY_5_OFFSET 0
#define IDMA_REG32_3D_STATUS_5_BUSY_5_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_5_BUSY_5_MASK, .index = IDMA_REG32_3D_STATUS_5_BUSY_5_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_6_REG_OFFSET 0x1c
#define IDMA_REG32_3D_STATUS_6_BUSY_6_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_6_BUSY_6_OFFSET 0
#define IDMA_REG32_3D_STATUS_6_BUSY_6_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_6_BUSY_6_MASK, .index = IDMA_REG32_3D_STATUS_6_BUSY_6_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_7_REG_OFFSET 0x20
#define IDMA_REG32_3D_STATUS_7_BUSY_7_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_7_BUSY_7_OFFSET 0
#define IDMA_REG32_3D_STATUS_7_BUSY_7_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_7_BUSY_7_MASK, .index = IDMA_REG32_3D_STATUS_7_BUSY_7_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_8_REG_OFFSET 0x24
#define IDMA_REG32_3D_STATUS_8_BUSY_8_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_8_BUSY_8_OFFSET 0
#define IDMA_REG32_3D_STATUS_8_BUSY_8_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_8_BUSY_8_MASK, .index = IDMA_REG32_3D_STATUS_8_BUSY_8_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_9_REG_OFFSET 0x28
#define IDMA_REG32_3D_STATUS_9_BUSY_9_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_9_BUSY_9_OFFSET 0
#define IDMA_REG32_3D_STATUS_9_BUSY_9_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_9_BUSY_9_MASK, .index = IDMA_REG32_3D_STATUS_9_BUSY_9_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_10_REG_OFFSET 0x2c
#define IDMA_REG32_3D_STATUS_10_BUSY_10_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_10_BUSY_10_OFFSET 0
#define IDMA_REG32_3D_STATUS_10_BUSY_10_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_10_BUSY_10_MASK, .index = IDMA_REG32_3D_STATUS_10_BUSY_10_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_11_REG_OFFSET 0x30
#define IDMA_REG32_3D_STATUS_11_BUSY_11_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_11_BUSY_11_OFFSET 0
#define IDMA_REG32_3D_STATUS_11_BUSY_11_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_11_BUSY_11_MASK, .index = IDMA_REG32_3D_STATUS_11_BUSY_11_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_12_REG_OFFSET 0x34
#define IDMA_REG32_3D_STATUS_12_BUSY_12_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_12_BUSY_12_OFFSET 0
#define IDMA_REG32_3D_STATUS_12_BUSY_12_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_12_BUSY_12_MASK, .index = IDMA_REG32_3D_STATUS_12_BUSY_12_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_13_REG_OFFSET 0x38
#define IDMA_REG32_3D_STATUS_13_BUSY_13_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_13_BUSY_13_OFFSET 0
#define IDMA_REG32_3D_STATUS_13_BUSY_13_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_13_BUSY_13_MASK, .index = IDMA_REG32_3D_STATUS_13_BUSY_13_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_14_REG_OFFSET 0x3c
#define IDMA_REG32_3D_STATUS_14_BUSY_14_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_14_BUSY_14_OFFSET 0
#define IDMA_REG32_3D_STATUS_14_BUSY_14_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_14_BUSY_14_MASK, .index = IDMA_REG32_3D_STATUS_14_BUSY_14_OFFSET })
// DMA Status
#define IDMA_REG32_3D_STATUS_15_REG_OFFSET 0x40
#define IDMA_REG32_3D_STATUS_15_BUSY_15_MASK 0x3ff
#define IDMA_REG32_3D_STATUS_15_BUSY_15_OFFSET 0
#define IDMA_REG32_3D_STATUS_15_BUSY_15_FIELD \
((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_15_BUSY_15_MASK, .index = IDMA_REG32_3D_STATUS_15_BUSY_15_OFFSET })
// Next ID, launches transfer, returns 0 if transfer not set up properly.
// (common parameters)
#define IDMA_REG32_3D_NEXT_ID_NEXT_ID_FIELD_WIDTH 32
#define IDMA_REG32_3D_NEXT_ID_NEXT_ID_FIELDS_PER_REG 1
#define IDMA_REG32_3D_NEXT_ID_MULTIREG_COUNT 16
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET 0x44
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET 0x48
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_2_REG_OFFSET 0x4c
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_3_REG_OFFSET 0x50
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_4_REG_OFFSET 0x54
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_5_REG_OFFSET 0x58
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_6_REG_OFFSET 0x5c
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_7_REG_OFFSET 0x60
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_8_REG_OFFSET 0x64
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_9_REG_OFFSET 0x68
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_10_REG_OFFSET 0x6c
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_11_REG_OFFSET 0x70
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_12_REG_OFFSET 0x74
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_13_REG_OFFSET 0x78
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_14_REG_OFFSET 0x7c
// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG32_3D_NEXT_ID_15_REG_OFFSET 0x80
// Get ID of finished transactions. (common parameters)
#define IDMA_REG32_3D_DONE_ID_DONE_ID_FIELD_WIDTH 32
#define IDMA_REG32_3D_DONE_ID_DONE_ID_FIELDS_PER_REG 1
#define IDMA_REG32_3D_DONE_ID_MULTIREG_COUNT 16
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_0_REG_OFFSET 0x84
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_1_REG_OFFSET 0x88
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_2_REG_OFFSET 0x8c
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_3_REG_OFFSET 0x90
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_4_REG_OFFSET 0x94
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_5_REG_OFFSET 0x98
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_6_REG_OFFSET 0x9c
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_7_REG_OFFSET 0xa0
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_8_REG_OFFSET 0xa4
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_9_REG_OFFSET 0xa8
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_10_REG_OFFSET 0xac
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_11_REG_OFFSET 0xb0
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_12_REG_OFFSET 0xb4
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_13_REG_OFFSET 0xb8
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_14_REG_OFFSET 0xbc
// Get ID of finished transactions.
#define IDMA_REG32_3D_DONE_ID_15_REG_OFFSET 0xc0
// Low destination address
#define IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET 0xd0
// Low source address
#define IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET 0xd8
// Low transfer length in byte
#define IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET 0xe0
// Low destination stride dimension 2
#define IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET 0xe8
// Low source stride dimension 2
#define IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET 0xf0
// Low number of repetitions dimension 2
#define IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET 0xf8
// Low destination stride dimension 3
#define IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET 0x100
// Low source stride dimension 3
#define IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET 0x108
// Low number of repetitions dimension 3
#define IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET 0x110
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _IDMA_REG32_3D_REG_DEFS_
// End generated register defines for idma_reg32_3d

View file

@ -26,7 +26,8 @@
#endif // __ibex__
#include "hal/eu/eu_v3.h"
#include "hal/itc/itc_v1.h"
#include "hal/dma/mchan_v7.h"
//#include "hal/dma/mchan_v7.h"
#include "hal/dma/idma_v2.h"
#include "hal/timer/timer_v2.h"
#include "hal/soc_eu/soc_eu_v2.h"
#include "hal/cluster_ctrl/cluster_ctrl_v2.h"

362
include/hal/dma/idma_v2.h Normal file
View file

@ -0,0 +1,362 @@
/*
* Copyright (C) 2021 ETH Zurich and University of Bologna
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __HAL_IDMA_V2_H__
#define __HAL_IDMA_V2_H__
#include <archi/dma/idma_v2.h>
#include "hal/pulp.h"
// Transfer direction selectors. They are not referenced by any active code in
// this header (only the commented-out 2D helpers take an ext2loc flag).
#define PLP_DMA_LOC2EXT 0
#define PLP_DMA_EXT2LOC 1
// Transfer dimensionality selectors. Not referenced by any active code in this header.
#define PLP_DMA_1D 0
#define PLP_DMA_2D 1
// Event number the cores wait on (as the mask 1 << IDMA_EVENT) in plp_dma_wait and plp_dma_barrier.
#define IDMA_EVENT 8 // all iDMA tx_cplt events are broadcast
// Width in bits of the transfer identifier, used by pulp_idma_tx_cplt for its
// wrap-around comparison.
#define IDMA_ID_COUNTER_WIDTH 32
// Mask applied to a transfer identifier before it is compared against the done ID.
#define IDMA_ID_MASK 0xffffffff
// Base configuration word used by plp_dma_memcpy before the protocol fields are inserted.
#define IDMA_DEFAULT_CONFIG 0x0
// NOTE(review): 0x8 sets bit 3, which the register layout in archi/dma/idma_v2.h
// names IDMA_REG32_3D_CONF_DST_REDUCE_LEN_BIT; the ENABLE_ND field sits at bit 10.
// This value is only referenced from commented-out code and looks like a leftover
// from an earlier register layout - confirm before using it.
#define IDMA_DEFAULT_CONFIG_2D 0x8
/**
 * Protocol selector for the source or destination side of an iDMA transfer.
 * The numeric values are what gets inserted into the 3-bit protocol fields
 * of the transfer configuration word.
 */
typedef enum {
  /** AXI protocol: L2 memory */
  IDMA_PROT_AXI = 0,
  /** OBI protocol: L1 memory */
  IDMA_PROT_OBI = 1,
  /** INIT protocol: /dev/null (write to here and the stream disappears,
   *  read from here and get all-zeros) */
  IDMA_PROT_INIT = 4
} idma_prot_t;
/** @name High-level DMA memory copy functions
* The following functions can be used to trigger DMA transfers to copy data between the cluster memory (L1) and another memory outside the cluster (another cluster L1 or L2).
* The DMA supports the following features:
* - Transfers are event-based. With event-based transfers the core can call a wait function to block execution until the transfer is done.
* - The DMA supports 2D transfers, which allow transferring a 2D tile in one command. Additional information must then be given to specify the width of the tile and the number of bytes between 2 lines of the tile.
* - The event sent at the end of the transfer is broadcasted to all cluster cores.
* - To identify specific transfers, the DMA provides a transfer identifier.
* - Multiple transfers can be launched simultaneously, with them being executed 2-4 in parallel, with more waiting in a queue.
*/
/**@{*/
/** Memory transfer with event-based completion.
*
\param src Address from where to copy data. There is no restriction on memory alignment.
\param dst Address to which to copy data. There is no restriction on memory alignment.
\param size Number of bytes to be transferred. The only restriction is that this size must fit 16 bits, i.e. must be smaller than 65536.
\param src_prot choose IDMA_PROT_AXI for transfer from L2, IDMA_PROT_OBI for transfer from L1 and IDMA_PROT_INIT for a transfer
of all-0 data
\param dst_prot choose IDMA_PROT_AXI for transfer to L2, IDMA_PROT_OBI for transfer to L1 and IDMA_PROT_INIT for a transfer
to /dev/null (i.e. the stream will be "eaten"). Note that AXI-to-AXI transfers are not supported.
\return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer.
*/
static inline int plp_dma_memcpy(unsigned int src, unsigned int dst, unsigned int size, idma_prot_t src_prot, idma_prot_t dst_prot);
/** Cluster memory to external memory transfer with event-based completion.
*
\param src Address in the cluster memory from which to load the data. There is no restriction on memory alignment.
\param dst Address in the external memory where to store the data. There is no restriction on memory alignment.
\param size Number of bytes to be transferred. The only restriction is that this size must fit 16 bits, i.e. must be less than 65536.
\return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer.
*/
static inline int plp_dma_l1ToL2(unsigned int src, unsigned int dst, unsigned short size);
/** External memory to cluster memory transfer with event-based completion.
*
\param src Address in the external memory from which to load the data. There is no restriction on memory alignment.
\param dst Address in the cluster memory where to store the data. There is no restriction on memory alignment.
\param size Number of bytes to be transferred. The only restriction is that this size must fit 16 bits, i.e. must be less than 65536.
\return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer.
*/
static inline int plp_dma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size);
/** 2-dimensional memory transfer with event-based completion.
*
\param ext Address in the external memory where to access the data. There is no restriction on memory alignment.
\param loc Address in the cluster memory where to access the data. There is no restriction on memory alignment.
\param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536.
\param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536.
\param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536.
\param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary
\return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer.
*/
//static inline int plp_dma_memcpy_2d(unsigned int ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, int ext2loc);
/** Cluster memory to external memory 2-dimensional transfer with event-based completion.
*
\param ext Address in the external memory where to store the data. There is no restriction on memory alignment.
\param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment.
\param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536.
\param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory.
\param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory.
\return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer.
*/
/** @name DMA wait functions
*/
/** DMA barrier.
* This blocks the core until no transfer is on-going in the DMA.
*/
static inline void plp_dma_barrier();
/** DMA wait.
* This blocks the core until the specified transfer is finished.
*
\param dma_tx_id The identifier of the transfer to wait for, as returned when the transfer was enqueued (e.g. by plp_dma_memcpy).
*/
static inline void plp_dma_wait(unsigned int dma_tx_id);
//!@}
/** @name iDMA low-level functions.
* This can be used instead of the high-level ones in order to have more control over the DMA features.
*/
/**
* iDMA configuration generation
* A standard memcpy will set all of these values - except for src and dest protocol - to 0.
*
\param decouple if set to true, there is no longer exactly one AXI write_request issued for
every read request. This mode can improve performance of unaligned transfers when crossing
the AXI page boundaries.
\param deburst if set, the DMA will split all bursts in single transfers
\param serialize if set, the DMA will only send AX belonging to a given Arbitrary 1D burst request
at a time. This is default behavior to prevent deadlocks. Setting `serialize` to
zero violates the AXI4+ATOP specification.
\param num_dim number of dimensions: 1, 2 or 3. Invalid values will be treated as 1.
\param src_prot Source protocol: AXI for transfer from L2, OBI for transfer from L1, INIT for zeromem
\param dst_prot Destination protocol: AXI for transfer to L2, OBI for transfer to L1, INIT for transfer to /dev/null
\return The generated configuration
*/
static inline unsigned int pulp_idma_get_conf(unsigned int decouple_aw, unsigned int decouple_rw, unsigned int n_d, idma_prot_t src_prot, idma_prot_t dst_prot);
/**
* Setting only source and destination protocols for a given transfer configuration
*
\param conf the configuration on which to set the source and destination protocols
\param src_prot Source protocol: AXI for transfer from L2, OBI for transfer from L1, INIT for zeromem
\param dst_prot Destination protocol: AXI for transfer to L2, OBI for transfer to L1, INIT for transfer to /dev/null
\return The generated configuration
*/
static inline unsigned int pulp_idma_set_conf_prot(unsigned int conf, idma_prot_t src_prot, idma_prot_t dst_prot);
/**
* iDMA transfer status
*
\param dma_tx_id The dma transfer identifier
\return transfer status. 1 if complete, 0 if still ongoing or waiting.
*/
static inline unsigned int pulp_idma_tx_cplt(unsigned int dma_tx_id);
/**
* iDMA 2D memory transfer
* Launches a standard 2D memory transfer
*
\param dst_addr The destination address
\param src_addr The source address
\param num_bytes The number bytes (per stride)
\param dst_stride The stride at the destination
\param src_stride The stride at the source
\param num_reps The number of repetitions
\return The dma transfer identifier
*/
//static inline unsigned int pulp_idma_memcpy_2d(unsigned int const dst_addr, unsigned int const src_addr, unsigned int num_bytes, unsigned int dst_stride, unsigned int src_stride, unsigned int num_reps);
/**
* iDMA advanced memory transfer
* Launches a 1D memory transfer with special configuration options
*
\param dst_addr The destination address
\param src_addr The source address
\param num_bytes The number bytes
\param decouple if set to true, there is no longer exactly one AXI write_request issued for
every read request. This mode can improve performance of unaligned transfers when crossing
the AXI page boundaries.
\param deburst if set, the DMA will split all bursts in single transfers
\param serialize if set, the DMA will only send AX belonging to a given Arbitrary 1D burst request
at a time. This is default behavior to prevent deadlocks. Setting `serialize` to
zero violates the AXI4+ATOP specification.
\param twod if set, the DMA will execute a 2D transfer
\param dst_stride if 2D, the stride at the destination
\param src_stride if 2D, the stride at the source
\param num_reps if 2D, the number of repetitions
\return The DMA transfer identifier
*/
//static inline unsigned int pulp_idma_memcpy_advanced(unsigned int const dst_addr, unsigned int const src_addr, unsigned int num_bytes, unsigned int decouple, unsigned int deburst, unsigned int serialize, unsigned int twod, unsigned int dst_stride, unsigned int src_stride, unsigned int num_reps);
/** Return the DMA status.
*
\return DMA status. 1 means there are still on-going transfers, 0 means nothing is on-going.
*/
static inline unsigned int plp_dma_status();
//!@}
/// @cond IMPLEM
// Select the base address through which the iDMA register file is accessed:
// the demux alias when the architecture provides one, the external address otherwise.
#if ARCHI_HAS_DMA_DEMUX
#define DMA_ADDR ARCHI_IDMA_DEMUX_ADDR
#else
// NOTE(review): ARCHI_IDMA_EXT_ADDR is not defined in the memory-map changes
// that accompany this header - confirm it exists before building without the demux.
#define DMA_ADDR ARCHI_IDMA_EXT_ADDR
#endif
// Register accessors. `offset` is a byte offset from DMA_ADDR (one of the
// IDMA_REG32_3D_*_REG_OFFSET constants). The PULP offseted load/store builtins
// are used when this is a RISC-V build that is neither plain RV32 nor LLVM;
// every other build goes through the generic pulp_read32/pulp_write32 helpers.
#if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__)
#define DMA_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)DMA_ADDR, (offset))
#define DMA_READ(offset) __builtin_pulp_OffsetedRead((int *)DMA_ADDR, (offset))
#else
#define DMA_WRITE(value, offset) pulp_write32(DMA_ADDR + (offset), (value))
#define DMA_READ(offset) pulp_read32(DMA_ADDR + (offset))
#endif
/**
 * Replace the source and destination protocol fields of a configuration word.
 *
 * Only the two 3-bit protocol fields (at IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET
 * and IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET) are modified; every other bit of
 * `conf` is returned unchanged.
 *
 * \param conf     configuration word to update
 * \param src_prot protocol selector for the source side
 * \param dst_prot protocol selector for the destination side
 * \return `conf` with both protocol fields overwritten
 */
static inline unsigned int pulp_idma_set_conf_prot(unsigned int conf, idma_prot_t src_prot, idma_prot_t dst_prot){
  // The bit-insert builtin is only used under the same condition as the
  // builtins behind DMA_WRITE/DMA_READ, so that plain RV32 and LLVM builds
  // take the portable path instead of relying on a toolchain extension.
#if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__)
  conf = __builtin_bitinsert(conf, src_prot, 3, IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET);
  conf = __builtin_bitinsert(conf, dst_prot, 3, IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET);
#else
  // Truncate each selector to its field width before shifting, matching the
  // builtin path (which inserts exactly 3 bits) so that an out-of-range value
  // can never spill into neighbouring configuration bits.
  const unsigned int src_field =
      ((unsigned int)src_prot & IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK) << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET;
  const unsigned int dst_field =
      ((unsigned int)dst_prot & IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK) << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET;
  const unsigned int clear_mask =
      ((unsigned int)IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) |
      ((unsigned int)IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET);

  conf &= ~clear_mask;            // set the relevant bits to 0
  conf |= src_field | dst_field;  // then insert the new selectors
#endif
  return conf;
}
/**
 * Build an iDMA configuration word from its individual fields.
 *
 * All bits not covered by the parameters below (e.g. the burst length
 * reduction fields) are left at 0.
 *
 * \param decouple_aw value for the DECOUPLE_AW bit (only the least significant bit is used)
 * \param decouple_rw value for the DECOUPLE_RW bit (only the least significant bit is used)
 * \param n_d         value for the 2-bit ENABLE_ND field (only the two least significant
 *                    bits are used); NOTE(review): the encoding of this field is not
 *                    visible here - confirm against the iDMA register documentation
 * \param src_prot    protocol selector for the source side (3-bit field)
 * \param dst_prot    protocol selector for the destination side (3-bit field)
 * \return The generated configuration word
 */
static inline unsigned int pulp_idma_get_conf(unsigned int decouple_aw, unsigned int decouple_rw, unsigned int n_d, idma_prot_t src_prot, idma_prot_t dst_prot){
  unsigned int conf;
  // The bit-insert builtin is only used under the same condition as the
  // builtins behind DMA_WRITE/DMA_READ; other builds take the portable path.
#if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__)
  conf = __builtin_bitinsert(0, decouple_aw, 1, IDMA_REG32_3D_CONF_DECOUPLE_AW_BIT);
  conf = __builtin_bitinsert(conf, decouple_rw, 1, IDMA_REG32_3D_CONF_DECOUPLE_RW_BIT);
  conf = __builtin_bitinsert(conf, n_d, 2, IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET);
  conf = __builtin_bitinsert(conf, src_prot, 3, IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET);
  conf = __builtin_bitinsert(conf, dst_prot, 3, IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET);
  // TODO: add burst length reduction
#else
  // Portable path: mask every value to its field width, then shift it into
  // place using the offsets from the generated register header.
  conf = ((decouple_aw & 0x1u) << IDMA_REG32_3D_CONF_DECOUPLE_AW_BIT)
       | ((decouple_rw & 0x1u) << IDMA_REG32_3D_CONF_DECOUPLE_RW_BIT)
       | ((n_d & IDMA_REG32_3D_CONF_ENABLE_ND_MASK) << IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET)
       | (((unsigned int)src_prot & IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK) << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET)
       | (((unsigned int)dst_prot & IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK) << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET);
  // TODO: add burst length reduction
#endif
  return conf;
}
/**
 * Check whether a transfer has completed.
 *
 * Reads the done-ID register 0 and compares it with the given transfer ID.
 * Both values are treated as IDMA_ID_COUNTER_WIDTH-bit counters:
 *  - if their most significant bits agree, the transfer is complete when its
 *    ID is less than or equal to the done ID;
 *  - if they differ, the transfer is reported complete when the done ID,
 *    with its most significant bit removed, is below a quarter of the
 *    counter range.
 *
 * NOTE(review): only IDMA_REG32_3D_DONE_ID_0 is consulted, while
 * plp_dma_memcpy launches transfers through either NEXT_ID_0 or NEXT_ID_1 -
 * confirm that done-ID register 0 also reflects transfers launched via
 * NEXT_ID_1.
 *
 * \param dma_tx_id The dma transfer identifier
 * \return 1 if the transfer is complete, 0 if it is still ongoing or waiting.
 */
static inline unsigned int pulp_idma_tx_cplt(unsigned int dma_tx_id) {
  // Unsigned constants: shifting a signed 1 into bit 31 is undefined behaviour.
  const unsigned int msb_shift = IDMA_ID_COUNTER_WIDTH - 1;
  const unsigned int msb_bit = 1u << msb_shift;
  const unsigned int quarter_range = 1u << (IDMA_ID_COUNTER_WIDTH - 2);

  unsigned int done_id = DMA_READ(IDMA_REG32_3D_DONE_ID_0_REG_OFFSET);
  unsigned int my_id = dma_tx_id & IDMA_ID_MASK;

  if ((done_id >> msb_shift) == (my_id >> msb_shift)) {
    return my_id <= done_id;
  }

  // The mask must be applied BEFORE the comparison. `<` binds tighter than
  // `&`, so without these parentheses the expression collapses to
  // `done_id & (constant < constant)`, i.e. always 0, and a waiter would
  // never see its transfer complete across a counter wrap.
  return (done_id & (IDMA_ID_MASK - msb_bit)) < quarter_range;
}
//static inline unsigned int pulp_idma_memcpy_2d(unsigned int const dst_addr, unsigned int const src_addr, unsigned int num_bytes, unsigned int dst_stride, unsigned int src_stride, unsigned int num_reps) {
// DMA_WRITE(src_addr, IDMA_REG32_2D_FRONTEND_SRC_ADDR_REG_OFFSET);
// DMA_WRITE(dst_addr, IDMA_REG32_2D_FRONTEND_DST_ADDR_REG_OFFSET);
// DMA_WRITE(num_bytes, IDMA_REG32_2D_FRONTEND_NUM_BYTES_REG_OFFSET);
// DMA_WRITE(IDMA_DEFAULT_CONFIG_2D, IDMA_REG32_2D_FRONTEND_CONF_REG_OFFSET);
// DMA_WRITE(src_stride, IDMA_REG32_2D_FRONTEND_STRIDE_SRC_REG_OFFSET);
// DMA_WRITE(dst_stride, IDMA_REG32_2D_FRONTEND_STRIDE_DST_REG_OFFSET);
// DMA_WRITE(num_reps, IDMA_REG32_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET);
// asm volatile("" : : : "memory");
//
// // Launch TX
// unsigned int dma_tx_id = DMA_READ(IDMA_REG32_2D_FRONTEND_NEXT_ID_REG_OFFSET);
//
// return dma_tx_id;
//}
//static inline unsigned int pulp_idma_memcpy_advanced(unsigned int const dst_addr, unsigned int const src_addr, unsigned int num_bytes, unsigned int decouple, unsigned int deburst, unsigned int serialize, unsigned int twod, unsigned int dst_stride, unsigned int src_stride, unsigned int num_reps) {
// DMA_WRITE(src_addr, IDMA_REG32_2D_FRONTEND_SRC_ADDR_REG_OFFSET);
// DMA_WRITE(dst_addr, IDMA_REG32_2D_FRONTEND_DST_ADDR_REG_OFFSET);
// DMA_WRITE(num_bytes, IDMA_REG32_2D_FRONTEND_NUM_BYTES_REG_OFFSET);
// unsigned int conf = pulp_idma_get_conf(decouple, deburst, serialize, twod);
// DMA_WRITE(conf, IDMA_REG32_2D_FRONTEND_CONF_REG_OFFSET);
// if (twod) {
// DMA_WRITE(src_stride, IDMA_REG32_2D_FRONTEND_STRIDE_SRC_REG_OFFSET);
// DMA_WRITE(dst_stride, IDMA_REG32_2D_FRONTEND_STRIDE_DST_REG_OFFSET);
// DMA_WRITE(num_reps, IDMA_REG32_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET);
// }
// asm volatile("" : : : "memory");
//
// // Launch TX
// unsigned int dma_tx_id = DMA_READ(IDMA_REG32_2D_FRONTEND_NEXT_ID_REG_OFFSET);
//
// return dma_tx_id;
//}
/**
 * Read the DMA status.
 *
 * \return The raw value of status register 0 (IDMA_REG32_3D_STATUS_0).
 *         plp_dma_barrier treats any non-zero value as "still busy".
 */
static inline unsigned int plp_dma_status() {
  unsigned int status_word = DMA_READ(IDMA_REG32_3D_STATUS_0_REG_OFFSET);
  return status_word;
}
/**
 * Block until the given transfer has completed.
 *
 * The completion check is performed first; only if the transfer is not yet
 * done does the core wait for (and clear) the iDMA event before checking again.
 *
 * \param dma_tx_id The identifier of the transfer to wait for.
 */
static inline void plp_dma_wait(unsigned int dma_tx_id) {
  for (;;) {
    if (pulp_idma_tx_cplt(dma_tx_id)) {
      break;
    }
    // Not done yet: wait for the next iDMA completion event, then re-check.
    eu_evt_maskWaitAndClr(1 << IDMA_EVENT);
  }
}
/**
 * Program and launch a 1D transfer.
 *
 * Writes source address, destination address, length and configuration
 * (IDMA_DEFAULT_CONFIG with the two protocol fields filled in) to the iDMA
 * registers, then launches the transfer by reading a NEXT_ID register:
 * NEXT_ID_0 for an OBI-to-AXI transfer, NEXT_ID_1 for every other
 * protocol combination.
 *
 * \param src      source address
 * \param dst      destination address
 * \param size     number of bytes to transfer
 * \param src_prot protocol selector for the source side
 * \param dst_prot protocol selector for the destination side
 * \return The value read from the NEXT_ID register, i.e. the transfer identifier.
 */
static inline int plp_dma_memcpy(unsigned int src, unsigned int dst, unsigned int size, idma_prot_t src_prot, idma_prot_t dst_prot) {
  unsigned int tx_id;
  unsigned int conf_word = pulp_idma_set_conf_prot(IDMA_DEFAULT_CONFIG, src_prot, dst_prot);

  // Program the transfer descriptor (register write order kept as-is).
  DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET);
  DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET);
  DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET);
  DMA_WRITE(conf_word, IDMA_REG32_3D_CONF_REG_OFFSET);

  // Compiler barrier: keep the launch read below after the writes above.
  asm volatile("" : : : "memory");

  // Launch TX: reading NEXT_ID starts the transfer and yields its identifier.
  tx_id = (src_prot == IDMA_PROT_OBI && dst_prot == IDMA_PROT_AXI)
              ? DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET)
              : DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET);
  return tx_id;
}
//static inline int plp_dma_l1ToExt(dma_ext_t ext, unsigned int loc, unsigned short size) {
// return pulp_idma_memcpy(ext, loc, size);
//}
//
//static inline int plp_dma_extToL1(unsigned int loc, dma_ext_t ext, unsigned short size) {
// return pulp_idma_memcpy(loc, ext, size);
//}
//static inline int plp_dma_memcpy_2d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, int ext2loc) {
// if (ext2loc) {
// return pulp_idma_memcpy_2d(loc, ext, length, length, stride, size/length);
// } else {
// return pulp_idma_memcpy_2d(ext, loc, length, stride, length, size/length);
// }
//}
//static inline int plp_dma_l1ToExt_2d(dma_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) {
// return pulp_idma_memcpy_2d(ext, loc, length, stride, length, size/length);
//}
//
//static inline int plp_dma_extToL1_2d(unsigned int loc, dma_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) {
// return pulp_idma_memcpy_2d(loc, ext, length, length, stride, size/length);
//}
static inline void plp_dma_barrier() {
while(plp_dma_status()) {
eu_evt_maskWaitAndClr(1 << IDMA_EVENT);
}
}
#endif // __HAL_IDMA_V2_H__

View file

@ -11,19 +11,20 @@ endif
platform ?= rtl
VSIM ?= vsim
ifdef PULP_RISCV_GCC_TOOLCHAIN
ifndef PULP_RUNTIME_GCC_TOOLCHAIN
PULP_RUNTIME_GCC_TOOLCHAIN := $(PULP_RISCV_GCC_TOOLCHAIN)
endif
endif
ifdef PULP_RUNTIME_GCC_TOOLCHAIN
PULP_CC := $(PULP_RUNTIME_GCC_TOOLCHAIN)/bin/$(PULP_CC)
PULP_LD := $(PULP_RUNTIME_GCC_TOOLCHAIN)/bin/$(PULP_LD)
else
ifdef PULP_RISCV_GCC_TOOLCHAIN
PULP_CC := $(PULP_RISCV_GCC_TOOLCHAIN)/bin/$(PULP_CC)
PULP_LD := $(PULP_RISCV_GCC_TOOLCHAIN)/bin/$(PULP_LD)
PULP_OBJDUMP := $(PULP_RUNTIME_GCC_TOOLCHAIN)/bin/$(PULP_OBJDUMP)
PULP_AR := $(PULP_RUNTIME_GCC_TOOLCHAIN)/bin/$(PULP_AR)
else
$(warning "Warning: Neither PULP_RUNTIME_GCC_TOOLCHAIN nor PULP_RISCV_GCC_TOOLCHAIN is set.\
Using defaults.")
endif
endif
ifdef gui