diff --git a/configs/carfield-cluster.sh b/configs/carfield-cluster.sh index 05b7be8..772dc3d 100644 --- a/configs/carfield-cluster.sh +++ b/configs/carfield-cluster.sh @@ -3,6 +3,7 @@ export PULPRT_TARGET=carfield-cluster export PULPRUN_TARGET=carfield-cluster export CONFIG_NO_FC=1 +export ARCHI_HMR=1 if [ -n "${ZSH_VERSION:-}" ]; then DIR="$(readlink -f -- "${(%):-%x}")" diff --git a/include/archi/chips/carfield-cluster/memory_map.h b/include/archi/chips/carfield-cluster/memory_map.h index 0f5dce3..24df05a 100644 --- a/include/archi/chips/carfield-cluster/memory_map.h +++ b/include/archi/chips/carfield-cluster/memory_map.h @@ -98,6 +98,7 @@ #define ARCHI_HWCE_OFFSET 0x00001000 #define ARCHI_ICACHE_CTRL_OFFSET 0x00001400 #define ARCHI_MCHAN_EXT_OFFSET 0x00001800 +#define ARCHI_HMR_OFFSET 0x00002000 #define ARCHI_CLUSTER_PERIPHERALS_ADDR ( ARCHI_CLUSTER_ADDR + ARCHI_CLUSTER_PERIPHERALS_OFFSET ) #define ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_GLOBAL_ADDR(cid) + ARCHI_CLUSTER_PERIPHERALS_OFFSET ) @@ -107,7 +108,15 @@ #define ARCHI_EU_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_EU_OFFSET ) #define ARCHI_HWCE_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_HWCE_OFFSET ) #define ARCHI_MCHAN_EXT_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_MCHAN_EXT_OFFSET ) +#define ARCHI_HMR_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_HMR_OFFSET ) +#define ARCHI_CLUSTER_CTRL_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_CLUSTER_CTRL_OFFSET ) +#define ARCHI_ICACHE_CTRL_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_ICACHE_CTRL_OFFSET ) +#define ARCHI_EU_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_EU_OFFSET ) +#define ARCHI_HWCE_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_HWCE_OFFSET ) +#define ARCHI_MCHAN_EXT_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_MCHAN_EXT_OFFSET ) +#define ARCHI_IDMA_EXT_GLOBAL_ADDR(cid) ( 
ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_IDMA_EXT_OFFSET ) +#define ARCHI_HMR_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_HMR_OFFSET ) /* diff --git a/include/archi/chips/carfield-cluster/properties.h b/include/archi/chips/carfield-cluster/properties.h index f7cc8e7..d2777ed 100644 --- a/include/archi/chips/carfield-cluster/properties.h +++ b/include/archi/chips/carfield-cluster/properties.h @@ -79,6 +79,7 @@ #define RISCV_VERSION 4 #define MCHAN_VERSION 7 #define PADS_VERSION 2 +#define HMR_VERSION 1 /* @@ -92,6 +93,17 @@ #endif #define ARCHI_NB_CLUSTER 1 +// #define ARCHI_HMR_NO_RAPID_RECOVERY +// #define ARCHI_HMR_FORCE_RAPID +#if defined(ARCHI_HMR_NO_RAPID_RECOVERY) && defined(ARCHI_HMR_FORCE_RAPID) +#error "Excluding and forcing rapid recovery not compatible" +#endif + +// #define ARCHI_HMR_TMR_ONLY +// #define ARCHI_HMR_DMR_ONLY +#if defined(ARCHI_HMR_DMR_ONLY) && defined(ARCHI_HMR_TMR_ONLY) +#error "TMR only and DMR only not compatible" +#endif /* * HWS diff --git a/include/archi/chips/carfield-cluster/pulp.h b/include/archi/chips/carfield-cluster/pulp.h index d6bf5be..f8102d9 100644 --- a/include/archi/chips/carfield-cluster/pulp.h +++ b/include/archi/chips/carfield-cluster/pulp.h @@ -45,5 +45,6 @@ #include "archi/udma/spim/udma_spim_v3.h" #include "archi/udma/uart/udma_uart_v1.h" #include "archi/udma/udma_v3.h" +#include "archi/hmr/hmr_v1.h" #endif diff --git a/include/archi/hmr/hmr_v1.h b/include/archi/hmr/hmr_v1.h new file mode 100644 index 0000000..532da6a --- /dev/null +++ b/include/archi/hmr/hmr_v1.h @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2023 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ARCHI_HMR_HMR_V1_H__ +#define __ARCHI_HMR_HMR_V1_H__ + +#define HMR_IN_INTERLEAVED 1 + +#define HMR_TOP_OFFSET 0x000 +#define HMR_CORE_OFFSET 0x100 +#define HMR_DMR_OFFSET 0x200 +#define HMR_TMR_OFFSET 0x300 + +#define HMR_CORE_INCREMENT 0x010 +#define HMR_CORE_SLL 0x004 +#define HMR_DMR_INCREMENT 0x010 +#define HMR_DMR_SLL 0x004 +#define HMR_TMR_INCREMENT 0x010 +#define HMR_TMR_SLL 0x004 + +// Generated register defines for HMR_registers + +#ifndef _HMR_REGISTERS_REG_DEFS_ +#define _HMR_REGISTERS_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +#define HMR_REGISTERS_PARAM_NUM_CORES 12 + +#define HMR_REGISTERS_PARAM_NUM_D_M_R_GROUPS 6 + +#define HMR_REGISTERS_PARAM_NUM_T_M_R_GROUPS 4 + +// Register width +#define HMR_REGISTERS_PARAM_REG_WIDTH 32 + +// Available Configurations from implemented hardware. +#define HMR_REGISTERS_AVAIL_CONFIG_REG_OFFSET 0x0 +#define HMR_REGISTERS_AVAIL_CONFIG_INDEPENDENT_BIT 0 +#define HMR_REGISTERS_AVAIL_CONFIG_DUAL_BIT 1 +#define HMR_REGISTERS_AVAIL_CONFIG_TRIPLE_BIT 2 +#define HMR_REGISTERS_AVAIL_CONFIG_RAPID_RECOVERY_BIT 8 + +// Enabled cores, based on the configuration. Can be used for barriers. +#define HMR_REGISTERS_CORES_EN_REG_OFFSET 0x4 +#define HMR_REGISTERS_CORES_EN_CORES_EN_MASK 0xfff +#define HMR_REGISTERS_CORES_EN_CORES_EN_OFFSET 0 +#define HMR_REGISTERS_CORES_EN_CORES_EN_FIELD \ + ((bitfield_field32_t) { .mask = HMR_REGISTERS_CORES_EN_CORES_EN_MASK, .index = HMR_REGISTERS_CORES_EN_CORES_EN_OFFSET }) + +// DMR configuration enable, one bit per DMR group. 
+#define HMR_REGISTERS_DMR_ENABLE_REG_OFFSET 0x8 +#define HMR_REGISTERS_DMR_ENABLE_DMR_ENABLE_MASK 0x3f +#define HMR_REGISTERS_DMR_ENABLE_DMR_ENABLE_OFFSET 0 +#define HMR_REGISTERS_DMR_ENABLE_DMR_ENABLE_FIELD \ + ((bitfield_field32_t) { .mask = HMR_REGISTERS_DMR_ENABLE_DMR_ENABLE_MASK, .index = HMR_REGISTERS_DMR_ENABLE_DMR_ENABLE_OFFSET }) + +// TMR configuration enable, one bit per TMR group. +#define HMR_REGISTERS_TMR_ENABLE_REG_OFFSET 0xc +#define HMR_REGISTERS_TMR_ENABLE_TMR_ENABLE_MASK 0xf +#define HMR_REGISTERS_TMR_ENABLE_TMR_ENABLE_OFFSET 0 +#define HMR_REGISTERS_TMR_ENABLE_TMR_ENABLE_FIELD \ + ((bitfield_field32_t) { .mask = HMR_REGISTERS_TMR_ENABLE_TMR_ENABLE_MASK, .index = HMR_REGISTERS_TMR_ENABLE_TMR_ENABLE_OFFSET }) + +// DMR configuration bits. +#define HMR_REGISTERS_DMR_CONFIG_REG_OFFSET 0x10 +#define HMR_REGISTERS_DMR_CONFIG_RAPID_RECOVERY_BIT 0 +#define HMR_REGISTERS_DMR_CONFIG_FORCE_RECOVERY_BIT 1 +#define HMR_REGISTERS_DMR_CONFIG_SETBACK_BIT 2 +#define HMR_REGISTERS_DMR_CONFIG_SYNCH_REQ_BIT 3 + +// TMR configuration bits. +#define HMR_REGISTERS_TMR_CONFIG_REG_OFFSET 0x14 +#define HMR_REGISTERS_TMR_CONFIG_DELAY_RESYNCH_BIT 0 +#define HMR_REGISTERS_TMR_CONFIG_SETBACK_BIT 1 +#define HMR_REGISTERS_TMR_CONFIG_RELOAD_SETBACK_BIT 2 +#define HMR_REGISTERS_TMR_CONFIG_RAPID_RECOVERY_BIT 3 +#define HMR_REGISTERS_TMR_CONFIG_FORCE_RESYNCH_BIT 4 +#define HMR_REGISTERS_TMR_CONFIG_SYNCH_REQ_BIT 5 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _HMR_REGISTERS_REG_DEFS_ +// End generated register defines for HMR_registers + +// Generated register defines for HMR_core_regs + +#ifndef _HMR_CORE_REGS_REG_DEFS_ +#define _HMR_CORE_REGS_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Register width +#define HMR_CORE_REGS_PARAM_REG_WIDTH 32 + +// Value to determine which redundancy mode the core with that ID is in. 
+#define HMR_CORE_REGS_CURRENT_MODE_REG_OFFSET 0x0 +#define HMR_CORE_REGS_CURRENT_MODE_INDEPENDENT_BIT 0 +#define HMR_CORE_REGS_CURRENT_MODE_DUAL_BIT 1 +#define HMR_CORE_REGS_CURRENT_MODE_TRIPLE_BIT 2 + +// Mismatches of the core +#define HMR_CORE_REGS_MISMATCHES_REG_OFFSET 0x4 + +// Stack Pointer storage register +#define HMR_CORE_REGS_SP_STORE_REG_OFFSET 0x8 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _HMR_CORE_REGS_REG_DEFS_ +// End generated register defines for HMR_core_regs + +// Generated register defines for HMR_dmr_regs + +#ifndef _HMR_DMR_REGS_REG_DEFS_ +#define _HMR_DMR_REGS_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Register width +#define HMR_DMR_REGS_PARAM_REG_WIDTH 32 + +// DMR configuration enable. +#define HMR_DMR_REGS_DMR_ENABLE_REG_OFFSET 0x0 +#define HMR_DMR_REGS_DMR_ENABLE_DMR_ENABLE_BIT 0 + +// DMR configuration bits. +#define HMR_DMR_REGS_DMR_CONFIG_REG_OFFSET 0x4 +#define HMR_DMR_REGS_DMR_CONFIG_RAPID_RECOVERY_BIT 0 +#define HMR_DMR_REGS_DMR_CONFIG_FORCE_RECOVERY_BIT 1 +#define HMR_DMR_REGS_DMR_CONFIG_SETBACK_BIT 2 +#define HMR_DMR_REGS_DMR_CONFIG_SYNCH_REQ_BIT 3 + +// Address for the last checkpoint. +#define HMR_DMR_REGS_CHECKPOINT_ADDR_REG_OFFSET 0x8 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _HMR_DMR_REGS_REG_DEFS_ +// End generated register defines for HMR_dmr_regs + +// Generated register defines for HMR_tmr_regs + +#ifndef _HMR_TMR_REGS_REG_DEFS_ +#define _HMR_TMR_REGS_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Register width +#define HMR_TMR_REGS_PARAM_REG_WIDTH 32 + +// TMR configuration enable. +#define HMR_TMR_REGS_TMR_ENABLE_REG_OFFSET 0x0 +#define HMR_TMR_REGS_TMR_ENABLE_TMR_ENABLE_BIT 0 + +// TMR configuration bits. 
+#define HMR_TMR_REGS_TMR_CONFIG_REG_OFFSET 0x4 +#define HMR_TMR_REGS_TMR_CONFIG_DELAY_RESYNCH_BIT 0 +#define HMR_TMR_REGS_TMR_CONFIG_SETBACK_BIT 1 +#define HMR_TMR_REGS_TMR_CONFIG_RELOAD_SETBACK_BIT 2 +#define HMR_TMR_REGS_TMR_CONFIG_RAPID_RECOVERY_BIT 3 +#define HMR_TMR_REGS_TMR_CONFIG_FORCE_RESYNCH_BIT 4 +#define HMR_TMR_REGS_TMR_CONFIG_SYNCH_REQ_BIT 5 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _HMR_TMR_REGS_REG_DEFS_ +// End generated register defines for HMR_tmr_regs + + +#endif // __ARCHI_HMR_HMR_V1_H__ diff --git a/include/hal/chips/carfield-cluster/pulp.h b/include/hal/chips/carfield-cluster/pulp.h index 2103f20..30ed9ce 100644 --- a/include/hal/chips/carfield-cluster/pulp.h +++ b/include/hal/chips/carfield-cluster/pulp.h @@ -41,5 +41,6 @@ #include "hal/udma/i2c/udma_i2c_v2.h" #include "hal/udma/spim/udma_spim_v3.h" #include "hal/udma/uart/udma_uart_v1.h" +#include "hal/hmr/hmr_v1.h" #endif diff --git a/include/hal/hmr/hmr_v1.h b/include/hal/hmr/hmr_v1.h new file mode 100644 index 0000000..c86181e --- /dev/null +++ b/include/hal/hmr/hmr_v1.h @@ -0,0 +1,205 @@ +/* + * Copyright (C) 2023 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __HAL_HMR_HMR_V1_H__ +#define __HAL_HMR_HMR_V1_H__ + +#include "archi/hmr/hmr_v1.h" +#include "archi/pulp.h" +#include + +#define NUM_TMR_GROUPS (ARCHI_CLUSTER_NB_PE/3) +#define NUM_TMR_CORES (ARCHI_CLUSTER_NB_PE-(ARCHI_CLUSTER_NB_PE%3)) +#define NUM_DMR_GROUPS (ARCHI_CLUSTER_NB_PE/2) +#define NUM_DMR_CORES (ARCHI_CLUSTER_NB_PE-(ARCHI_CLUSTER_NB_PE%2)) + +// Interleaved cores +#define TMR_IS_CORE(core_id) (core_id + +#define QUAUX(X) #X +#define QU(X) QUAUX(X) + +#define HMR_STATE_ALLOC_SIZE 0xA0 + +void __attribute__((naked)) pos_hmr_store_part_to_stack() { + __asm__ __volatile__ ( + // Allocate space on the stack + "add sp, sp, -" QU(HMR_STATE_ALLOC_SIZE) " \n\t" + + // Store registers to stack + // zero not stored as hardwired // x0 + "sw ra, 0x00(sp) \n\t" // x1 + // sp stored to HMR once complete // x2 + "sw t0, 0x0C(sp) \n\t" // x5 + "sw t1, 0x10(sp) \n\t" // x6 + "sw t2, 0x14(sp) \n\t" // x7 + : : : "memory"); +} + +void __attribute((naked)) pos_hmr_store_rest_to_stack() { + __asm__ __volatile__ ( + "sw gp, 0x04(sp) \n\t" // x3 + "sw tp, 0x08(sp) \n\t" // x4 + "sw x8, 0x18(sp) \n\t" // fp + "sw s1, 0x1C(sp) \n\t" // x9 + "sw a0, 0x20(sp) \n\t" // x10 + "sw a1, 0x24(sp) \n\t" // x11 + "sw a2, 0x28(sp) \n\t" // x12 + "sw a3, 0x2C(sp) \n\t" // x13 + "sw a4, 0x30(sp) \n\t" // x14 + "sw a5, 0x34(sp) \n\t" // x15 + "sw a6, 0x38(sp) \n\t" // x16 + "sw a7, 0x3C(sp) \n\t" // x17 + "sw s2, 0x40(sp) \n\t" // x18 + "sw s3, 0x44(sp) \n\t" // x19 + "sw s4, 0x48(sp) \n\t" // x20 + "sw s5, 0x4C(sp) \n\t" // x21 + "sw s6, 0x50(sp) \n\t" // x22 + "sw s7, 0x54(sp) \n\t" // x23 + "sw s8, 0x58(sp) \n\t" // x24 + "sw s9, 0x5C(sp) \n\t" // x25 + "sw s10, 0x60(sp) \n\t" // x26 + "sw s11, 0x64(sp) \n\t" // x27 + "sw t3, 0x68(sp) \n\t" // x28 + "sw t4, 0x6C(sp) \n\t" // x29 + "sw t5, 0x70(sp) \n\t" // x30 + "sw t6, 0x74(sp) \n\t" // x31 + + // Manually store necessary CSRs + "csrr t1, 0x341 \n\t" // mepc + "csrr t2, 0x300 \n\t" // mstatus + "sw t1, 0x78(sp) \n\t" // 
mepc + "csrr t1, 0x304 \n\t" // mie + "sw t2, 0x7C(sp) \n\t" // mstatus + "csrr t2, 0x305 \n\t" // mtvec + "sw t1, 0x80(sp) \n\t" // mie + "csrr t1, 0x340 \n\t" // mscratch + "sw t2, 0x84(sp) \n\t" // mtvec + "csrr t2, 0x342 \n\t" // mcause + "sw t1, 0x88(sp) \n\t" // mscratch + "csrr t1, 0x343 \n\t" // mtval + "sw t2, 0x8C(sp) \n\t" // mcause +#ifdef __ibex__ + "csrr t2, 0x7d0 \n\t" // miex +#endif // __ibex__ + "sw t1, 0x90(sp) \n\t" // mtval +#ifdef __ibex__ + "csrr t1, 0x7d1 \n\t" // mtvecx + "sw t2, 0x94(sp) \n\t" // miex + "sw t1, 0x98(sp) \n\t" // mtvecx +#endif // __ibex__ + + : : : "memory"); +} + +void __attribute((interrupt)) pos_hmr_load_part_from_stack() { + __asm__ __volatile__ ( + "lw ra, 0x00(sp) \n\t" // x1 + // sp loaded from HMR regs above // x2 + "lw t0, 0x0C(sp) \n\t" // x5 + "lw t1, 0x10(sp) \n\t" // x6 + "lw t2, 0x14(sp) \n\t" // x7 + + // Release space on the stack + "add sp, sp, " QU(HMR_STATE_ALLOC_SIZE) " \n\t" + : : : "memory"); +} + + +void __attribute__((naked)) pos_hmr_store_state_to_stack() { + + pos_hmr_store_part_to_stack(); + pos_hmr_store_rest_to_stack(); +} + + +// loads state from stack, except for ra which is stored at `0x00(sp)` before and `-HMR_STATE_ALLOC_SIZE(sp)` afterwards +void __attribute__((naked)) pos_hmr_load_state_from_stack() { + __asm__ __volatile__ ( + // Manually load necessary CSRs + "lw t1, 0x78(sp) \n\t" // mepc + "lw t2, 0x7C(sp) \n\t" // mstatus + "csrw 0x341, t1 \n\t" // mepc + "lw t1, 0x80(sp) \n\t" // mie + "csrw 0x300, t2 \n\t" // mstatus + "lw t2, 0x84(sp) \n\t" // mtvec + "csrw 0x304, t1 \n\t" // mie + "lw t1, 0x88(sp) \n\t" // mscratch + "csrw 0x305, t2 \n\t" // mtvec + "lw t2, 0x8C(sp) \n\t" // mcause + "csrw 0x340, t1 \n\t" // mscratch + "lw t1, 0x90(sp) \n\t" // mtval + "csrw 0x342, t2 \n\t" // mcause +#ifdef __ibex__ + "lw t2, 0x94(sp) \n\t" // miex +#endif // __ibex__ + "csrw 0x343, t1 \n\t" // mtval +#ifdef __ibex__ + "lw t1, 0x98(sp) \n\t" // mtvecx + "csrw 0x7d0, t2 \n\t" // miex + "csrw 
0x7d1, t1 \n\t" // mtvecx +#endif // __ibex__ + + // Load registers from stack + // zero not loaded as hardwired // x0 + // ra not touched + // "lw ra, 0x00(sp) \n\t" // x1 + // sp loaded from HMR regs above // x2 + "lw gp, 0x04(sp) \n\t" // x3 + "lw tp, 0x08(sp) \n\t" // x4 + "lw t0, 0x0C(sp) \n\t" // x5 + "lw t1, 0x10(sp) \n\t" // x6 + "lw t2, 0x14(sp) \n\t" // x7 + "lw x8, 0x18(sp) \n\t" // fp + "lw s1, 0x1C(sp) \n\t" // x9 + "lw a0, 0x20(sp) \n\t" // x10 + "lw a1, 0x24(sp) \n\t" // x11 + "lw a2, 0x28(sp) \n\t" // x12 + "lw a3, 0x2C(sp) \n\t" // x13 + "lw a4, 0x30(sp) \n\t" // x14 + "lw a5, 0x34(sp) \n\t" // x15 + "lw a6, 0x38(sp) \n\t" // x16 + "lw a7, 0x3C(sp) \n\t" // x17 + "lw s2, 0x40(sp) \n\t" // x18 + "lw s3, 0x44(sp) \n\t" // x19 + "lw s4, 0x48(sp) \n\t" // x20 + "lw s5, 0x4C(sp) \n\t" // x21 + "lw s6, 0x50(sp) \n\t" // x22 + "lw s7, 0x54(sp) \n\t" // x23 + "lw s8, 0x58(sp) \n\t" // x24 + "lw s9, 0x5C(sp) \n\t" // x25 + "lw s10, 0x60(sp) \n\t" // x26 + "lw s11, 0x64(sp) \n\t" // x27 + "lw t3, 0x68(sp) \n\t" // x28 + "lw t4, 0x6C(sp) \n\t" // x29 + "lw t5, 0x70(sp) \n\t" // x30 + "lw t6, 0x74(sp) \n\t" // x31 + + // Release space on the stack + "add sp, sp, " QU(HMR_STATE_ALLOC_SIZE) " \n\t" + : : : "memory"); +} + +void __attribute__((interrupt)) pos_hmr_sw_reload() { + // get sp from tmr reg + __asm__ __volatile__( + "csrr t0, 0xf14 \n\t" // Read core id + "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" + "andi t0, t0, 0x01f \n\t" + "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" + "add t0, t0, t1 \n\t" + "lw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" + "mv ra, t0 \n\t" + : : : "memory"); + + pos_hmr_load_state_from_stack(); + + // set tmr reg to 0 + __asm__ __volatile__( + "sw zero, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(ra) \n\t" + "lw ra, -" QU(HMR_STATE_ALLOC_SIZE) "(sp) \n\t" + : : : "memory"); + + // mret handled by __attribute((interrupt)) + // __asm__ __volatile__("mret" : : : "memory"); +} + +void __attribute__((naked)) 
pos_hmr_tmr_irq() { + pos_hmr_store_state_to_stack(); + + // store sp to hmr core reg + __asm__ __volatile__( + "csrr t0, 0xf14 \n\t" // Read core id + "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" + "andi t0, t0, 0x01f \n\t" + "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" + "add t0, t0, t1 \n\t" + "sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" + : : : "memory"); + + // several nops to delay and allow for core reset + __asm__ __volatile__( + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + : : : "memory"); + pos_hmr_sw_reload(); +} + +#define LOCAL_NUM_TMR_CORES 12 + +void __attribute__((naked)) pos_hmr_synch() { + pos_hmr_store_part_to_stack(); // ra, t0, t1, t2 + + // if (master_core(core_id()) { (using only empty regs) + // eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id())))); (with one of the empty regs) + // pos_hmr_load_part_from_stack(); + // return; + // } + __asm__ __volatile__( + // Read core id + "csrr t0, 0xf14 \n\t" + "andi t0, t0, 0x01f \n\t" + +#ifndef ARCHI_HMR_DMR_ONLY +#ifndef ARCHI_HMR_TMR_ONLY + // if not a tmr core, check dmr + "li t1, " QU(LOCAL_NUM_TMR_CORES) " \n\t" + "bgeu t0, t1, pos_hmr_synch_check_dmr \n\t" + + // get tmr offset of the id +#if HMR_IN_INTERLEAVED + "li t1, " QU(NUM_TMR_GROUPS) " \n\t" + "remu t1, t0, t1 \n\t" +#else + "li t1, 3 \n\t" + "divu t1, t0, t1 \n\t" +#endif // t1 is group id + + // read tmr register of the core + "slli t1, t1, " QU(HMR_TMR_SLL) " \n\t" + "li t2, " QU(ARCHI_HMR_ADDR + HMR_TMR_OFFSET) " \n\t" // t1 is tmr base address + "add t1, t1, t2 \n\t" + "lw t2, " QU(HMR_TMR_REGS_TMR_ENABLE_REG_OFFSET) "(t1) \n\t" + + // if tmr is not intended, pos_hmr_synch_check_dmr() + "beq t2, zero, pos_hmr_synch_check_dmr \n\t" +#endif // !ARCHI_HMR_TMR_ONLY + + // Set up ra as barrier id +#if HMR_IN_INTERLEAVED // ra is barrier id + "li ra, " QU(NUM_TMR_GROUPS) " \n\t" + "remu ra, t0, ra \n\t" + "addi ra, ra, 1 \n\t" +#else + "li ra, 3 \n\t" + "divu ra, t0, ra \n\t" + 
"srli t2, ra, 1 \n\t" + "addi ra, ra, 1 \n\t" + "add ra, ra, t2 \n\t" +#endif // ra is barrier id + +#ifndef ARCHI_HMR_NO_RAPID_RECOVERY + // if not main core, pos_hmr_synch_sw() +#if HMR_IN_INTERLEAVED + "li t2, " QU(NUM_TMR_GROUPS) " \n\t" + "bgeu t0, t2, pos_hmr_synch_sw \n\t" +#else + "li t2, 3 \n\t" + "divu t1, t0, t2 \n\t" // t1 is group id + "mul t2, t1, t2 \n\t" + "bne t2, t0, pos_hmr_synch_sw \n\t" // taken when core id != 3 * group id + +#ifndef ARCHI_HMR_FORCE_RAPID + // Fix t1 base address + "slli t1, t1, " QU(HMR_TMR_SLL) " \n\t" + "li t2, " QU(ARCHI_HMR_ADDR + HMR_TMR_OFFSET) " \n\t" + "add t1, t1, t2 \n\t" // t1 is tmr base address +#endif // !ARCHI_HMR_FORCE_RAPID +#endif + +#ifndef ARCHI_HMR_FORCE_RAPID + // if not rapidrecover, pos_hmr_synch_sw() + "lw t2, " QU(HMR_TMR_REGS_TMR_CONFIG_REG_OFFSET) "(t1) \n\t" + "andi t2, t2, " QU(1< this should lock the cores together + __asm__ __volatile__( // ra is barrier id + "sll t1, ra, " QU(EU_BARRIER_SIZE_LOG2) " \n\t" + "li t2, " QU(ARCHI_EU_DEMUX_ADDR + EU_BARRIER_DEMUX_OFFSET) " \n\t" + "add t1, t1, t2 \n\t" // t1 is tmr base address + "p.elw zero, " QU(EU_HW_BARR_TRIGGER_WAIT_CLEAR) "(t1) \n\t" // barrier + : : : "memory"); + + // several nops to delay and allow for core reset + __asm__ __volatile__( + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + : : : "memory"); + + pos_hmr_sw_reload(); +} + +void __attribute__((naked)) pos_hmr_tmr_synch_entry() { + pos_hmr_store_part_to_stack(); + pos_hmr_store_rest_to_stack(); + + // store sp to hmr core reg + __asm__ __volatile__( + "csrr t0, 0xf14 \n\t" // Read core id + "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" + "andi t0, t0, 0x01f \n\t" + "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" + "add t0, t0, t1 \n\t" + "sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" + : : : "memory"); +} + +void __attribute__((naked)) pos_hmr_tmr_synch_exit() { + // enter barrier -> this should lock the cores together + 
eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id())))); + + // several nops to delay and allow for core reset + __asm__ __volatile__( + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + : : : "memory"); + + pos_hmr_sw_reload(); +} + +void __attribute__((naked)) pos_hmr_tmr_synch() { + pos_hmr_tmr_synch_entry(); + pos_hmr_tmr_synch_exit(); +} + +void __attribute__((naked)) pos_hmr_dmr_synch_entry() { + pos_hmr_store_part_to_stack(); + pos_hmr_store_rest_to_stack(); + + // store sp to hmr core reg + __asm__ __volatile__( + "csrr t0, 0xf14 \n\t" // Read core id + "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" + "andi t0, t0, 0x01f \n\t" + "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" + "add t0, t0, t1 \n\t" + "sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" + : : : "memory"); +} + +void __attribute__((naked)) pos_hmr_dmr_synch_exit() { + // enter barrier -> this should lock the cores together + eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id())))); + + // several nops to delay and allow for core reset + __asm__ __volatile__( + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + : : : "memory"); + + pos_hmr_sw_reload(); +} + +void __attribute__((naked)) pos_hmr_dmr_synch() { + pos_hmr_dmr_synch_entry(); + pos_hmr_dmr_synch_exit(); +} + +// Calls function_handle() between hmr_self_enable_tmr() and hmr_disable_tmr() when TMR_IS_MAIN_CORE(core_id()) holds. +// Returns the result of function_handle(), or 0 when the section was skipped. +int hmr_tmr_critical_section(int (*function_handle)()) { + int ret = 0; + if (TMR_IS_MAIN_CORE(core_id())) { + // enter critical section + hmr_self_enable_tmr(); + + // do critical stuff + ret += function_handle(); + + // exit critical section + hmr_disable_tmr(0, TMR_GROUP_ID(core_id())); + + } + + return ret; +} + +// Calls function_handle() between hmr_self_enable_dmr() and hmr_disable_dmr() when DMR_IS_MAIN_CORE(core_id()) holds. +// Returns the result of function_handle(), or 0 when the section was skipped. +int hmr_dmr_critical_section(int (*function_handle)()) { + int ret = 0; + if (DMR_IS_MAIN_CORE(core_id())) { + // enter critical section + hmr_self_enable_dmr(); + + // do critical stuff + ret += function_handle(); + + // exit critical section + hmr_disable_dmr(0, DMR_GROUP_ID(core_id())); + } + + // report the result on every path, same as hmr_tmr_critical_section +  return ret; +} + +void hmr_tmr_performance_section(void 
(*function_handle)()) { + volatile unsigned int tmr_group_id = TMR_GROUP_ID(core_id()); + unsigned int tmr_config = hmr_get_tmr_config(0, tmr_group_id); + hmr_set_tmr_config_bare(0, tmr_group_id, tmr_config & ~(1< will be complex for stack... +// // pos_hmr_store_state_to(addr) +// // store addr to dmr reg? --> need to properly manage this... +// } + +// void pos_hmr_load_checkpoint() { +// // load addr from dmr reg? +// // pos_hmr_load_state_from(addr) +// // mret? ret? +// } diff --git a/rules/pulpos/targets/carfield-cluster.mk b/rules/pulpos/targets/carfield-cluster.mk index f636c5f..d29e1ed 100644 --- a/rules/pulpos/targets/carfield-cluster.mk +++ b/rules/pulpos/targets/carfield-cluster.mk @@ -56,6 +56,9 @@ PULP_SRCS += kernel/fll-v$(fll/version).c PULP_SRCS += kernel/freq-domains.c PULP_SRCS += kernel/chips/carfield-cluster/soc.c +# HMR +PULP_CFLAGS += -DARCHI_HMR +PULP_SRCS += kernel/hmr_synch.c include $(PULPRT_HOME)/rules/pulpos/configs/default.mk