/* * Copyright (C) 2023 ETH Zurich, University of Bologna * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #define QUAUX(X) #X #define QU(X) QUAUX(X) #define HMR_STATE_ALLOC_SIZE 0xA0 #define HMR_STORE_PART_TO_STACK __asm__ __volatile__ ( \ /* Allocate space on the stack */ \ "add sp, sp, -" QU(HMR_STATE_ALLOC_SIZE) " \n\t" \ \ /* Store registers to stack */ \ /* zero not stored as hardwired */ /* x0 */ \ "sw ra, 0x00(sp) \n\t" /* x1 */ \ /* sp stored to HMR once complete */ /* x2 */ \ "sw t0, 0x0C(sp) \n\t" /* x5 */ \ "sw t1, 0x10(sp) \n\t" /* x6 */ \ "sw t2, 0x14(sp) \n\t" /* x7 */ \ : : : "memory"); #define HMR_STORE_REST_TO_STACK __asm__ __volatile__ ( \ "sw gp, 0x04(sp) \n\t" /* x3 */ \ "sw tp, 0x08(sp) \n\t" /* x4 */ \ "sw x8, 0x18(sp) \n\t" /* fp */ \ "sw s1, 0x1C(sp) \n\t" /* x9 */ \ "sw a0, 0x20(sp) \n\t" /* x10 */ \ "sw a1, 0x24(sp) \n\t" /* x11 */ \ "sw a2, 0x28(sp) \n\t" /* x12 */ \ "sw a3, 0x2C(sp) \n\t" /* x13 */ \ "sw a4, 0x30(sp) \n\t" /* x14 */ \ "sw a5, 0x34(sp) \n\t" /* x15 */ \ "sw a6, 0x38(sp) \n\t" /* x16 */ \ "sw a7, 0x3C(sp) \n\t" /* x17 */ \ "sw s2, 0x40(sp) \n\t" /* x18 */ \ "sw s3, 0x44(sp) \n\t" /* x19 */ \ "sw s4, 0x48(sp) \n\t" /* x20 */ \ "sw s5, 0x4C(sp) \n\t" /* x21 */ \ "sw s6, 0x50(sp) \n\t" /* x22 */ \ "sw s7, 0x54(sp) \n\t" /* x23 */ \ "sw s8, 0x58(sp) \n\t" /* x24 */ \ "sw s9, 0x5C(sp) \n\t" /* x25 */ \ "sw s10, 0x60(sp) \n\t" /* x26 */ \ "sw s11, 0x64(sp) \n\t" /* x27 */ \ "sw t3, 0x68(sp) \n\t" /* x28 */ \ "sw t4, 0x6C(sp) \n\t" /* x29 */ \ "sw t5, 0x70(sp) \n\t" /* x30 */ \ "sw t6, 0x74(sp) \n\t" /* x31 */ \ \ /* Manually store necessary CSRs */ \ "csrr t1, 0x341 \n\t" /* mepc */ \ "csrr t2, 0x300 \n\t" /* mstatus */ \ "sw t1, 0x78(sp) \n\t" /* mepc */ \ "csrr t1, 0x304 \n\t" /* mie */ \ "sw t2, 0x7C(sp) \n\t" /* mstatus */ \ "csrr t2, 0x305 \n\t" /* mtvec */ \ "sw t1, 0x80(sp) \n\t" /* mie */ \ "csrr t1, 0x340 \n\t" /* mscratch */ \ "sw t2, 0x84(sp) \n\t" /* mtvec */ \ "csrr t2, 0x342 \n\t" /* mcause */ \ "sw t1, 0x88(sp) \n\t" /* mscratch */ \ "csrr t1, 0x343 \n\t" /* mtval */ \ "sw t2, 0x8C(sp) \n\t" /* mcause */ \ "sw t1, 0x90(sp) \n\t" /* mtval */ \ : : : "memory"); #define HMR_STORE_IBEX_EXTRA_TO_STACK __asm__ __volatile__ ( \ "csrr t2, 0x7d0 \n\t" /* miex */ \ "csrr t1, 0x7d1 \n\t" /* mtvecx */ \ "sw t2, 0x94(sp) \n\t" /* miex */ \ "sw t1, 0x98(sp) \n\t" /* mtvecx */ \ : : : "memory"); #define HMR_STORE_CV32_EXTRA_TO_STACK __asm__ __volatile__ ( \ "csrr t2, 0x347 \n\t" /* mintthresh */ \ "csrr t1, 0x307 \n\t" /* mtvt */ \ "sw t2, 0x94(sp) \n\t" /* mintthresh */\ "sw t1, 0x98(sp) \n\t" /* mtvt */ \ : : : "memory"); #define HMR_LOAD_PART_FROM_STACK __asm__ __volatile__ ( \ "lw ra, 0x00(sp) \n\t" /* x1 */ \ /* sp loaded from HMR regs above */ /* x2 */ \ "lw t0, 0x0C(sp) \n\t" /* x5 */ \ "lw t1, 0x10(sp) \n\t" /* x6 */ \ "lw t2, 0x14(sp) \n\t" /* x7 */ \ \ /* Release space on the stack */ \ "add sp, sp, " QU(HMR_STATE_ALLOC_SIZE) " \n\t" \ "mret \n\t" \ : : : "memory"); // void pos_hmr_store_state_to_stack() { // pos_hmr_store_part_to_stack(); // pos_hmr_store_rest_to_stack(); // // __asm__ __volatile__ ( // // "ret \n\t" // // : : : "memory"); // } #define HMR_LOAD_IBEX_EXTRA_FROM_STACK __asm__ __volatile__ ( \ "lw t2, 0x94(sp) \n\t" /* miex */ \ "lw t1, 0x98(sp) \n\t" /* mtvecx */ \ "csrw 0x7d0, t2 \n\t" /* miex */ \ "csrw 0x7d1, t1 \n\t" /* mtvecx */ \ : : : "memory"); #define HMR_LOAD_CV32_EXTRA_FROM_STACK __asm__ __volatile__ ( \ "lw t2, 0x94(sp) \n\t" /* mintthresh */ \ "lw t1, 0x98(sp) \n\t" /* mtvt */ \ "csrw 0x347, t2 \n\t" /* mintthresh */ \ "csrw 0x307, t1 \n\t" /* mtvt */ \ : : : "memory"); // loads state from stack, except for ra which is stored at `0x00(sp)` before and `-HMR_STATE_ALLOC_SIZE(sp)` afterwards #define HMR_LOAD_STATE_FROM_STACK __asm__ __volatile__ ( \ /* Manually load necessary CSRs */ \ "lw t1, 0x78(sp) \n\t" /* mepc */ \ "lw t2, 0x7C(sp) \n\t" /* mstatus */ \ "csrw 0x341, t1 \n\t" /* mepc */ \ "lw t1, 0x80(sp) \n\t" /* mie */ \ "csrw 0x300, t2 \n\t" /* mstatus */ \ "lw t2, 0x84(sp) \n\t" /* mtvec */ \ "csrw 0x304, t1 \n\t" /* mie */ \ "lw t1, 0x88(sp) \n\t" /* mscratch */ \ "csrw 0x305, t2 \n\t" /* mtvec */ \ "lw t2, 0x8C(sp) \n\t" /* mcause */ \ "csrw 0x340, t1 \n\t" /* mscratch */ \ "lw t1, 0x90(sp) \n\t" /* mtval */ \ "csrw 0x342, t2 \n\t" /* mcause */ \ "csrw 0x343, t1 \n\t" /* mtval */ \ \ /* Load registers from stack */ \ /* zero not loaded as hardwired /* x0 */ \ /* ra not touched */ \ /* "lw ra, 0x00(sp) \n\t" /* x1 */ \ /* sp loaded from HMR regs above */ /* x2 */ \ "lw gp, 0x04(sp) \n\t" /* x3 */ \ "lw tp, 0x08(sp) \n\t" /* x4 */ \ "lw t0, 0x0C(sp) \n\t" /* x5 */ \ "lw t1, 0x10(sp) \n\t" /* x6 */ \ "lw t2, 0x14(sp) \n\t" /* x7 */ \ "lw x8, 0x18(sp) \n\t" /* fp */ \ "lw s1, 0x1C(sp) \n\t" /* x9 */ \ "lw a0, 0x20(sp) \n\t" /* x10 */ \ "lw a1, 0x24(sp) \n\t" /* x11 */ \ "lw a2, 0x28(sp) \n\t" /* x12 */ \ "lw a3, 0x2C(sp) \n\t" /* x13 */ \ "lw a4, 0x30(sp) \n\t" /* x14 */ \ "lw a5, 0x34(sp) \n\t" /* x15 */ \ "lw a6, 0x38(sp) \n\t" /* x16 */ \ "lw a7, 0x3C(sp) \n\t" /* x17 */ \ "lw s2, 0x40(sp) \n\t" /* x18 */ \ "lw s3, 0x44(sp) \n\t" /* x19 */ \ "lw s4, 0x48(sp) \n\t" /* x20 */ \ "lw s5, 0x4C(sp) \n\t" /* x21 */ \ "lw s6, 0x50(sp) \n\t" /* x22 */ \ "lw s7, 0x54(sp) \n\t" /* x23 */ \ "lw s8, 0x58(sp) \n\t" /* x24 */ \ "lw s9, 0x5C(sp) \n\t" /* x25 */ \ "lw s10, 0x60(sp) \n\t" /* x26 */ \ "lw s11, 0x64(sp) \n\t" /* x27 */ \ "lw t3, 0x68(sp) \n\t" /* x28 */ \ "lw t4, 0x6C(sp) \n\t" /* x29 */ \ "lw t5, 0x70(sp) \n\t" /* x30 */ \ "lw t6, 0x74(sp) \n\t" /* x31 */ \ \ /* Release space on the stack */ \ "add sp, sp, " QU(HMR_STATE_ALLOC_SIZE) " \n\t" \ : : : "memory"); void __attribute__((naked)) pos_hmr_sw_reload() { // get sp from tmr reg __asm__ __volatile__( "csrr t0, 0xf14 \n\t" // Read core id "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" "andi t0, t0, 0x01f \n\t" "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" "add t0, t0, t1 \n\t" "lw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" "mv ra, t0 \n\t" : : : "memory"); #ifdef __ibex__ HMR_LOAD_IBEX_EXTRA_FROM_STACK #endif #ifdef __cv32e40p__ HMR_LOAD_CV32_EXTRA_FROM_STACK #endif HMR_LOAD_STATE_FROM_STACK // set tmr reg to 0 __asm__ __volatile__( "sw zero, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(ra) \n\t" "lw ra, -" QU(HMR_STATE_ALLOC_SIZE) "(sp) \n\t" : : : "memory"); __asm__ __volatile__("mret" : : : "memory"); } void __attribute__((naked)) pos_hmr_tmr_irq() { HMR_STORE_PART_TO_STACK HMR_STORE_REST_TO_STACK #ifdef __ibex__ HMR_STORE_IBEX_EXTRA_TO_STACK #endif #ifdef __cv32e40p__ HMR_STORE_CV32_EXTRA_TO_STACK #endif // store sp to hmr core reg __asm__ __volatile__( "csrr t0, 0xf14 \n\t" // Read core id "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" "andi t0, t0, 0x01f \n\t" "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" "add t0, t0, t1 \n\t" "sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" : : : "memory"); // several nops to delay and allow for core reset __asm__ __volatile__( "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" " j pos_hmr_sw_reload \n\t" : : : "memory"); // goto pos_hmr_sw_reload(); } #ifndef ARCHI_HMR_FIXED #define LOCAL_NUM_TMR_CORES 12 void __attribute__((naked)) pos_hmr_synch() { pos_hmr_store_part_to_stack(); // ra, t0, t1, t2 // if (master_core(core_id()) { (using only empty regs) // eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id())))); (with one of the empty regs) // pos_hmr_load_part_from_stack(); // return; // } __asm__ __volatile__( // Read core id "csrr t0, 0xf14 \n\t" "andi t0, t0, 0x01f \n\t" #ifndef ARCHI_HMR_DMR_ONLY #ifndef ARCHI_HMR_TMR_ONLY // if not a tmr core, check dmr "li t1, " QU(LOCAL_NUM_TMR_CORES) " \n\t" "bgeu t0, t1, pos_hmr_synch_check_dmr \n\t" // get tmr offset of the id #if HMR_IN_INTERLEAVED "li t1, " QU(NUM_TMR_GROUPS) " \n\t" "remu t1, t0, t1 \n\t" #else "li t1, 3 \n\t" "divu t1, t0, t1 \n\t" #endif // t1 is group id // read tmr register of the core "slli t1, t1, " QU(HMR_TMR_SLL) " \n\t" "li t2, " QU(ARCHI_HMR_ADDR + HMR_TMR_OFFSET) " \n\t" // t1 is tmr base address "add t1, t1, t2 \n\t" "lw t2, " QU(HMR_TMR_REGS_TMR_ENABLE_REG_OFFSET) "(t1) \n\t" // if tmr is not intended, pos_hmr_synch_check_dmr() "beq t2, zero, pos_hmr_synch_check_dmr \n\t" #endif // !ARCHI_HMR_TMR_ONLY // Set up ra as barrier id #if HMR_IN_INTERLEAVED // ra is barrier id "li ra, " QU(NUM_TMR_GROUPS) " \n\t" "remu ra, t0, ra \n\t" "addi ra, ra, 1 \n\t" #else "li ra, 3 \n\t" "divu ra, t0, ra \n\t" "srli t2, ra, 1 \n\t" "addi ra, ra, 1 \n\t" "add ra, ra, t2 \n\t" #endif // ra is barrier id #ifndef ARCHI_HMR_NO_RAPID_RECOVERY // if not main core, pos_hmr_synch_sw() #if HMR_IN_INTERLEAVED "li t2, " QU(NUM_TMR_GROUPS) " \n\t" "bgeu t0, t2, pos_hmr_synch_sw \n\t" #else "li t2, 3 \n\t" "divu t1, t0, t2 \n\t" // t1 is group id "mul t2, t1, t2 \n\t" "bneq t2, t0, pos_hmr_synch_sw \n\t" #ifndef ARCHI_HMR_FORCE_RAPID // Fix t1 base address "slli t1, t1, " QU(HMR_TMR_SLL) " \n\t" "li t2, " QU(ARCHI_HMR_ADDR + HMR_TMR_OFFSET) " \n\t" "add t1, t1, t2 \n\t" // t1 is tmr base address #endif // !ARCHI_HMR_FORCE_RAPID #endif #ifndef ARCHI_HMR_FORCE_RAPID // if not rapidrecover, pos_hmr_synch_sw() "lw t2, " QU(HMR_TMR_REGS_TMR_CONFIG_REG_OFFSET) "(t1) \n\t" "andi t2, t2, " QU(1< this should lock the cores together __asm__ __volatile__( // ra is barrier id "sll t1, ra, " QU(EU_BARRIER_SIZE_LOG2) " \n\t" "li t2, " QU(ARCHI_EU_DEMUX_ADDR + EU_BARRIER_DEMUX_OFFSET) " \n\t" "add t1, t1, t2 \n\t" // t1 is tmr base address "p.elw zero, " QU(EU_HW_BARR_TRIGGER_WAIT_CLEAR) "(t1) \n\t" // barrier : : : "memory"); // several nops to delay and allow for core reset __asm__ __volatile__( "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" : : : "memory"); pos_hmr_sw_reload(); } void __attribute__((naked)) pos_hmr_tmr_synch_entry() { pos_hmr_store_part_to_stack(); pos_hmr_store_rest_to_stack(); // store sp to hmr core reg __asm__ __volatile__( "csrr t0, 0xf14 \n\t" // Read core id "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" "andi t0, t0, 0x01f \n\t" "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" "add t0, t0, t1 \n\t" "sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" : : : "memory"); } void __attribute__((naked)) pos_hmr_tmr_synch_exit() { // enter barrier -> this should lock the cores together eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id())))); // several nops to delay and allow for core reset __asm__ __volatile__( "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" : : : "memory"); pos_hmr_sw_reload(); } void __attribute__((naked)) pos_hmr_tmr_synch() { pos_hmr_tmr_synch_entry(); pos_hmr_tmr_synch_exit(); } void __attribute__((naked)) pos_hmr_dmr_synch_entry() { pos_hmr_store_part_to_stack(); pos_hmr_store_rest_to_stack(); // store sp to hmr core reg __asm__ __volatile__( "csrr t0, 0xf14 \n\t" // Read core id "li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t" "andi t0, t0, 0x01f \n\t" "sll t0, t0, " QU(HMR_CORE_SLL) " \n\t" "add t0, t0, t1 \n\t" "sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t" : : : "memory"); } void __attribute__((naked)) pos_hmr_dmr_synch_exit() { // enter barrier -> this should lock the cores together eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id())))); // several nops to delay and allow for core reset __asm__ __volatile__( "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" : : : "memory"); pos_hmr_sw_reload(); } void __attribute__((naked)) pos_hmr_dmr_synch() { pos_hmr_dmr_synch_entry(); pos_hmr_dmr_synch_exit(); } int hmr_tmr_critical_section(int (*function_handle)()) { int ret = 0; if (TMR_IS_MAIN_CORE(core_id())) { // enter critical section hmr_self_enable_tmr(); // do critical stuff ret += function_handle(); // exit critical section hmr_disable_tmr(0, TMR_GROUP_ID(core_id())); } return ret; } int hmr_dmr_critical_section(int (*function_handle)()) { int ret = 0; if (DMR_IS_MAIN_CORE(core_id())) { // enter critical section hmr_self_enable_dmr(); // do critical stuff ret += function_handle(); // exit critical section hmr_disable_dmr(0, DMR_GROUP_ID(core_id())); } } void hmr_tmr_performance_section(void (*function_handle)()) { volatile unsigned int tmr_group_id = TMR_GROUP_ID(core_id()); unsigned int tmr_config = hmr_get_tmr_config(0, tmr_group_id); hmr_set_tmr_config_bare(0, tmr_group_id, tmr_config & ~(1< will be complex for stack... // // pos_hmr_store_state_to(addr) // // store addr to dmr reg? --> need to properly manage this... // } // void pos_hmr_load_checkpoint() { // // load addr from dmr reg? // // pos_hmr_load_state_from(addr) // // mret? ret? // } #endif