pulp-runtime/kernel/hmr_synch.c

/*
* Copyright (C) 2023 ETH Zurich, University of Bologna
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <pulp.h>
#define QUAUX(X) #X
#define QU(X) QUAUX(X)
#define HMR_STATE_ALLOC_SIZE 0xA0
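// QU()/QUAUX() implement the usual two-step stringification: QU() first expands
// its macro argument, then QUAUX() turns the expanded expression into a string
// literal that can be spliced into the inline-assembly templates below, e.g.
// QU(HMR_STATE_ALLOC_SIZE) becomes "0xA0" and
// QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) becomes the expanded address expression.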
void __attribute__((naked)) pos_hmr_store_part_to_stack() {
__asm__ __volatile__ (
// Allocate space on the stack
"add sp, sp, -" QU(HMR_STATE_ALLOC_SIZE) " \n\t"
// Store registers to stack
// zero not stored as hardwired // x0
"sw ra, 0x00(sp) \n\t" // x1
// sp stored to HMR once complete // x2
"sw t0, 0x0C(sp) \n\t" // x5
"sw t1, 0x10(sp) \n\t" // x6
"sw t2, 0x14(sp) \n\t" // x7
: : : "memory");
}
void __attribute__((naked)) pos_hmr_store_rest_to_stack() {
__asm__ __volatile__ (
"sw gp, 0x04(sp) \n\t" // x3
"sw tp, 0x08(sp) \n\t" // x4
"sw x8, 0x18(sp) \n\t" // fp
"sw s1, 0x1C(sp) \n\t" // x9
"sw a0, 0x20(sp) \n\t" // x10
"sw a1, 0x24(sp) \n\t" // x11
"sw a2, 0x28(sp) \n\t" // x12
"sw a3, 0x2C(sp) \n\t" // x13
"sw a4, 0x30(sp) \n\t" // x14
"sw a5, 0x34(sp) \n\t" // x15
"sw a6, 0x38(sp) \n\t" // x16
"sw a7, 0x3C(sp) \n\t" // x17
"sw s2, 0x40(sp) \n\t" // x18
"sw s3, 0x44(sp) \n\t" // x19
"sw s4, 0x48(sp) \n\t" // x20
"sw s5, 0x4C(sp) \n\t" // x21
"sw s6, 0x50(sp) \n\t" // x22
"sw s7, 0x54(sp) \n\t" // x23
"sw s8, 0x58(sp) \n\t" // x24
"sw s9, 0x5C(sp) \n\t" // x25
"sw s10, 0x60(sp) \n\t" // x26
"sw s11, 0x64(sp) \n\t" // x27
"sw t3, 0x68(sp) \n\t" // x28
"sw t4, 0x6C(sp) \n\t" // x29
"sw t5, 0x70(sp) \n\t" // x30
"sw t6, 0x74(sp) \n\t" // x31
// Manually store necessary CSRs
"csrr t1, 0x341 \n\t" // mepc
"csrr t2, 0x300 \n\t" // mstatus
"sw t1, 0x78(sp) \n\t" // mepc
"csrr t1, 0x304 \n\t" // mie
"sw t2, 0x7C(sp) \n\t" // mstatus
"csrr t2, 0x305 \n\t" // mtvec
"sw t1, 0x80(sp) \n\t" // mie
"csrr t1, 0x340 \n\t" // mscratch
"sw t2, 0x84(sp) \n\t" // mtvec
"csrr t2, 0x342 \n\t" // mcause
"sw t1, 0x88(sp) \n\t" // mscratch
"csrr t1, 0x343 \n\t" // mtval
"sw t2, 0x8C(sp) \n\t" // mcause
#ifdef __ibex__
"csrr t2, 0x7d0 \n\t" // miex
#endif // __ibex__
"sw t1, 0x90(sp) \n\t" // mtval
#ifdef __ibex__
"csrr t1, 0x7d1 \n\t" // mtvecx
"sw t2, 0x94(sp) \n\t" // miex
"sw t1, 0x98(sp) \n\t" // mtvecx
#endif // __ibex__
: : : "memory");
}
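// For reference, the frame written by the two store routines above can be pictured
// as the struct below (illustrative only, not used by the code; the field offsets
// match the sw/lw immediates and the total allocation is HMR_STATE_ALLOC_SIZE = 0xA0).
// x0 is hardwired and x2 (sp) is parked in the HMR core register instead.
//
// typedef struct {
// uint32_t ra; // 0x00 x1
// uint32_t gp; // 0x04 x3
// uint32_t tp; // 0x08 x4
// uint32_t t0_t2[3]; // 0x0C-0x14 x5-x7
// uint32_t s0; // 0x18 x8 (fp)
// uint32_t s1; // 0x1C x9
// uint32_t a0_a7[8]; // 0x20-0x3C x10-x17
// uint32_t s2_s11[10]; // 0x40-0x64 x18-x27
// uint32_t t3_t6[4]; // 0x68-0x74 x28-x31
// uint32_t mepc; // 0x78
// uint32_t mstatus; // 0x7C
// uint32_t mie; // 0x80
// uint32_t mtvec; // 0x84
// uint32_t mscratch; // 0x88
// uint32_t mcause; // 0x8C
// uint32_t mtval; // 0x90
// uint32_t miex; // 0x94 (Ibex only, CSR 0x7d0)
// uint32_t mtvecx; // 0x98 (Ibex only, CSR 0x7d1)
// uint32_t unused; // 0x9C padding up to 0xA0
// } pos_hmr_state_frame_t; // hypothetical name, sizeof == HMR_STATE_ALLOC_SIZE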
void __attribute__((interrupt)) pos_hmr_load_part_from_stack() {
__asm__ __volatile__ (
"lw ra, 0x00(sp) \n\t" // x1
// sp loaded from HMR regs above // x2
"lw t0, 0x0C(sp) \n\t" // x5
"lw t1, 0x10(sp) \n\t" // x6
"lw t2, 0x14(sp) \n\t" // x7
// Release space on the stack
"add sp, sp, " QU(HMR_STATE_ALLOC_SIZE) " \n\t"
: : : "memory");
}
void __attribute__((naked)) pos_hmr_store_state_to_stack() {
pos_hmr_store_part_to_stack();
pos_hmr_store_rest_to_stack();
}
// Loads state from the stack, except for ra, which stays on the stack: at 0x00(sp) before the stack release and at -HMR_STATE_ALLOC_SIZE(sp) after it
void __attribute__((naked)) pos_hmr_load_state_from_stack() {
__asm__ __volatile__ (
// Manually load necessary CSRs
"lw t1, 0x78(sp) \n\t" // mepc
"lw t2, 0x7C(sp) \n\t" // mstatus
"csrw 0x341, t1 \n\t" // mepc
"lw t1, 0x80(sp) \n\t" // mie
"csrw 0x300, t2 \n\t" // mstatus
"lw t2, 0x84(sp) \n\t" // mtvec
"csrw 0x304, t1 \n\t" // mie
"lw t1, 0x88(sp) \n\t" // mscratch
"csrw 0x305, t2 \n\t" // mtvec
"lw t2, 0x8C(sp) \n\t" // mcause
"csrw 0x340, t1 \n\t" // mscratch
"lw t1, 0x90(sp) \n\t" // mtval
"csrw 0x342, t2 \n\t" // mcause
#ifdef __ibex__
"lw t2, 0x94(sp) \n\t" // miex
#endif // __ibex__
"csrw 0x343, t1 \n\t" // mtval
#ifdef __ibex__
"lw t1, 0x98(sp) \n\t" // mtvecx
"csrw 0x7d0, t2 \n\t" // miex
"csrw 0x7d1, t1 \n\t" // mtvecx
#endif // __ibex__
// Load registers from stack
// zero not loaded as hardwired // x0
// ra not touched
// "lw ra, 0x00(sp) \n\t" // x1
// sp loaded from HMR regs above // x2
"lw gp, 0x04(sp) \n\t" // x3
"lw tp, 0x08(sp) \n\t" // x4
"lw t0, 0x0C(sp) \n\t" // x5
"lw t1, 0x10(sp) \n\t" // x6
"lw t2, 0x14(sp) \n\t" // x7
"lw x8, 0x18(sp) \n\t" // fp
"lw s1, 0x1C(sp) \n\t" // x9
"lw a0, 0x20(sp) \n\t" // x10
"lw a1, 0x24(sp) \n\t" // x11
"lw a2, 0x28(sp) \n\t" // x12
"lw a3, 0x2C(sp) \n\t" // x13
"lw a4, 0x30(sp) \n\t" // x14
"lw a5, 0x34(sp) \n\t" // x15
"lw a6, 0x38(sp) \n\t" // x16
"lw a7, 0x3C(sp) \n\t" // x17
"lw s2, 0x40(sp) \n\t" // x18
"lw s3, 0x44(sp) \n\t" // x19
"lw s4, 0x48(sp) \n\t" // x20
"lw s5, 0x4C(sp) \n\t" // x21
"lw s6, 0x50(sp) \n\t" // x22
"lw s7, 0x54(sp) \n\t" // x23
"lw s8, 0x58(sp) \n\t" // x24
"lw s9, 0x5C(sp) \n\t" // x25
"lw s10, 0x60(sp) \n\t" // x26
"lw s11, 0x64(sp) \n\t" // x27
"lw t3, 0x68(sp) \n\t" // x28
"lw t4, 0x6C(sp) \n\t" // x29
"lw t5, 0x70(sp) \n\t" // x30
"lw t6, 0x74(sp) \n\t" // x31
// Release space on the stack
"add sp, sp, " QU(HMR_STATE_ALLOC_SIZE) " \n\t"
: : : "memory");
}
void __attribute__((interrupt)) pos_hmr_sw_reload() {
// get sp from tmr reg
__asm__ __volatile__(
"csrr t0, 0xf14 \n\t" // Read core id
"li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"andi t0, t0, 0x01f \n\t"
"sll t0, t0, " QU(HMR_CORE_SLL) " \n\t"
"add t0, t0, t1 \n\t"
"lw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t"
"mv ra, t0 \n\t"
: : : "memory");
pos_hmr_load_state_from_stack();
// set tmr reg to 0
__asm__ __volatile__(
"sw zero, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(ra) \n\t"
"lw ra, -" QU(HMR_STATE_ALLOC_SIZE) "(sp) \n\t"
: : : "memory");
// mret handled by __attribute((interrupt))
// __asm__ __volatile__("mret" : : : "memory");
}
void __attribute__((naked)) pos_hmr_tmr_irq() {
pos_hmr_store_state_to_stack();
// store sp to hmr core reg
__asm__ __volatile__(
"csrr t0, 0xf14 \n\t" // Read core id
"li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"andi t0, t0, 0x01f \n\t"
"sll t0, t0, " QU(HMR_CORE_SLL) " \n\t"
"add t0, t0, t1 \n\t"
"sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t"
: : : "memory");
// several nops to delay and allow for core reset
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
: : : "memory");
pos_hmr_sw_reload();
}
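// Both pos_hmr_tmr_irq() and pos_hmr_sw_reload() address the per-core HMR register
// block as ARCHI_HMR_ADDR + HMR_CORE_OFFSET + (core_id << HMR_CORE_SLL), the same
// computation the C code in hmr_tmr_perf_setup_sp() below spells out. A hypothetical
// helper expressing it, for illustration only (not used by the assembly paths):
//
// static inline volatile unsigned int *pos_hmr_core_sp_store_addr(unsigned int core_id) {
// // core_id is assumed to already be masked to the cluster-local range (& 0x1f)
// return (volatile unsigned int *)(ARCHI_HMR_ADDR + HMR_CORE_OFFSET
// + (core_id << HMR_CORE_SLL)
// + HMR_CORE_REGS_SP_STORE_REG_OFFSET);
// }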
#define LOCAL_NUM_TMR_CORES 12
void __attribute__((naked)) pos_hmr_synch() {
pos_hmr_store_part_to_stack(); // ra, t0, t1, t2
// if (master_core(core_id())) { (using only empty regs)
// eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id())))); (with one of the empty regs)
// pos_hmr_load_part_from_stack();
// return;
// }
__asm__ __volatile__(
// Read core id
"csrr t0, 0xf14 \n\t"
"andi t0, t0, 0x01f \n\t"
#ifndef ARCHI_HMR_DMR_ONLY
#ifndef ARCHI_HMR_TMR_ONLY
// if not a tmr core, check dmr
"li t1, " QU(LOCAL_NUM_TMR_CORES) " \n\t"
"bgeu t0, t1, pos_hmr_synch_check_dmr \n\t"
// get tmr offset of the id
#if HMR_IN_INTERLEAVED
"li t1, " QU(NUM_TMR_GROUPS) " \n\t"
"remu t1, t0, t1 \n\t"
#else
"li t1, 3 \n\t"
"divu t1, t0, t1 \n\t"
#endif // t1 is group id
// read tmr register of the core
"slli t1, t1, " QU(HMR_TMR_SLL) " \n\t"
"li t2, " QU(ARCHI_HMR_ADDR + HMR_TMR_OFFSET) " \n\t" // t1 is tmr base address
"add t1, t1, t2 \n\t"
"lw t2, " QU(HMR_TMR_REGS_TMR_ENABLE_REG_OFFSET) "(t1) \n\t"
// if TMR is not enabled, branch to pos_hmr_synch_check_dmr
"beq t2, zero, pos_hmr_synch_check_dmr \n\t"
#endif // !ARCHI_HMR_TMR_ONLY
// Set up ra as barrier id
#if HMR_IN_INTERLEAVED // ra is barrier id
"li ra, " QU(NUM_TMR_GROUPS) " \n\t"
"remu ra, t0, ra \n\t"
"addi ra, ra, 1 \n\t"
#else
"li ra, 3 \n\t"
"divu ra, t0, ra \n\t"
"srli t2, ra, 1 \n\t"
"addi ra, ra, 1 \n\t"
"add ra, ra, t2 \n\t"
#endif // ra is barrier id
#ifndef ARCHI_HMR_NO_RAPID_RECOVERY
// if not main core, pos_hmr_synch_sw()
#if HMR_IN_INTERLEAVED
"li t2, " QU(NUM_TMR_GROUPS) " \n\t"
"bgeu t0, t2, pos_hmr_synch_sw \n\t"
#else
"li t2, 3 \n\t"
"divu t1, t0, t2 \n\t" // t1 is group id
"mul t2, t1, t2 \n\t"
"bneq t2, t0, pos_hmr_synch_sw \n\t"
#ifndef ARCHI_HMR_FORCE_RAPID
// Fix t1 base address
"slli t1, t1, " QU(HMR_TMR_SLL) " \n\t"
"li t2, " QU(ARCHI_HMR_ADDR + HMR_TMR_OFFSET) " \n\t"
"add t1, t1, t2 \n\t" // t1 is tmr base address
#endif // !ARCHI_HMR_FORCE_RAPID
#endif
#ifndef ARCHI_HMR_FORCE_RAPID
// if not rapidrecover, pos_hmr_synch_sw()
"lw t2, " QU(HMR_TMR_REGS_TMR_CONFIG_REG_OFFSET) "(t1) \n\t"
"andi t2, t2, " QU(1<<HMR_TMR_REGS_TMR_CONFIG_RAPID_RECOVERY_BIT) " \n\t"
"beq t2, zero, pos_hmr_synch_sw \n\t"
#endif
// This is main core in rapidrecover mode
"j pos_hmr_synch_rapid \n"
#else // ARCHI_HMR_NO_RAPID_RECOVERY
"j pos_hmr_synch_sw \n\t"
#endif // !ARCHI_HMR_NO_RAPID_RECOVERY
#endif // !ARCHI_HMR_DMR_ONLY
#ifndef ARCHI_HMR_TMR_ONLY
// Assume DMR (we are not in TMR, but we did reach the reliability entry, so DMR is implied)
"pos_hmr_synch_check_dmr: \n\t"
// Set up ra as barrier id
#if HMR_IN_INTERLEAVED // ra is barrier id
"li t1, " QU(NUM_DMR_GROUPS) " \n\t"
"remu t1, t0, t1 \n\t"
"addi ra, t1, 1 \n"
#else
"srli t1, t0, 1 \n\t"
"addi ra, t1, 1 \n"
#endif
// get dmr offset of the id
#if HMR_IN_INTERLEAVED
"li t1, " QU(NUM_DMR_GROUPS) " \n\t"
"remu t1, t0, t1 \n\t"
#else
"srli t1, t0, 1 \n\t"
#endif // t1 is group id
// if not main core, pos_hmr_synch_sw()
#if HMR_IN_INTERLEAVED
"li t2, " QU(NUM_DMR_GROUPS) " \n\t"
"bgeu t0, t2, pos_hmr_synch_sw \n\t"
#else
"slli t1, t0, 1 \n\t" // t1 is group id
"srli t2, t1, 1 \n\t"
"bneq t2, t0, pos_hmr_synch_sw \n\t"
#endif
#ifndef ARCHI_HMR_FORCE_RAPID
// if not rapidrecover, pos_hmr_synch_sw()
"slli t1, t1, " QU(HMR_DMR_SLL) " \n\t"
"li t2, " QU(ARCHI_HMR_ADDR + HMR_DMR_OFFSET) " \n\t"
"add t1, t1, t2 \n\t" // t1 is dmr base address
"lw t2, " QU(HMR_DMR_REGS_DMR_CONFIG_REG_OFFSET) "(t1) \n\t"
"andi t2, t2, " QU(1<<HMR_DMR_REGS_DMR_CONFIG_RAPID_RECOVERY_BIT) " \n\t"
"beq t2, zero, pos_hmr_synch_sw \n\t"
#endif
#endif // !ARCHI_HMR_TMR_ONLY
#ifndef ARCHI_HMR_NO_RAPID_RECOVERY
// This is main core in rapidrecover mode
"pos_hmr_synch_rapid: \n\t"
"sll t1, ra, " QU(EU_BARRIER_SIZE_LOG2) " \n\t"
"li t2, " QU(ARCHI_EU_DEMUX_ADDR + EU_BARRIER_DEMUX_OFFSET) " \n\t" // t1 is tmr base address
"add t1, t1, t2 \n\t"
"p.elw zero, " QU(EU_HW_BARR_TRIGGER_WAIT_CLEAR) "(t1) \n\t" // barrier
/* The following nops were removed to let the cores continue executing immediately */
// "nop\n\t"
// "nop\n\t"
// "nop\n\t"
// "nop\n\t"
// "nop\n\t"
"j pos_hmr_load_part_from_stack \n" // Executes mret
#endif // !ARCHI_HMR_NO_RAPID_RECOVERY
// Rest is the normal SW routine
"pos_hmr_synch_sw: \n\t"
: : : "memory");
// t0 is core_id
// ra is barrier id
pos_hmr_store_rest_to_stack(); // does not touch t0, ra
// store sp to hmr core reg
__asm__ __volatile__( // t0 is core_id
"li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"sll t2, t0, " QU(HMR_CORE_SLL) " \n\t"
"add t2, t2, t1 \n\t"
"sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t2) \n\t"
: : : "memory");
// enter barrier -> this should lock the cores together
__asm__ __volatile__( // ra is barrier id
"sll t1, ra, " QU(EU_BARRIER_SIZE_LOG2) " \n\t"
"li t2, " QU(ARCHI_EU_DEMUX_ADDR + EU_BARRIER_DEMUX_OFFSET) " \n\t"
"add t1, t1, t2 \n\t" // t1 is tmr base address
"p.elw zero, " QU(EU_HW_BARR_TRIGGER_WAIT_CLEAR) "(t1) \n\t" // barrier
: : : "memory");
// several nops to delay and allow for core reset
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
: : : "memory");
pos_hmr_sw_reload();
}
void __attribute__((naked)) pos_hmr_tmr_synch_entry() {
pos_hmr_store_part_to_stack();
pos_hmr_store_rest_to_stack();
// store sp to hmr core reg
__asm__ __volatile__(
"csrr t0, 0xf14 \n\t" // Read core id
"li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"andi t0, t0, 0x01f \n\t"
"sll t0, t0, " QU(HMR_CORE_SLL) " \n\t"
"add t0, t0, t1 \n\t"
"sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t"
: : : "memory");
}
void __attribute__((naked)) pos_hmr_tmr_synch_exit() {
// enter barrier -> this should lock the cores together
eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id()))));
// several nops to delay and allow for core reset
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
: : : "memory");
pos_hmr_sw_reload();
}
void __attribute__((naked)) pos_hmr_tmr_synch() {
pos_hmr_tmr_synch_entry();
pos_hmr_tmr_synch_exit();
}
void __attribute__((naked)) pos_hmr_dmr_synch_entry() {
pos_hmr_store_part_to_stack();
pos_hmr_store_rest_to_stack();
// store sp to hmr core reg
__asm__ __volatile__(
"csrr t0, 0xf14 \n\t" // Read core id
"li t1, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"andi t0, t0, 0x01f \n\t"
"sll t0, t0, " QU(HMR_CORE_SLL) " \n\t"
"add t0, t0, t1 \n\t"
"sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t"
: : : "memory");
}
void __attribute__((naked)) pos_hmr_dmr_synch_exit() {
// enter barrier -> this should lock the cores together
eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))));
// several nops to delay and allow for core reset
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
: : : "memory");
pos_hmr_sw_reload();
}
void __attribute__((naked)) pos_hmr_dmr_synch() {
pos_hmr_dmr_synch_entry();
pos_hmr_dmr_synch_exit();
}
int hmr_tmr_critical_section(int (*function_handle)()) {
int ret = 0;
if (TMR_IS_MAIN_CORE(core_id())) {
// enter critical section
hmr_self_enable_tmr();
// do critical stuff
ret += function_handle();
// exit critical section
hmr_disable_tmr(0, TMR_GROUP_ID(core_id()));
}
return ret;
}
int hmr_dmr_critical_section(int (*function_handle)()) {
int ret = 0;
if (DMR_IS_MAIN_CORE(core_id())) {
// enter critical section
hmr_self_enable_dmr();
// do critical stuff
ret += function_handle();
// exit critical section
hmr_disable_dmr(0, DMR_GROUP_ID(core_id()));
}
return ret;
}
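// Minimal usage sketch for the two critical-section wrappers above (illustrative
// only; `check_state` is a hypothetical user routine returning an int):
//
// static int check_state() {
// return 0; // redundant-mode work goes here
// }
//
// void example_critical_sections() {
// int tmr_ret = hmr_tmr_critical_section(check_state); // runs check_state under TMR
// int dmr_ret = hmr_dmr_critical_section(check_state); // runs check_state under DMR
// (void)tmr_ret; (void)dmr_ret;
// }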
void hmr_tmr_performance_section(void (*function_handle)()) {
volatile unsigned int tmr_group_id = TMR_GROUP_ID(core_id());
unsigned int tmr_config = hmr_get_tmr_config(0, tmr_group_id);
hmr_set_tmr_config_bare(0, tmr_group_id, tmr_config & ~(1<<HMR_REGISTERS_TMR_CONFIG_SETBACK_BIT));
register unsigned int my_core_id;
hmr_disable_tmr(0, tmr_group_id);
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"csrr %[core_id], 0xf14 \n\t" // Read core id
: [core_id] "=r" (my_core_id) : : "memory");
// volatile unsigned int my_core_id = core_id();
if (TMR_IS_MAIN_CORE(my_core_id)) {
hmr_set_tmr_config_bare(0, TMR_GROUP_ID(my_core_id), tmr_config);
} else {
// get sp from tmr reg
__asm__ __volatile__(
// "csrr a5, 0xf14 \n\t" // Read core id
"li sp, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"andi %[core_id], %[core_id], 0x01f \n\t"
"sll %[core_id], %[core_id], " QU(HMR_CORE_SLL) " \n\t"
"add %[core_id], %[core_id], sp \n\t"
"lw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(%[core_id]) \n\t"
: : [core_id] "r" (my_core_id) : "memory");
eu_evt_maskSet((1<<PULP_DISPATCH_EVENT) | (1<<PULP_MUTEX_EVENT) | (1<<PULP_HW_BAR_EVENT));
}
function_handle();
if (TMR_IS_MAIN_CORE(core_id())) {
// Enable TMR
pulp_write32(ARCHI_HMR_ADDR + HMR_TMR_OFFSET + HMR_TMR_INCREMENT*core_id() + HMR_TMR_REGS_TMR_ENABLE_REG_OFFSET, 1<<HMR_TMR_REGS_TMR_ENABLE_TMR_ENABLE_BIT);
#if !defined(ARCHI_HMR_FORCE_RAPID) || !defined(ARCHI_HMR_NO_RAPID_RECOVERY)
// TODO check Rapid
}
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id()))));
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
#elif defined(ARCHI_HMR_FORCE_RAPID)
}
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id()))));
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
#else
pos_hmr_tmr_synch_entry();
// Ugly hack allows for proper cleanup of stack by function
__asm__ __volatile__(
"auipc t1, 0\n\t"
"addi t1, t1, 12\n\t" // Add instruction increment to after pos_hmr_tmr_synch_exit call
"sw t1, 0x78(sp) \n\t" // Update mepc on stack for return later
"j pos_hmr_tmr_synch_exit\n\t"
);
} else {
__asm__ __volatile__ (
"j pos_hmr_tmr_synch_exit\n\t");
}
#endif
}
void hmr_tmr_perf_setup_sp() {
unsigned int core_id_1 = TMR_CORE_ID(TMR_GROUP_ID(core_id()), 1);
unsigned int core_id_2 = TMR_CORE_ID(TMR_GROUP_ID(core_id()), 2);
unsigned int *extra_sp_1 = (unsigned int *)(ARCHI_HMR_ADDR + HMR_CORE_OFFSET + (core_id_1 << HMR_CORE_SLL) + HMR_CORE_REGS_SP_STORE_REG_OFFSET);
unsigned int *extra_sp_2 = (unsigned int *)(ARCHI_HMR_ADDR + HMR_CORE_OFFSET + (core_id_2 << HMR_CORE_SLL) + HMR_CORE_REGS_SP_STORE_REG_OFFSET);
eu_bar_setup(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id()))), TMR_BARRIER_SETUP(TMR_GROUP_ID(core_id())));
pulp_write32(extra_sp_1, (unsigned int)((core_id_1+1)*CLUSTER_STACK_SIZE + cluster_stacks));
pulp_write32(extra_sp_2, (unsigned int)((core_id_2+1)*CLUSTER_STACK_SIZE + cluster_stacks));
}
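// Possible call sequence on the TMR main core, based on the two functions above
// (illustrative only; `parallel_work` is a hypothetical worker the group cores
// execute independently while TMR is split):
//
// static void parallel_work() {
// // per-core work while the TMR group runs independently
// }
//
// void example_tmr_performance() {
// hmr_tmr_perf_setup_sp(); // barrier setup and stacks for the shadow cores
// hmr_tmr_performance_section(parallel_work); // split, run, then re-synchronize
// }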
void hmr_dmr_performance_section(void (*function_handle)()) {
volatile unsigned int dmr_group_id = DMR_GROUP_ID(core_id());
unsigned int dmr_config = hmr_get_dmr_config(0, dmr_group_id);
hmr_set_dmr_config_bare(0, dmr_group_id, dmr_config & ~(1<<HMR_DMR_REGS_DMR_CONFIG_SETBACK_BIT));
register unsigned int my_core_id;
hmr_disable_dmr(0, dmr_group_id);
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"csrr %[core_id], 0xf14 \n\t" // Read core id
: [core_id] "=r" (my_core_id) : : "memory");
// volatile unsigned int my_core_id = core_id();
if (DMR_IS_MAIN_CORE(my_core_id)) {
hmr_set_dmr_config_bare(0, DMR_GROUP_ID(my_core_id), dmr_config);
} else {
// get sp from dmr reg
__asm__ __volatile__(
// "csrr a5, 0xf14 \n\t" // Read core id
"li sp, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
"andi %[core_id], %[core_id], 0x01f \n\t"
"sll %[core_id], %[core_id], " QU(HMR_CORE_SLL) " \n\t"
"add %[core_id], %[core_id], sp \n\t"
"lw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(%[core_id]) \n\t"
: : [core_id] "r" (my_core_id) : "memory");
eu_evt_maskSet((1<<PULP_DISPATCH_EVENT) | (1<<PULP_MUTEX_EVENT) | (1<<PULP_HW_BAR_EVENT));
}
function_handle();
if (DMR_IS_MAIN_CORE(core_id())) {
// Enable DMR
pulp_write32(ARCHI_HMR_ADDR + HMR_DMR_OFFSET + HMR_DMR_INCREMENT*core_id() + HMR_DMR_REGS_DMR_ENABLE_REG_OFFSET, 1<<HMR_DMR_REGS_DMR_ENABLE_DMR_ENABLE_BIT);
#if !defined(ARCHI_HMR_FORCE_RAPID) || !defined(ARCHI_HMR_NO_RAPID_RECOVERY)
// TODO check Rapid
}
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))));
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
#elif defined(ARCHI_HMR_FORCE_RAPID)
}
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))));
__asm__ __volatile__(
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
);
#else
pos_hmr_dmr_synch_entry();
// Ugly hack allows for proper cleanup of stack by function
__asm__ __volatile__(
"auipc t1, 0\n\t"
"addi t1, t1, 12\n\t" // Add instruction increment to after pos_hmr_tmr_synch_exit call
"sw t1, 0x78(sp) \n\t" // Update mepc on stack for return later
"j pos_hmr_dmr_synch_exit\n\t"
);
} else {
__asm__ __volatile__ (
"j pos_hmr_dmr_synch_exit\n\t");
}
#endif
}
void hmr_dmr_perf_setup_sp() {
unsigned int core_id_1 = DMR_CORE_ID(DMR_GROUP_ID(core_id()), 1);
unsigned int *extra_sp_1 = (unsigned int *)(ARCHI_HMR_ADDR + HMR_CORE_OFFSET + (core_id_1 << HMR_CORE_SLL) + HMR_CORE_REGS_SP_STORE_REG_OFFSET);
eu_bar_setup(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))), DMR_BARRIER_SETUP(DMR_GROUP_ID(core_id())));
pulp_write32(extra_sp_1, (unsigned int)((core_id_1+1)*CLUSTER_STACK_SIZE + cluster_stacks));
}
// void pos_hmr_tmr_unsync() {
// // Update event unit mask
// // write unsync to hmr tmr ctrl reg
// if (!TMR_IS_MAIN_CORE(core_id())) {
// // get sp from a core reg
// if (sp == 0) {
// j
// }
// pos_hmr_load_state_from_stack();
// // mret?
// }
// }
// void pos_hmr_create_checkpoint() {
// // get checkpoint addr (or alloc the space?) --> will be complex for stack...
// // pos_hmr_store_state_to(addr)
// // store addr to dmr reg? --> need to properly manage this...
// }
// void pos_hmr_load_checkpoint() {
// // load addr from dmr reg?
// // pos_hmr_load_state_from(addr)
// // mret? ret?
// }