mirror of
https://github.com/saymrwulf/pulp-runtime.git
synced 2026-05-26 22:35:49 +00:00
Add DMR performance section code
This commit is contained in:
parent
f754455e34
commit
4b8bebae9a
4 changed files with 169 additions and 20 deletions
|
|
@ -80,6 +80,8 @@ extern "C" {
|
|||
#define HMR_REGISTERS_DMR_CONFIG_REG_OFFSET 0x10
|
||||
#define HMR_REGISTERS_DMR_CONFIG_RAPID_RECOVERY_BIT 0
|
||||
#define HMR_REGISTERS_DMR_CONFIG_FORCE_RECOVERY_BIT 1
|
||||
#define HMR_REGISTERS_DMR_CONFIG_SETBACK_BIT 2
|
||||
#define HMR_REGISTERS_DMR_CONFIG_SYNCH_REQ_BIT 3
|
||||
|
||||
// TMR configuration bits.
|
||||
#define HMR_REGISTERS_TMR_CONFIG_REG_OFFSET 0x14
|
||||
|
|
@ -144,7 +146,8 @@ extern "C" {
|
|||
#define HMR_DMR_REGS_DMR_CONFIG_REG_OFFSET 0x4
|
||||
#define HMR_DMR_REGS_DMR_CONFIG_RAPID_RECOVERY_BIT 0
|
||||
#define HMR_DMR_REGS_DMR_CONFIG_FORCE_RECOVERY_BIT 1
|
||||
#define HMR_DMR_REGS_DMR_CONFIG_SETBACK_BIT 1
|
||||
#define HMR_DMR_REGS_DMR_CONFIG_SETBACK_BIT 2
|
||||
#define HMR_DMR_REGS_DMR_CONFIG_SYNCH_REQ_BIT 3
|
||||
|
||||
// Address for the last checkpoint.
|
||||
#define HMR_DMR_REGS_CHECKPOINT_ADDR_REG_OFFSET 0x8
|
||||
|
|
|
|||
|
|
@ -94,14 +94,26 @@ static inline void hmr_disable_dmr(unsigned int cid, unsigned int dmr_id) {
|
|||
pulp_write32(ARCHI_HMR_GLOBAL_ADDR(cid) + HMR_DMR_OFFSET + HMR_DMR_INCREMENT*dmr_id + HMR_DMR_REGS_DMR_ENABLE_REG_OFFSET, 0);
|
||||
}
|
||||
|
||||
static inline void hmr_set_dmr_config(unsigned int cid, unsigned int dmr_id, bool rapid_recovery) {
|
||||
static inline void hmr_set_dmr_config(unsigned int cid, unsigned int dmr_id, bool rapid_recovery, bool setback, bool synch_req) {
|
||||
pulp_write32(ARCHI_HMR_GLOBAL_ADDR(cid) + HMR_DMR_OFFSET + HMR_DMR_INCREMENT*dmr_id + HMR_DMR_REGS_DMR_CONFIG_REG_OFFSET,
|
||||
(rapid_recovery ? 1<<HMR_DMR_REGS_DMR_CONFIG_RAPID_RECOVERY_BIT : 0));
|
||||
(rapid_recovery ? 1<<HMR_DMR_REGS_DMR_CONFIG_RAPID_RECOVERY_BIT : 0) |
|
||||
(setback ? 1<<HMR_DMR_REGS_DMR_CONFIG_SETBACK_BIT : 0) |
|
||||
(synch_req ? 1<<HMR_DMR_REGS_DMR_CONFIG_SYNCH_REQ_BIT : 0));
|
||||
}
|
||||
|
||||
static inline void hmr_set_dmr_config_all(unsigned int cid, bool rapid_recovery) {
|
||||
static inline void hmr_set_dmr_config_all(unsigned int cid, bool rapid_recovery, bool setback, bool synch_req) {
|
||||
pulp_write32(ARCHI_HMR_GLOBAL_ADDR(cid) + HMR_TOP_OFFSET + HMR_REGISTERS_DMR_CONFIG_REG_OFFSET,
|
||||
(rapid_recovery ? 1<<HMR_REGISTERS_DMR_CONFIG_RAPID_RECOVERY_BIT : 0));
|
||||
(rapid_recovery ? 1<<HMR_REGISTERS_DMR_CONFIG_RAPID_RECOVERY_BIT : 0) |
|
||||
(setback ? 1<<HMR_REGISTERS_DMR_CONFIG_SETBACK_BIT : 0) |
|
||||
(synch_req ? 1<<HMR_REGISTERS_DMR_CONFIG_SYNCH_REQ_BIT : 0));
|
||||
}
|
||||
|
||||
static inline unsigned int hmr_get_dmr_config(unsigned int cid, unsigned int dmr_id) {
|
||||
return pulp_read32(ARCHI_HMR_GLOBAL_ADDR(0) + HMR_DMR_OFFSET + HMR_DMR_INCREMENT*dmr_id + HMR_DMR_REGS_DMR_CONFIG_REG_OFFSET);
|
||||
}
|
||||
|
||||
static inline void hmr_set_dmr_config_bare(unsigned int cid, unsigned int dmr_id, unsigned int config) {
|
||||
pulp_write32(ARCHI_HMR_GLOBAL_ADDR(0) + HMR_DMR_OFFSET + HMR_DMR_INCREMENT*dmr_id + HMR_DMR_REGS_DMR_CONFIG_REG_OFFSET, config);
|
||||
}
|
||||
|
||||
static inline void hmr_force_dmr_resynch(unsigned int cid, unsigned int dmr_id) {
|
||||
|
|
|
|||
|
|
@ -48,12 +48,15 @@ void __attribute__((naked)) pos_hmr_synch();
|
|||
void __attribute__((naked)) pos_hmr_tmr_synch_entry();
|
||||
void __attribute__((naked)) pos_hmr_tmr_synch_exit();
|
||||
void __attribute__((naked)) pos_hmr_tmr_synch();
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch_entry();
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch_exit();
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch();
|
||||
int hmr_tmr_critical_section(int (*function_handle)());
|
||||
int hmr_dmr_critical_section(int (*function_handle)());
|
||||
void hmr_tmr_performance_section(void (*function_handle)());
|
||||
void hmr_tmr_perf_setup_sp();
|
||||
|
||||
void hmr_dmr_performance_section(void (*function_handle)());
|
||||
void hmr_dmr_perf_setup_sp();
|
||||
#endif
|
||||
|
||||
#ifdef ARCHI_HAS_CLUSTER
|
||||
|
|
|
|||
|
|
@ -445,8 +445,7 @@ void __attribute__((naked)) pos_hmr_tmr_synch() {
|
|||
pos_hmr_tmr_synch_exit();
|
||||
}
|
||||
|
||||
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch() {
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch_entry() {
|
||||
pos_hmr_store_part_to_stack();
|
||||
pos_hmr_store_rest_to_stack();
|
||||
|
||||
|
|
@ -459,7 +458,9 @@ void __attribute__((naked)) pos_hmr_dmr_synch() {
|
|||
"add t0, t0, t1 \n\t"
|
||||
"sw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(t0) \n\t"
|
||||
: : : "memory");
|
||||
}
|
||||
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch_exit() {
|
||||
// enter barrier -> this should lock the cores together
|
||||
eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))));
|
||||
|
||||
|
|
@ -475,6 +476,11 @@ void __attribute__((naked)) pos_hmr_dmr_synch() {
|
|||
pos_hmr_sw_reload();
|
||||
}
|
||||
|
||||
void __attribute__((naked)) pos_hmr_dmr_synch() {
|
||||
pos_hmr_dmr_synch_entry();
|
||||
pos_hmr_dmr_synch_exit();
|
||||
}
|
||||
|
||||
int hmr_tmr_critical_section(int (*function_handle)()) {
|
||||
int ret = 0;
|
||||
if (TMR_IS_MAIN_CORE(core_id())) {
|
||||
|
|
@ -507,10 +513,6 @@ int hmr_dmr_critical_section(int (*function_handle)()) {
|
|||
}
|
||||
|
||||
void hmr_tmr_performance_section(void (*function_handle)()) {
|
||||
__asm__ __volatile__(
|
||||
"la ra, pos_hmr_perf_help\n\t"
|
||||
"csrw 0x341, ra\n\t"
|
||||
);
|
||||
volatile unsigned int tmr_group_id = TMR_GROUP_ID(core_id());
|
||||
unsigned int tmr_config = hmr_get_tmr_config(0, tmr_group_id);
|
||||
hmr_set_tmr_config_bare(0, tmr_group_id, tmr_config & ~(1<<HMR_REGISTERS_TMR_CONFIG_SETBACK_BIT));
|
||||
|
|
@ -529,7 +531,7 @@ void hmr_tmr_performance_section(void (*function_handle)()) {
|
|||
if (TMR_IS_MAIN_CORE(my_core_id)) {
|
||||
hmr_set_tmr_config_bare(0, TMR_GROUP_ID(my_core_id), tmr_config);
|
||||
} else {
|
||||
// get sp from tmr reg, assumes a5 is empty
|
||||
// get sp from tmr reg
|
||||
__asm__ __volatile__(
|
||||
// "csrr a5, 0xf14 \n\t" // Read core id
|
||||
"li sp, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
|
||||
|
|
@ -544,16 +546,47 @@ void hmr_tmr_performance_section(void (*function_handle)()) {
|
|||
function_handle();
|
||||
|
||||
if (TMR_IS_MAIN_CORE(core_id())) {
|
||||
// Enable TMR
|
||||
pulp_write32(ARCHI_HMR_ADDR + HMR_TMR_OFFSET + HMR_TMR_INCREMENT*core_id() + HMR_TMR_REGS_TMR_ENABLE_REG_OFFSET, 1<<HMR_TMR_REGS_TMR_ENABLE_TMR_ENABLE_BIT);
|
||||
|
||||
#if !defined(ARCHI_HMR_FORCE_RAPID) || !defined(ARCHI_HMR_NO_RAPID_RECOVERY)
|
||||
// TODO check Rapid
|
||||
|
||||
}
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id()))));
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
#elif defined(ARCHI_HMR_FORCE_RAPID)
|
||||
|
||||
}
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
eu_bar_trig_wait_clr(eu_bar_addr(TMR_BARRIER_ID(TMR_GROUP_ID(core_id()))));
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
#else
|
||||
|
||||
#endif
|
||||
|
||||
pos_hmr_tmr_synch_entry();
|
||||
|
||||
// Ugly hack allows for proper cleanup of stack by function
|
||||
|
|
@ -567,10 +600,9 @@ void hmr_tmr_performance_section(void (*function_handle)()) {
|
|||
__asm__ __volatile__ (
|
||||
"j pos_hmr_tmr_synch_exit\n\t");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void __attribute__((naked)) pos_hmr_perf_help() {}
|
||||
|
||||
void hmr_tmr_perf_setup_sp() {
|
||||
unsigned int core_id_1 = TMR_CORE_ID(TMR_GROUP_ID(core_id()), 1);
|
||||
unsigned int core_id_2 = TMR_CORE_ID(TMR_GROUP_ID(core_id()), 2);
|
||||
|
|
@ -582,6 +614,105 @@ void hmr_tmr_perf_setup_sp() {
|
|||
pulp_write32(extra_sp_2, (unsigned int)((core_id_2+1)*CLUSTER_STACK_SIZE + cluster_stacks));
|
||||
}
|
||||
|
||||
void hmr_dmr_performance_section(void (*function_handle)()) {
|
||||
volatile unsigned int dmr_group_id = DMR_GROUP_ID(core_id());
|
||||
unsigned int dmr_config = hmr_get_dmr_config(0, dmr_group_id);
|
||||
hmr_set_dmr_config_bare(0, dmr_group_id, dmr_config & ~(1<<HMR_DMR_REGS_DMR_CONFIG_SETBACK_BIT));
|
||||
|
||||
register unsigned int my_core_id;
|
||||
|
||||
hmr_disable_dmr(0, dmr_group_id);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"csrr %[core_id], 0xf14 \n\t" // Read core id
|
||||
: [core_id] "=r" (my_core_id) : : "memory");
|
||||
// volatile unsigned int my_core_id = core_id();
|
||||
if (DMR_IS_MAIN_CORE(my_core_id)) {
|
||||
hmr_set_dmr_config_bare(0, DMR_GROUP_ID(my_core_id), dmr_config);
|
||||
} else {
|
||||
// get sp from dmr reg
|
||||
__asm__ __volatile__(
|
||||
// "csrr a5, 0xf14 \n\t" // Read core id
|
||||
"li sp, " QU(ARCHI_HMR_ADDR + HMR_CORE_OFFSET) " \n\t"
|
||||
"andi %[core_id], %[core_id], 0x01f \n\t"
|
||||
"sll %[core_id], %[core_id], " QU(HMR_CORE_SLL) " \n\t"
|
||||
"add %[core_id], %[core_id], sp \n\t"
|
||||
"lw sp, " QU(HMR_CORE_REGS_SP_STORE_REG_OFFSET) "(%[core_id]) \n\t"
|
||||
: : [core_id] "r" (my_core_id) : "memory");
|
||||
eu_evt_maskSet((1<<PULP_DISPATCH_EVENT) | (1<<PULP_MUTEX_EVENT) | (1<<PULP_HW_BAR_EVENT));
|
||||
}
|
||||
|
||||
function_handle();
|
||||
|
||||
if (DMR_IS_MAIN_CORE(core_id())) {
|
||||
// Enable TMR
|
||||
pulp_write32(ARCHI_HMR_ADDR + HMR_DMR_OFFSET + HMR_DMR_INCREMENT*core_id() + HMR_DMR_REGS_DMR_ENABLE_REG_OFFSET, 1<<HMR_DMR_REGS_DMR_ENABLE_DMR_ENABLE_BIT);
|
||||
|
||||
#if !defined(ARCHI_HMR_FORCE_RAPID) || !defined(ARCHI_HMR_NO_RAPID_RECOVERY)
|
||||
// TODO check Rapid
|
||||
}
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))));
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
#elif defined(ARCHI_HMR_FORCE_RAPID)
|
||||
}
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
eu_bar_trig_wait_clr(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))));
|
||||
__asm__ __volatile__(
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
"nop\n\t"
|
||||
);
|
||||
#else
|
||||
pos_hmr_dmr_synch_entry();
|
||||
|
||||
// Ugly hack allows for proper cleanup of stack by function
|
||||
__asm__ __volatile__(
|
||||
"auipc t1, 0\n\t"
|
||||
"addi t1, t1, 12\n\t" // Add instruction increment to after pos_hmr_tmr_synch_exit call
|
||||
"sw t1, 0x78(sp) \n\t" // Update mepc on stack for return later
|
||||
"j pos_hmr_dmr_synch_exit\n\t"
|
||||
);
|
||||
} else {
|
||||
__asm__ __volatile__ (
|
||||
"j pos_hmr_dmr_synch_exit\n\t");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void hmr_dmr_perf_setup_sp() {
|
||||
unsigned int core_id_1 = DMR_CORE_ID(DMR_GROUP_ID(core_id()), 1);
|
||||
unsigned int *extra_sp_1 = (unsigned int *)(ARCHI_HMR_ADDR + HMR_CORE_OFFSET + (core_id_1 << HMR_CORE_SLL) + HMR_CORE_REGS_SP_STORE_REG_OFFSET);
|
||||
eu_bar_setup(eu_bar_addr(DMR_BARRIER_ID(DMR_GROUP_ID(core_id()))), DMR_BARRIER_SETUP(DMR_GROUP_ID(core_id())));
|
||||
|
||||
pulp_write32(extra_sp_1, (unsigned int)((core_id_1+1)*CLUSTER_STACK_SIZE + cluster_stacks));
|
||||
}
|
||||
|
||||
// void pos_hmr_tmr_unsync() {
|
||||
|
||||
// // Update event unit mask
|
||||
|
|
|
|||
Loading…
Reference in a new issue