diff --git a/include/archi/chips/pulp/pulp.h b/include/archi/chips/pulp/pulp.h index 7c7c016..72bf38e 100644 --- a/include/archi/chips/pulp/pulp.h +++ b/include/archi/chips/pulp/pulp.h @@ -23,7 +23,11 @@ #include "archi/gpio/gpio_v3.h" #include "archi/riscv/priv_1_10.h" +#ifdef __ibex__ +#include "archi/ibex/mhpm.h" +#else // __ibex__ #include "archi/riscv/pcer_v2.h" +#endif // __ibex__ #include "archi/itc/itc_v1.h" #include "archi/chips/pulp/memory_map.h" diff --git a/include/archi/ibex/mhpm.h b/include/archi/ibex/mhpm.h new file mode 100644 index 0000000..5520e47 --- /dev/null +++ b/include/archi/ibex/mhpm.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2020 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARCHI_IBEX_MHPM_H +#define _ARCHI_IBEX_MHPM_H + +/* + * Bit definitions for Performance counters mode registers + * + */ + +#define IBEX_MHPMCOUNTER_BASE 0xB00 +// Currently the additional register for more perf. counting is not yet implemented + +// ibex performance counters +#define IBEX_NumCycles 0 /* Number of cycles */ +#define IBEX_NumInstrRet 2 /* Number of instructions retired */ +#define IBEX_NumCyclesLSU 3 /* Number of cycles waiting for data memory */ +#define IBEX_NumCyclesIF 4 /* Cycles waiting for instruction fetches, i.e., number of instructions wasted due to non-ideal caching */ +#define IBEX_NumLoads 5 /* Number of data memory loads. 
Misaligned accesses are counted as two accesses */ +#define IBEX_NumStores 6 /* Number of data memory stores. Misaligned accesses are counted as two accesses */ +#define IBEX_NumJumps 7 /* Number of unconditional jumps (j, jal, jr, jalr) */ +#define IBEX_NumBranches 8 /* Number of branches (conditional) */ +#define IBEX_NumBranchesTaken 9 /* Number of taken branches (conditional) */ +#define IBEX_NumInstrRetC 10 /* Number of compressed instructions retired */ +#define IBEX_NumCyclesMulWait 11 /* Cycles waiting for multiply to complete */ +#define IBEX_NumCyclesDivWait 12 /* Cycles waiting for divide to complete */ + + +// riscv map for ibex counters +#define CSR_PCER_CYCLES IBEX_NumCycles /* Count the number of cycles the core was running */ +#define CSR_PCER_INSTR IBEX_NumInstrRet /* Count the number of instructions executed */ +#define CSR_PCER_LD_STALL IBEX_NumCyclesLSU /* Number of load use hazards */ +#define CSR_PCER_IMISS IBEX_NumCyclesIF /* Cycles waiting for instruction fetches. i.e. the number of instructions wasted due to non-ideal caches */ +#define CSR_PCER_LD IBEX_NumLoads /* Number of memory loads executed. Misaligned accesses are counted twice */ +#define CSR_PCER_ST IBEX_NumStores /* Number of memory stores executed. Misaligned accesses are counted twice */ +#define CSR_PCER_JUMP IBEX_NumJumps /* Number of jump instructions seen, i.e. j, jr, jal, jalr */ +#define CSR_PCER_BRANCH IBEX_NumBranches /* Number of branch instructions seen, i.e. bf, bnf */ +#define CSR_PCER_TAKEN_BRANCH IBEX_NumBranchesTaken /* Number of taken branch instructions seen, i.e. bf, bnf */ +#define CSR_PCER_RVC IBEX_NumInstrRetC /* Number of compressed instructions */ +// Not implemented in ibex +// #define CSR_PCER_ELW 0 /* Cycles wasted due to ELW instruction */ +// #define CSR_PCER_JMP_STALL 0 /* Number of jump register hazards */ + + +// External perf counters +#define IBEX_External_BASE 13 +#define CSR_PCER_LD_EXT 13 /* Number of memory loads to EXT executed. 
Misaligned accesses are counted twice. Every non-TCDM access is considered external */ +#define CSR_PCER_ST_EXT 14 /* Number of memory stores to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ +#define CSR_PCER_LD_EXT_CYC 15 /* Cycles used for memory loads to EXT. Every non-TCDM access is considered external */ +#define CSR_PCER_ST_EXT_CYC 16 /* Cycles used for memory stores to EXT. Every non-TCDM access is considered external */ +#define CSR_PCER_TCDM_CONT 17 /* Cycles wasted due to TCDM/log-interconnect contention */ + + +#define CSR_PCER_NB_EVENTS 16 +#define CSR_PCER_TOP_EVENT 17 +#define CSR_PCER_NB_INTERNAL_EVENTS 11 +#define CSR_PCER_TOP_INTERNAL_EVENTS 12 +#define CSR_NB_PCCR 32 + +// Gives from the event ID, the HW mask that can be stored (with an OR with other events mask) to the PCER +#define CSR_PCER_EVENT_MASK(eventId) (1<<(eventId)) +#define CSR_PCER_ALL_EVENTS_MASK 0xffffffff + +#define CSR_PCER_NAME(id) \ + ( id == IBEX_NumCycles ? "Cycles" : \ + id == IBEX_NumInstrRet ? "Instructions" : \ + id == IBEX_NumCyclesLSU ? "LD_Stall" : \ + id == IBEX_NumCyclesIF ? "IMISS" : \ + id == IBEX_NumLoads ? "LD" : \ + id == IBEX_NumStores ? "ST" : \ + id == IBEX_NumJumps ? "JUMP" : \ + id == IBEX_NumBranches ? "BRANCH" : \ + id == IBEX_NumBranchesTaken ? "TAKEN_BRANCH" : \ + id == IBEX_NumInstrRetC ? "RVC" : \ + id == CSR_PCER_LD_EXT ? "LD_EXT" : \ + id == CSR_PCER_ST_EXT ? "ST_EXT" : \ + id == CSR_PCER_LD_EXT_CYC ? "LD_EXT_CYC" : \ + id == CSR_PCER_ST_EXT_CYC ? "ST_EXT_CYC" : \ + id == CSR_PCER_TCDM_CONT ? 
"TCDM_CONT" : \ + "NA") + +#endif diff --git a/include/archi/riscv/pcer_v2.h b/include/archi/riscv/pcer_v2.h index c329258..6b1afeb 100644 --- a/include/archi/riscv/pcer_v2.h +++ b/include/archi/riscv/pcer_v2.h @@ -21,23 +21,23 @@ * Bit definitions for Performance counters mode registers * */ -#define CSR_PCER_CYCLES 0 /* Count the number of cycles the core was running */ -#define CSR_PCER_INSTR 1 /* Count the number of instructions executed */ -#define CSR_PCER_LD_STALL 2 /* Number of load use hazards */ -#define CSR_PCER_JMP_STALL 3 /* Number of jump register hazards */ -#define CSR_PCER_IMISS 4 /* Cycles waiting for instruction fetches. i.e. the number of instructions wasted due to non-ideal caches */ -#define CSR_PCER_LD 5 /* Number of memory loads executed. Misaligned accesses are counted twice */ -#define CSR_PCER_ST 6 /* Number of memory stores executed. Misaligned accesses are counted twice */ -#define CSR_PCER_JUMP 7 /* Number of jump instructions seen, i.e. j, jr, jal, jalr */ -#define CSR_PCER_BRANCH 8 /* Number of branch instructions seen, i.e. bf, bnf */ -#define CSR_PCER_TAKEN_BRANCH 9 /* Number of taken branch instructions seen, i.e. bf, bnf */ -#define CSR_PCER_RVC 10 /* Number of compressed instructions */ -#define CSR_PCER_ELW 11 /* Cycles wasted due to ELW instruction */ -#define CSR_PCER_LD_EXT 12 /* Number of memory loads to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ -#define CSR_PCER_ST_EXT 13 /* Number of memory stores to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ -#define CSR_PCER_LD_EXT_CYC 14 /* Cycles used for memory loads to EXT. Every non-TCDM access is considered external */ -#define CSR_PCER_ST_EXT_CYC 15 /* Cycles used for memory stores to EXT. 
Every non-TCDM access is considered external */ -#define CSR_PCER_TCDM_CONT 16 /* Cycles wasted due to TCDM/log-interconnect contention */ +#define CSR_PCER_CYCLES 0 /* Count the number of cycles the core was running */ +#define CSR_PCER_INSTR 1 /* Count the number of instructions executed */ +#define CSR_PCER_LD_STALL 2 /* Number of load use hazards */ +#define CSR_PCER_JMP_STALL 3 /* Number of jump register hazards */ +#define CSR_PCER_IMISS 4 /* Cycles waiting for instruction fetches. i.e. the number of instructions wasted due to non-ideal caches */ +#define CSR_PCER_LD 5 /* Number of memory loads executed. Misaligned accesses are counted twice */ +#define CSR_PCER_ST 6 /* Number of memory stores executed. Misaligned accesses are counted twice */ +#define CSR_PCER_JUMP 7 /* Number of jump instructions seen, i.e. j, jr, jal, jalr */ +#define CSR_PCER_BRANCH 8 /* Number of branch instructions seen, i.e. bf, bnf */ +#define CSR_PCER_TAKEN_BRANCH 9 /* Number of taken branch instructions seen, i.e. bf, bnf */ +#define CSR_PCER_RVC 10 /* Number of compressed instructions */ +#define CSR_PCER_ELW 11 /* Cycles wasted due to ELW instruction */ +#define CSR_PCER_LD_EXT 12 /* Number of memory loads to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ +#define CSR_PCER_ST_EXT 13 /* Number of memory stores to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ +#define CSR_PCER_LD_EXT_CYC 14 /* Cycles used for memory loads to EXT. Every non-TCDM access is considered external */ +#define CSR_PCER_ST_EXT_CYC 15 /* Cycles used for memory stores to EXT. Every non-TCDM access is considered external */ +#define CSR_PCER_TCDM_CONT 16 /* Cycles wasted due to TCDM/log-interconnect contention */ #define CSR_PCER_NB_EVENTS 17 @@ -51,6 +51,24 @@ #define CSR_PCMR_ACTIVE 0x1 /* Activate counting */ #define CSR_PCMR_SATURATE 0x2 /* Activate saturation */ -#define CSR_PCER_NAME(id) (id == 0 ? 
"Cycles" : id == 1 ? "Instructions" : id == 2 ? "LD_Stall" : id == 3 ? "Jmp_Stall" : id == 4 ? "IMISS" : id == 5 ? "LD" : id == 6 ? "ST" : id == 7 ? "JUMP" : id == 8 ? "BRANCH" : id == 9 ? "TAKEN_BRANCH" : id == 10 ? "RVC" : id == 11 ? "ELW" : id == 12 ? "LD_EXT" : id == 13 ? "ST_EXT" : id == 14 ? "LD_EXT_CYC" : id == 15 ? "ST_EXT_CYC" : id == 16 ? "TCDM_CONT" : "NA") +#define CSR_PCER_NAME(id) \ + ( id == 0 ? "Cycles" : \ + id == 1 ? "Instructions" : \ + id == 2 ? "LD_Stall" : \ + id == 3 ? "Jmp_Stall" : \ + id == 4 ? "IMISS" : \ + id == 5 ? "LD" : \ + id == 6 ? "ST" : \ + id == 7 ? "JUMP" : \ + id == 8 ? "BRANCH" : \ + id == 9 ? "TAKEN_BRANCH" : \ + id == 10 ? "RVC" : \ + id == 11 ? "ELW" : \ + id == 12 ? "LD_EXT" : \ + id == 13 ? "ST_EXT" : \ + id == 14 ? "LD_EXT_CYC" : \ + id == 15 ? "ST_EXT_CYC" : \ + id == 16 ? "TCDM_CONT" : \ + "NA") #endif diff --git a/include/bench/bench.h b/include/bench/bench.h index 58be0ea..429012f 100755 --- a/include/bench/bench.h +++ b/include/bench/bench.h @@ -143,6 +143,8 @@ static inline void perf_start(void) { #ifdef __riscv__ cpu_perf_conf_events(CSR_PCER_ALL_EVENTS_MASK); cpu_perf_conf(CSR_PCMR_ACTIVE | CSR_PCMR_SATURATE); +#elif defined(__ibex__) + cpu_perf_start(); #else cpu_perf_conf_events(SPR_PCER_ALL_EVENTS_MASK); cpu_perf_conf(SPR_PCMR_ACTIVE | SPR_PCMR_SATURATE); @@ -159,7 +161,7 @@ static inline void perf_start(void) { */ static inline void perf_stop(void) { #ifdef CSR_PCER_ALL_EVENTS_MASK - cpu_perf_conf(0); + cpu_perf_stop(); #endif // TODO this is failing on most targets, please include that also for specific ones #if 0 @@ -172,7 +174,9 @@ static inline void perf_stop(void) { */ static inline void perf_reset(void) { #ifdef CSR_PCER_ALL_EVENTS_MASK + cpu_perf_stop(); cpu_perf_setall(0); + cpu_perf_start(); #endif // TODO this is failing on most targets, please include that also for specific ones #if 0 @@ -188,6 +192,8 @@ static inline void perf_enable_id( int eventid){ #ifdef __riscv__ 
cpu_perf_conf_events(CSR_PCER_EVENT_MASK(eventid)); cpu_perf_conf(CSR_PCMR_ACTIVE | CSR_PCMR_SATURATE); +#elif defined(__ibex__) + cpu_perf_conf_events(CSR_PCER_EVENT_MASK(eventid)); #else cpu_perf_conf_events(SPR_PCER_EVENT_MASK(eventid)); cpu_perf_conf(SPR_PCMR_ACTIVE | SPR_PCMR_SATURATE); diff --git a/include/hal/chips/pulp/pulp.h b/include/hal/chips/pulp/pulp.h index 3560030..5b12fb6 100644 --- a/include/hal/chips/pulp/pulp.h +++ b/include/hal/chips/pulp/pulp.h @@ -17,7 +17,11 @@ #ifndef __HAL_CHIPS_PULP_PULP_H__ #define __HAL_CHIPS_PULP_PULP_H__ +#ifdef __ibex__ +#include "hal/ibex/ibex.h" +#else // __ibex__ #include "hal/riscv/riscv_v5.h" +#endif // __ibex__ #include "hal/eu/eu_v3.h" #include "hal/itc/itc_v1.h" #include "hal/dma/mchan_v7.h" diff --git a/include/hal/ibex/ibex.h b/include/hal/ibex/ibex.h new file mode 100644 index 0000000..ce3a68d --- /dev/null +++ b/include/hal/ibex/ibex.h @@ -0,0 +1,293 @@ +/* + * Copyright (C) 2020 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __HAL_IBEX_IBEX_H__ +#define __HAL_IBEX_IBEX_H__ + +#include "archi/pulp.h" + +#include "archi/riscv/builtins_v2_emu.h" + +// For PULP, ibex added non-standard irqs to allow for 32 fast interrupts. 
+// These use custom CSRs: MIE: 0x7D0, MTVEC: 0x7D1, MIP: 0x7D2 +#define SR_MTVEC 0x7D1 + +#define hal_spr_read_then_clr(reg,val) \ + ({ \ + int state; \ + asm volatile ("csrrc %0, %1, %2" : "=r" (state) : "I" (reg), "I" (val) ); \ + state; \ + }) + +#define hal_spr_read_then_set(reg,val) \ + ({ \ + int state; \ + asm volatile ("csrrs %0, %1, %2" : "=r" (state) : "I" (reg), "I" (val) ); \ + state; \ + }) + +#define hal_spr_read_then_clr_from_reg(reg,val) \ + ({ \ + int state; \ + asm volatile ("csrrc %0, %1, %2" : "=r" (state) : "I" (reg), "r" (val) ); \ + state; \ + }) + +#define hal_spr_read_then_set(reg,val) \ + ({ \ + int state; \ + asm volatile ("csrrs %0, %1, %2" : "=r" (state) : "I" (reg), "I" (val) ); \ + state; \ + }) + +#define hal_spr_read_then_set_from_reg(reg,val) \ + ({ \ + int state; \ + asm volatile ("csrrs %0, %1, %2" : "=r" (state) : "I" (reg), "r" (val) ); \ + state; \ + }) + +#define hal_spr_write(reg,val) \ +({do { \ + asm volatile ("csrw %0, %1" : : "I" (reg), "r" (val) ); \ +} while(0); \ +}) + +#define hal_spr_read(reg) \ +({ \ + int result; \ + asm volatile ("csrr %0, %1" : "=r" (result) : "I" (reg) ); \ + result; \ +}) + +#define hal_mepc_read() hal_spr_read(RV_CSR_MEPC) + +static inline unsigned int core_id() { + int hart_id; + asm("csrr %0, 0xF14" : "=r" (hart_id) : ); + // in PULP the hart id is {22'b0, cluster_id, core_id} + return hart_id & 0x01f; +} + +static inline unsigned int cluster_id() { + int hart_id; + asm("csrr %0, 0xF14" : "=r" (hart_id) : ); + // in PULP the hart id is {22'b0, cluster_id, core_id} + return (hart_id >> 5) & 0x3f; +} + +static inline unsigned int hal_core_id() { + return core_id(); +} + +static inline unsigned int hal_cluster_id() { + return cluster_id(); +} + +// TODO replace by compiler builtin +static inline __attribute__((always_inline)) unsigned int hal_has_fc() { +#ifdef ARCHI_HAS_FC + return 1; +#else + return 0; +#endif +} + +static inline __attribute__((always_inline)) unsigned int hal_is_fc() { 
+#ifndef ARCHI_HAS_FC + return 0; +#else + if (hal_has_fc()) return hal_cluster_id() == ARCHI_FC_CID; + else return 0; +#endif +} + +static inline int hal_irq_disable() +{ + int irq = hal_spr_read_then_clr(0x300, 0x1<<3); + // This memory barrier is needed to prevent the compiler to cross the irq barrier + __asm__ __volatile__ ("" : : : "memory"); + return irq; +} + +static inline void hal_irq_restore(int state) +{ + // This memory barrier is needed to prevent the compiler to cross the irq barrier + __asm__ __volatile__ ("" : : : "memory"); + hal_spr_write(0x300, state); +} + +static inline void hal_irq_enable() +{ + // This memory barrier is needed to prevent the compiler to cross the irq barrier + __asm__ __volatile__ ("" : : : "memory"); + hal_spr_read_then_set(0x300, 0x1<<3); +} + + +/* + * PERFORMANCE COUNTERS + * + * API for accessing performance counters registers. + * Have a look at file mhpm.h to speficy registers through defines + * CSR_PCER_* and CSR_PCMR_* + */ + +#define PCER_NB_EVENTS CSR_PCER_NB_EVENTS +#define PCER_ALL_EVENTS_MASK CSR_PCER_ALL_EVENTS_MASK + +/* Configure the active events. eventMask is an OR of events got through SPR_PCER_EVENT_MASK */ +static inline void cpu_perf_conf_events(unsigned int eventMask) +{ +#ifndef PLP_NO_PERF_COUNTERS + unsigned int test = ~eventMask; + asm volatile ("csrw 0x320, %0" : "+r" (test)); +#endif +} + +/* Return events configuration */ +static inline unsigned int cpu_perf_conf_events_get() +{ +#ifndef PLP_NO_PERF_COUNTERS + unsigned int result; + asm volatile ("csrr %0, 0x320" : "=r" (result)); + return ~result; +#else + return 0; +#endif +} + +/* Configure the mode. confMask is an OR of all SPR_PCMR_* macros */ +static inline void cpu_perf_conf(unsigned int confMask) +{ +#ifndef PLP_NO_PERF_COUNTERS + asm volatile ("csrw 0x320, %0" :: "r" (~confMask)); +#endif +} + +/* Starts counting in all counters. 
As this is using the mode register, + * the rest of the config can be given through conf parameter */ +static inline void cpu_perf_start() { +#ifndef PLP_NO_PERF_COUNTERS + cpu_perf_conf(CSR_PCER_ALL_EVENTS_MASK); +#endif +} + +/* Stops counting in all counters. As this is using the mode register, + * the rest of the config can be given through conf parameter */ +static inline void cpu_perf_stop() { +#ifndef PLP_NO_PERF_COUNTERS + cpu_perf_conf(0); +#endif +} + +/* Set the specified counter to the specified value */ +static inline void cpu_perf_set(unsigned int counterId, unsigned int value) { + switch(counterId) { + case 0: asm volatile ("csrw 0xB00, %0" : : "r" (value) ); break; + case 1: break; // This CSR does not exist + case 2: asm volatile ("csrw 0xB02, %0" : : "r" (value) ); break; + case 3: asm volatile ("csrw 0xB03, %0" : : "r" (value) ); break; + case 4: asm volatile ("csrw 0xB04, %0" : : "r" (value) ); break; + case 5: asm volatile ("csrw 0xB05, %0" : : "r" (value) ); break; + case 6: asm volatile ("csrw 0xB06, %0" : : "r" (value) ); break; + case 7: asm volatile ("csrw 0xB07, %0" : : "r" (value) ); break; + case 8: asm volatile ("csrw 0xB08, %0" : : "r" (value) ); break; + case 9: asm volatile ("csrw 0xB09, %0" : : "r" (value) ); break; + case 10: asm volatile ("csrw 0xB0A, %0" : : "r" (value) ); break; + case 11: asm volatile ("csrw 0xB0B, %0" : : "r" (value) ); break; + case 12: asm volatile ("csrw 0xB0C, %0" : : "r" (value) ); break; + case 13: asm volatile ("csrw 0xB0D, %0" : : "r" (value) ); break; + case 14: asm volatile ("csrw 0xB0E, %0" : : "r" (value) ); break; + case 15: asm volatile ("csrw 0xB0F, %0" : : "r" (value) ); break; + case 16: asm volatile ("csrw 0xB10, %0" : : "r" (value) ); break; + case 17: asm volatile ("csrw 0xB11, %0" : : "r" (value) ); break; + case 18: asm volatile ("csrw 0xB12, %0" : : "r" (value) ); break; + case 19: asm volatile ("csrw 0xB13, %0" : : "r" (value) ); break; + case 20: asm volatile ("csrw 0xB14, %0" : : "r" 
(value) ); break; + case 21: asm volatile ("csrw 0xB15, %0" : : "r" (value) ); break; + case 22: asm volatile ("csrw 0xB16, %0" : : "r" (value) ); break; + case 23: asm volatile ("csrw 0xB17, %0" : : "r" (value) ); break; + case 24: asm volatile ("csrw 0xB18, %0" : : "r" (value) ); break; + case 25: asm volatile ("csrw 0xB19, %0" : : "r" (value) ); break; + case 26: asm volatile ("csrw 0xB1A, %0" : : "r" (value) ); break; + case 27: asm volatile ("csrw 0xB1B, %0" : : "r" (value) ); break; + case 28: asm volatile ("csrw 0xB1C, %0" : : "r" (value) ); break; + case 29: asm volatile ("csrw 0xB1D, %0" : : "r" (value) ); break; + case 30: asm volatile ("csrw 0xB1E, %0" : : "r" (value) ); break; + case 31: asm volatile ("csrw 0xB1F, %0" : : "r" (value) ); break; + } +} + +/* Set all counters to the specified value */ +static inline void cpu_perf_setall(unsigned int value) { +#ifndef PLP_NO_PERF_COUNTERS + // This implementation is rather slow. ri5cy has a register to set all, ibex does not. + for (int i = 0; i <= CSR_PCER_TOP_EVENT; i++) { + cpu_perf_set(i, value); + } +#endif +} + +/* Return the value of the specified counter */ +static inline unsigned int cpu_perf_get(const unsigned int counterId) { +#ifndef PLP_NO_PERF_COUNTERS + unsigned int value = 0; + switch(counterId) { + case 0: asm volatile ("csrr %0, 0xB00" : "=r" (value)); break; + case 1: break; // This CSR does not exist + case 2: asm volatile ("csrr %0, 0xB02" : "=r" (value)); break; + case 3: asm volatile ("csrr %0, 0xB03" : "=r" (value)); break; + case 4: asm volatile ("csrr %0, 0xB04" : "=r" (value)); break; + case 5: asm volatile ("csrr %0, 0xB05" : "=r" (value)); break; + case 6: asm volatile ("csrr %0, 0xB06" : "=r" (value)); break; + case 7: asm volatile ("csrr %0, 0xB07" : "=r" (value)); break; + case 8: asm volatile ("csrr %0, 0xB08" : "=r" (value)); break; + case 9: asm volatile ("csrr %0, 0xB09" : "=r" (value)); break; + case 10: asm volatile ("csrr %0, 0xB0A" : "=r" (value)); break; + case 11: asm 
volatile ("csrr %0, 0xB0B" : "=r" (value)); break; + case 12: asm volatile ("csrr %0, 0xB0C" : "=r" (value)); break; + case 13: asm volatile ("csrr %0, 0xB0D" : "=r" (value)); break; + case 14: asm volatile ("csrr %0, 0xB0E" : "=r" (value)); break; + case 15: asm volatile ("csrr %0, 0xB0F" : "=r" (value)); break; + case 16: asm volatile ("csrr %0, 0xB10" : "=r" (value)); break; + case 17: asm volatile ("csrr %0, 0xB11" : "=r" (value)); break; + case 18: asm volatile ("csrr %0, 0xB12" : "=r" (value)); break; + case 19: asm volatile ("csrr %0, 0xB13" : "=r" (value)); break; + case 20: asm volatile ("csrr %0, 0xB14" : "=r" (value)); break; + case 21: asm volatile ("csrr %0, 0xB15" : "=r" (value)); break; + case 22: asm volatile ("csrr %0, 0xB16" : "=r" (value)); break; + case 23: asm volatile ("csrr %0, 0xB17" : "=r" (value)); break; + case 24: asm volatile ("csrr %0, 0xB18" : "=r" (value)); break; + case 25: asm volatile ("csrr %0, 0xB19" : "=r" (value)); break; + case 26: asm volatile ("csrr %0, 0xB1A" : "=r" (value)); break; + case 27: asm volatile ("csrr %0, 0xB1B" : "=r" (value)); break; + case 28: asm volatile ("csrr %0, 0xB1C" : "=r" (value)); break; + case 29: asm volatile ("csrr %0, 0xB1D" : "=r" (value)); break; + case 30: asm volatile ("csrr %0, 0xB1E" : "=r" (value)); break; + case 31: asm volatile ("csrr %0, 0xB1F" : "=r" (value)); break; + } + return value; +#else + return 0; +#endif +} + +static inline const char *cpu_perf_name(int event) { + return CSR_PCER_NAME(event); +} + +#endif diff --git a/include/hal/riscv/riscv_v5.h b/include/hal/riscv/riscv_v5.h index 88ff492..be249d0 100644 --- a/include/hal/riscv/riscv_v5.h +++ b/include/hal/riscv/riscv_v5.h @@ -152,7 +152,8 @@ static inline unsigned int core_id() { return hart_id & 0x01f; } -static inline unsigned int cluster_id() { int hart_id; +static inline unsigned int cluster_id() { + int hart_id; #if RISCV_VERSION >= 4 && !defined(RISCV_1_7) #if PULP_CHIP_FAMILY == CHIP_GAP asm("csrr %0, 0x014" : "=r" 
(hart_id) : ); @@ -294,8 +295,8 @@ static inline void hal_irq_enable() * PERFORMANCE COUNTERS * * API for accessing performance counters registers. - * Have a look at file spr-defs.h to speficy registers through defines - * SPR_PCER_* and SPR_PCMR_* + * Have a look at file pcer_v2.h to speficy registers through defines + * CSR_PCER_* and CSR_PCMR_* */ #define PCER_NB_EVENTS CSR_PCER_NB_EVENTS @@ -405,26 +406,7 @@ static inline unsigned int cpu_perf_get(const unsigned int counterId) { } static inline const char *cpu_perf_name(int event) { - switch (event) - { - case 0: return "CYCLES"; - case 1: return "INSTR"; - case 2: return "LD_STALL"; - case 3: return "JMP_STALL"; - case 4: return "IMISS"; - case 5: return "LD"; - case 6: return "ST"; - case 7: return "JUMP"; - case 8: return "BRANCH"; - case 9: return "TAKEN_BRANCH"; - case 10: return "RVC"; - case 11: return "LD_EXT"; - case 12: return "ST_EXT"; - case 13: return "LD_EXT_CYC"; - case 14: return "ST_EXT_CYC"; - case 15: return "TCDM_CONT"; - } - return (char *)0; + return CSR_PCER_NAME(event); } diff --git a/kernel/bench.c b/kernel/bench.c index 846ebf0..c460070 100644 --- a/kernel/bench.c +++ b/kernel/bench.c @@ -145,7 +145,25 @@ void check_uint32(testresult_t* result, const char* fail_msg, uint32_t actual, u } void perf_print_all(void) { -#ifdef __riscv__ +#ifdef __ibex__ + printf("Perf CYCLES: %d\n", cpu_perf_get(CSR_PCER_CYCLES)); + printf("Perf INSTR: %d\n", cpu_perf_get(CSR_PCER_INSTR)); + printf("Perf CINSTR: %d\n", cpu_perf_get(CSR_PCER_RVC)); + printf("Perf LD_STALL: %d\n", cpu_perf_get(CSR_PCER_LD_STALL)); + printf("Perf JR_STALL: [Not Implemented]\n"); + printf("Perf IMISS: %d\n", cpu_perf_get(CSR_PCER_IMISS)); + printf("Perf LD: %d\n", cpu_perf_get(CSR_PCER_LD)); + printf("Perf ST: %d\n", cpu_perf_get(CSR_PCER_ST)); + printf("Perf JUMP: %d\n", cpu_perf_get(CSR_PCER_JUMP)); + printf("Perf BRANCH: %d\n", cpu_perf_get(CSR_PCER_BRANCH)); + printf("Perf BTAKEN: %d\n", 
cpu_perf_get(CSR_PCER_TAKEN_BRANCH)); + printf("Perf LD EXT: %d\n", cpu_perf_get(CSR_PCER_LD_EXT)); + printf("Perf ST EXT: %d\n", cpu_perf_get(CSR_PCER_ST_EXT)); + printf("Perf LD EXT CYC: %d\n", cpu_perf_get(CSR_PCER_LD_EXT_CYC)); + printf("Perf ST EXT CYC: %d\n", cpu_perf_get(CSR_PCER_ST_EXT_CYC)); + printf("Perf TCDM CONT: %d\n", cpu_perf_get(CSR_PCER_TCDM_CONT)); + printf("Perf CSR HAZARD: [Not Implemented]\n"); +#elif defined( __riscv__ ) printf("Perf CYCLES: %d\n", cpu_perf_get(0)); printf("Perf INSTR: %d\n", cpu_perf_get(1)); printf("Perf CINSTR: %d\n", cpu_perf_get(10)); @@ -211,7 +229,7 @@ void illegal_insn_handler_c(void) { #ifndef __ariane__ unsigned int exception_address, insn; -#ifdef __riscv__ +#if defined( __riscv__ ) || defined( __ibex__) asm("csrr %0, 0x341" : "=r" (exception_address) : ); #else exception_address = hal_spr_read(SPR_EPCR_BASE); diff --git a/rules/pulpos/targets/pulp.mk b/rules/pulpos/targets/pulp.mk index f4d8be5..fecc378 100644 --- a/rules/pulpos/targets/pulp.mk +++ b/rules/pulpos/targets/pulp.mk @@ -1,6 +1,6 @@ ifdef USE_IBEX PULP_LDFLAGS += -PULP_CFLAGS += -D__ibex__ -UARCHI_CORE_HAS_PULPV2 -DPLP_NO_BUILTIN -UCORE_PULP_BUILTINS -DRV_ISA_RV32 -DPLP_NO_PERF_COUNTERS +PULP_CFLAGS += -D__ibex__ -U__riscv__ -UARCHI_CORE_HAS_PULPV2 -DPLP_NO_BUILTIN -UCORE_PULP_BUILTINS -DRV_ISA_RV32 PULP_ARCH_CFLAGS ?= -march=rv32imc PULP_ARCH_LDFLAGS ?= -march=rv32imc PULP_ARCH_OBJDFLAGS ?= -Mmarch=rv32imc