diff --git a/configs/pulpissimo_cv32e40p.sh b/configs/pulpissimo_cv32e40p.sh new file mode 100644 index 0000000..0a9b0f4 --- /dev/null +++ b/configs/pulpissimo_cv32e40p.sh @@ -0,0 +1,16 @@ +#!/bin/bash -e + +export PULPRT_TARGET=pulpissimo +export PULPRUN_TARGET=pulpissimo +export USE_CV32E40P=1 + +if [ -n "${ZSH_VERSION:-}" ]; then + DIR="$(readlink -f -- "${(%):-%x}")" + scriptDir="$(dirname $DIR)" +else + + scriptDir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" + +fi + +source $scriptDir/common.sh diff --git a/include/archi/chips/pulpissimo/pulp.h b/include/archi/chips/pulpissimo/pulp.h index 454bcab..ee552ec 100644 --- a/include/archi/chips/pulpissimo/pulp.h +++ b/include/archi/chips/pulpissimo/pulp.h @@ -26,7 +26,9 @@ #include "archi/riscv/priv_1_10.h" #ifdef __ibex__ #include "archi/ibex/mhpm.h" -#else // __ibex__ +#elif defined(__cv32e40p__) +#include "archi/cv32e40p/cv32e40p.h" +#else #include "archi/riscv/pcer_v2.h" #endif // __ibex__ @@ -40,4 +42,4 @@ #include "archi/udma/uart/udma_uart_v1.h" #include "archi/udma/udma_v3.h" -#endif \ No newline at end of file +#endif diff --git a/include/archi/cv32e40p/cv32e40p.h b/include/archi/cv32e40p/cv32e40p.h new file mode 100644 index 0000000..817b97c --- /dev/null +++ b/include/archi/cv32e40p/cv32e40p.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2018 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +/* + * Bit definitions for Performance counters mode registers + * + */ +#define CSR_PCER_CYCLES 0 /* Count the number of cycles the core was running */ +#define CSR_PCER_INSTR 1 /* Count the number of instructions executed */ +#define CSR_PCER_LD_STALL 2 /* Number of load use hazards */ +#define CSR_PCER_JMP_STALL 3 /* Number of jump register hazards */ +#define CSR_PCER_IMISS 4 /* Cycles waiting for instruction fetches. i.e. the number of instructions wasted due to non-ideal caches */ +#define CSR_PCER_LD 5 /* Number of memory loads executed. Misaligned accesses are counted twice */ +#define CSR_PCER_ST 6 /* Number of memory stores executed. Misaligned accesses are counted twice */ +#define CSR_PCER_JUMP 7 /* Number of jump instructions seen, i.e. j, jr, jal, jalr */ +#define CSR_PCER_BRANCH 8 /* Number of branch instructions seen, i.e. bf, bnf */ +#define CSR_PCER_TAKEN_BRANCH 9 /* Number of taken branch instructions seen, i.e. bf, bnf */ +#define CSR_PCER_COMP_INSTR 10 /* Number of compressed instructions */ +#define CSR_PCER_PIPE_STALL 11 /* Cycles wasted due to ELW instruction */ +#define CSR_PCER_APU_TYPE 12 /* Number of memory loads to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ +#define CSR_PCER_APU_CONT 13 /* Number of memory stores to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external */ +#define CSR_PCER_APU_DEP 14 /* Cycles used for memory loads to EXT. Every non-TCDM access is considered external */ +#define CSR_PCER_APU_WB 15 /* Cycles used for memory stores to EXT. 
Every non-TCDM access is considered external */ + + +// Gives from the event ID, the HW mask that can be stored (with an OR with other events mask) to the PCER +#define CSR_PCER_EVENT_MASK(eventId) (1<<(eventId)) +#define CSR_PCER_ALL_EVENTS_MASK 0xffffffff + +#define CSR_PCMR_ACTIVE 0x1 /* Activate counting */ +#define CSR_PCMR_SATURATE 0x2 /* Activate saturation */ + +#define CSR_PCER_NAME(id) \ + ( id == 0 ? "Cycles" : \ + id == 1 ? "Instructions" : \ + id == 2 ? "LD_Stall" : \ + id == 3 ? "Jmp_Stall" : \ + id == 4 ? "IMISS" : \ + id == 5 ? "LD" : \ + id == 6 ? "ST" : \ + id == 7 ? "JUMP" : \ + id == 8 ? "BRANCH" : \ + id == 9 ? "TAKEN_BRANCH" : \ + id == 10 ? "COMP_INSTR" : \ + id == 11 ? "PIPE_STALL" : \ + id == 12 ? "APU_TYPE" : \ + id == 13 ? "APU_CONT" : \ + id == 14 ? "APU_DEP" : \ + id == 15 ? "APU_WB" : \ + "NA") diff --git a/include/bench/bench.h b/include/bench/bench.h index dfdd46b..4f2197f 100755 --- a/include/bench/bench.h +++ b/include/bench/bench.h @@ -145,6 +145,8 @@ static inline void perf_start(void) { cpu_perf_conf(CSR_PCMR_ACTIVE | CSR_PCMR_SATURATE); #elif defined(__ibex__) cpu_perf_start(); +#elif defined(__cv32e40p__) + cpu_perf_start(); #else cpu_perf_conf_events(SPR_PCER_ALL_EVENTS_MASK); cpu_perf_conf(SPR_PCMR_ACTIVE | SPR_PCMR_SATURATE); @@ -194,6 +196,8 @@ static inline void perf_enable_id( int eventid){ cpu_perf_conf(CSR_PCMR_ACTIVE | CSR_PCMR_SATURATE); #elif defined(__ibex__) cpu_perf_conf_events(CSR_PCER_EVENT_MASK(eventid)); +#elif defined(__cv32e40p__) + cpu_perf_conf_events(1<> 5) & 0x3f; +} + +#ifndef PLP_NO_BUILTIN + +static inline unsigned int hal_core_id() { + return core_id(); + //return __builtin_pulp_CoreId(); +} + +static inline unsigned int hal_cluster_id() { + return cluster_id(); + //return __builtin_pulp_ClusterId(); +} + +// TODO replace by compiler builtin +static inline __attribute__((always_inline)) unsigned int hal_has_fc() { +#ifdef ARCHI_HAS_FC + return 1; +#else + return 0; +#endif +} + +static inline 
__attribute__((always_inline)) unsigned int hal_is_fc() { +#ifndef ARCHI_HAS_FC + return 0; +#else + if (hal_has_fc()) return hal_cluster_id() == ARCHI_FC_CID; + else return 0; +#endif +} + +#else + +static inline __attribute__((always_inline)) unsigned int hal_core_id() { + int hart_id; + asm("csrr %0, 0xF14" : "=r" (hart_id) : ); + // in PULP the hart id is {22'b0, cluster_id, core_id} + return hart_id & 0x01f; +} + +static inline __attribute__((always_inline)) unsigned int hal_cluster_id() { + int hart_id; + asm("csrr %0, 0xF14" : "=r" (hart_id) : ); + // in PULP the hart id is {22'b0, cluster_id, core_id} + return (hart_id >> 5) & 0x3f; +} + +static inline __attribute__((always_inline)) unsigned int hal_has_fc() { +#ifdef ARCHI_HAS_FC + return 1; +#else + return 0; +#endif +} + +static inline __attribute__((always_inline)) unsigned int hal_is_fc() { +#ifndef ARCHI_HAS_FC + return 0; +#else + if (hal_has_fc()) return hal_cluster_id() == ARCHI_FC_CID; + else return 0; +#endif +} + +#endif + + + +#if defined(__LLVM__) + +static inline int hal_irq_disable() +{ + return 0; +} + +static inline void hal_irq_restore(int state) +{ +} + +static inline void hal_irq_enable() +{ +} + +#else + +static inline int hal_irq_disable() +{ + int irq = hal_spr_read_then_clr(0x300, 0x1<<3); + // This memory barrier is needed to prevent the compiler to cross the irq barrier + __asm__ __volatile__ ("" : : : "memory"); + return irq; +} + +static inline void hal_irq_restore(int state) +{ + // This memory barrier is needed to prevent the compiler to cross the irq barrier + __asm__ __volatile__ ("" : : : "memory"); + hal_spr_write(0x300, state); +} + +static inline void hal_irq_enable() +{ + // This memory barrier is needed to prevent the compiler to cross the irq barrier + __asm__ __volatile__ ("" : : : "memory"); + hal_spr_read_then_set(0x300, 0x1<<3); +} + +#endif + +/* + * PERFORMANCE COUNTERS + * + * API for accessing performance counters registers. 
+ * Have a look at CV32E40P specifications.
+ * We implement two not-configurable perf counters: MCYCLE and MINSTRET plus a configurable counter
+ * in which we can OR multiple events (16 different events). Basic example can be found in the
+ * regression tests repository under the perf counters test.
+ * Instantiated configurable counter : 0xB03
+ * Register to set the counter event : 0x323
+ * Register to enable the counters (whether they are instantiated or not) : 0x320
+ */
+
+
+/* Configure the active events. eventMask is an OR of events */
+static inline void cpu_perf_conf_events(unsigned int eventMask)
+{
+#ifndef PLP_NO_PERF_COUNTERS
+  asm volatile("csrw 0x323, %0" : : "r"(eventMask));
+#endif
+}
+
+/* Return events configuration */
+static inline unsigned int cpu_perf_conf_events_get()
+{
+#ifndef PLP_NO_PERF_COUNTERS
+  unsigned int result;
+  asm volatile ("csrr %0, 0x323" : "=r" (result));
+  return result;
+#else
+  return 0;
+#endif
+}
+
+/* Configure the mode. confMask is an OR of all SPR_PCMR_* macros */
+static inline void cpu_perf_conf(unsigned int confMask)
+{
+}
+
+/* Starts counting in all counters. As this is using the mode register,
+ * the rest of the config can be given through conf parameter */
+static inline void cpu_perf_start() {
+#ifndef PLP_NO_PERF_COUNTERS
+  asm volatile("csrc 0x320, %0" : : "r"(0xffffffff));
+#endif
+}
+
+/* Stops counting in all counters.
As this is using the mode register,
+ * the rest of the config can be given through conf parameter */
+static inline void cpu_perf_stop() {
+#ifndef PLP_NO_PERF_COUNTERS
+  asm volatile("csrs 0x320, %0" : : "r"(0xffffffff));
+#endif
+}
+
+/* Set the specified counter to the specified value */
+static inline void cpu_perf_set(unsigned int counterId, unsigned int value) {
+
+}
+
+/* Set all counters to the specified value */
+static inline void cpu_perf_setall(unsigned int value) {
+#ifndef PLP_NO_PERF_COUNTERS
+
+#endif
+}
+
+/* Return the value of the specified counter */
+static inline unsigned int cpu_perf_get(const unsigned int counterId) {
+#ifndef PLP_NO_PERF_COUNTERS
+  unsigned int value = 0;
+
+  // This is stupid! But I really don't know how else we could do that
+  switch(counterId) {
+    case 0: asm volatile ("csrr %0, 0xB00" : "=r" (value)); break;
+    case 1: break;
+    case 2: asm volatile ("csrr %0, 0xB02" : "=r" (value)); break;
+    case 3: asm volatile ("csrr %0, 0xB03" : "=r" (value)); break;
+    case 4: asm volatile ("csrr %0, 0xB04" : "=r" (value)); break;
+    case 5: asm volatile ("csrr %0, 0xB05" : "=r" (value)); break;
+    case 6: asm volatile ("csrr %0, 0xB06" : "=r" (value)); break;
+    case 7: asm volatile ("csrr %0, 0xB07" : "=r" (value)); break;
+    case 8: asm volatile ("csrr %0, 0xB08" : "=r" (value)); break;
+    case 9: asm volatile ("csrr %0, 0xB09" : "=r" (value)); break;
+    case 10: asm volatile ("csrr %0, 0xB0A" : "=r" (value)); break;
+    case 11: asm volatile ("csrr %0, 0xB0B" : "=r" (value)); break;
+    case 12: asm volatile ("csrr %0, 0xB0C" : "=r" (value)); break;
+    case 13: asm volatile ("csrr %0, 0xB0D" : "=r" (value)); break;
+    case 14: asm volatile ("csrr %0, 0xB0E" : "=r" (value)); break;
+    case 15: asm volatile ("csrr %0, 0xB0F" : "=r" (value)); break;
+  }
+  return value;
+#else
+  return 0;
+#endif
+}
+
+/* Return the human-readable name of the given event ID.
+ * Fix: the function previously had an empty body, which is undefined
+ * behavior for a non-void function whose return value is used. */
+static inline const char *cpu_perf_name(int event) {
+  return CSR_PCER_NAME(event);
+}
+
+
+
+/*
+ * Stack checking
+ */
+
+static inline void cpu_stack_check_enable(unsigned
int base, unsigned int end) +{ + asm volatile ("csrwi 0x7D0, 0" :: ); + asm volatile ("csrw 0x7D1, %0" :: "r" (base)); + asm volatile ("csrw 0x7D2, %0" :: "r" (end)); + asm volatile ("csrwi 0x7D0, 1" :: ); +} + +static inline void cpu_stack_check_disable() +{ + asm volatile ("csrwi 0x7D0, 0" :: ); +} + + + +#if !defined(RV_ISA_RV32) + +/* Packing of scalars into vectors */ +#define __builtin_pack2(x, y) __builtin_pulp_pack2((signed short) (x), (signed short) (y)) +#define __builtin_packu2(x, y) __builtin_pulp_pack2((unsigned short) (x), (unsigned short) (y)) + +#define __builtin_pack4(x, y, z, t) __builtin_pulp_pack4((signed char) (x), (signed char) (y), (signed char) (z), (signed char) (t)) +#define __builtin_packu4(x, y, z, t) __builtin_pulp_pack4((unsigned char) (x), (unsigned char) (y), (unsigned char) (z), (unsigned char) (t)) + +#define __builtin_max2(x, y) __builtin_pulp_max2((x), (y)) +#define __builtin_max4(x, y) __builtin_pulp_max4((x), (y)) + +#define __builtin_maxu2(x, y) __builtin_pulp_maxu2((x), (y)) +#define __builtin_maxu4(x, y) __builtin_pulp_maxu4((x), (y)) + +/* Min */ +#define __builtin_min2(x, y) __builtin_pulp_min2((x), (y)) +#define __builtin_min4(x, y) __builtin_pulp_min4((x), (y)) + +#define __builtin_minu2(x, y) __builtin_pulp_minu2((x), (y)) +#define __builtin_minu4(x, y) __builtin_pulp_minu4((x), (y)) + +/* Clip */ +#define __builtin_clip(x, precision) __builtin_pulp_clip((x), -(1<<(precision)), (1<(signed short)(y)[0])?((signed short)(x)[0]):((signed short)(y)[0]), \ + ((signed short)(x)[1]>(signed short)(y)[1])?((signed short)(x)[1]):((signed short)(y)[1])}) +#define __builtin_max4(x, y) ((v4s) {((signed char)(x)[0]>(signed char)(y)[0])?(signed char)(x)[0]:(signed char)(y)[0], \ + ((signed char)(x)[1]>(signed char)(y)[1])?(signed char)(x)[1]:(signed char)(y)[1], \ + ((signed char)(x)[2]>(signed char)(y)[2])?(signed char)(x)[2]:(signed char)(y)[2], \ + ((signed char)(x)[3]>(signed char)(y)[3])?(signed char)(x)[3]:(signed char)(y)[3]}) + 
+#define __builtin_maxu2(x, y) ((v2u) {((unsigned short)(x)[0]>(unsigned short)(y)[0])?(unsigned short)(x)[0]:(unsigned short)(y)[0], \ + ((unsigned short)(x)[1]>(unsigned short)(y)[1])?(unsigned short)(x)[1]:(unsigned short)(y)[1]}) +#define __builtin_maxu4(x, y) ((v4u) {((unsigned char)(x)[0]>(unsigned char)(y)[0])?(unsigned char)(x)[0]:(unsigned char)(y)[0], \ + ((unsigned char)(x)[1]>(unsigned char)(y)[1])?(unsigned char)(x)[1]:(unsigned char)(y)[1], \ + ((unsigned char)(x)[2]>(unsigned char)(y)[2])?(unsigned char)(x)[2]:(unsigned char)(y)[2], \ + ((unsigned char)(x)[3]>(unsigned char)(y)[3])?(unsigned char)(x)[3]:(unsigned char)(y)[3]}) + +/* Min */ +#define __builtin_min2(x, y) ((v2s) {((signed short)(x)[0]<(signed short)(y)[0])?((signed short)(x)[0]):((signed short)(y)[0]), \ + ((signed short)(x)[1]<(signed short)(y)[1])?((signed short)(x)[1]):((signed short)(y)[1])}) +#define __builtin_min4(x, y) ((v4s) {((signed char)(x)[0]<(signed char)(y)[0])?(signed char)(x)[0]:(signed char)(y)[0], \ + ((signed char)(x)[1]<(signed char)(y)[1])?(signed char)(x)[1]:(signed char)(y)[1], \ + ((signed char)(x)[2]<(signed char)(y)[2])?(signed char)(x)[2]:(signed char)(y)[2], \ + ((signed char)(x)[3]<(signed char)(y)[3])?(signed char)(x)[3]:(signed char)(y)[3]}) + +#define __builtin_minu2(x, y) ((v2u) {((unsigned short)(x)[0]<(unsigned short)(y)[0])?(unsigned short)(x)[0]:(unsigned short)(y)[0], \ + ((unsigned short)(x)[1]<(unsigned short)(y)[1])?(unsigned short)(x)[1]:(unsigned short)(y)[1]}) +#define __builtin_minu4(x, y) ((v4u) {((unsigned char)(x)[0]<(unsigned char)(y)[0])?(unsigned char)(x)[0]:(unsigned char)(y)[0], \ + ((unsigned char)(x)[1]<(unsigned char)(y)[1])?(unsigned char)(x)[1]:(unsigned char)(y)[1], \ + ((unsigned char)(x)[2]<(unsigned char)(y)[2])?(unsigned char)(x)[2]:(unsigned char)(y)[2], \ + ((unsigned char)(x)[3]<(unsigned char)(y)[3])?(unsigned char)(x)[3]:(unsigned char)(y)[3]}) + +/* Clip */ +#define __builtin_clip(x, precision) 
((x)<(-(1<<(precision)))?(-(1<<(precision))):(((x)>((1<<(precision))-1))?((1<<(precision))-1):(x))) +#define __builtin_clipu(x, precision) ((x)<0)?0:(((x)>((1<<(precision))-1))?((1<<(precision))-1):(x)) + +/* Abs */ +#define __builtin_abs2(x) ((v2s) {((x)[0]<0)?-(x)[0]:(x)[0], ((x)[1]<0)?-(x)[1]:(x)[1]}) +#define __builtin_abs4(x) ((v4s) {((x)[0]<0)?-(x)[0]:(x)[0], ((x)[1]<0)?-(x)[1]:(x)[1], \ + ((x)[2]<0)?-(x)[2]:(x)[2], ((x)[3]<0)?-(x)[3]:(x)[3]}) + +/* Mac */ +#define __builtin_macs(Acc, x, y) ((Acc) + ((short int) (x) * (short int) (y))) +#define __builtin_machhs(Acc, x, y) ((Acc) + ((short int) ((x)>>16) * (short int) ((y)>>16))) +#define __builtin_macu(Acc, x, y) ((Acc) + ((unsigned short int) (x) * (unsigned short int) (y))) +#define __builtin_machhu(Acc, x, y) ((Acc) + ((unsigned short int) ((x)>>16) * (unsigned short int) ((y)>>16))) + +#define __builtin_macsN(Acc, x, y, n) (((Acc) + ((short int) (x) * (short int) (y)))>>(n)) +#define __builtin_macuN(Acc, x, y, n) (((Acc) + ((unsigned short int) (x) * (unsigned short int) (y)))>>(n)) +#define __builtin_macsRN(Acc, x, y, n) ((((Acc) + ((short int) (x) * (short int) (y))) + (1<<((n)-1))) >> (n)) +#define __builtin_macuRN(Acc, x, y, n) ((((Acc) + ((unsigned short int) (x) * (unsigned short int) (y))) + (1<<((n)-1))) >> (n)) + +#define __builtin_machhsN(Acc, x, y, n) (((Acc) + ((short int) ((x)>>16) * (short int) ((y)>>16))) >> (n)) +#define __builtin_machhuN(Acc, x, y, n) (((Acc) + ((unsigned short int) ((x)>>16) * (unsigned short int) ((y)>>16))) >> (n)) +#define __builtin_machhsRN(Acc, x, y, n) ((((Acc) + ((short int) ((x)>>16) * (short int) ((y)>>16))) + (1<<((n)-1))) >> (n)) +#define __builtin_machhuRN(Acc, x, y, n) ((((Acc) + ((unsigned short int) ((x)>>16) * (unsigned short int) ((y)>>16))) + (n))) + +/* Multiplications */ +#define __builtin_mulsN(x, y, n) (((short int) (x) * (short int) (y))>>(n)) +#define __builtin_mulsRN(x, y, n) ((((short int) (x) * (short int) (y)) + (1<<((n)-1)))>>(n)) +#define 
__builtin_muluN(x, y, n) (((unsigned short int) (x) * (unsigned short int) (y))>>(n)) +#define __builtin_muluRN(x, y, n) ((((unsigned short int) (x) * (unsigned short int) (y)) + (1<<((n)-1)))>>(n)) + +/* Vectorial product and sum of products */ +#define __builtin_dotp2(x, y) ( (x)[0]*(y)[0] + (x)[1]*(y)[1]) +#define __builtin_dotpu2(x, y) ( (x)[0]*(y)[0] + (x)[1]*(y)[1]) +#define __builtin_dotpus2(x, y) ( (x)[0]*(y)[0] + (x)[1]*(y)[1]) + +#define __builtin_sumdotp2(x, y, z) ((z)+(x)[0]*(y)[0] + (x)[1]*(y)[1]) +#define __builtin_sumdotpu2(x, y, z) ((z)+(x)[0]*(y)[0] + (x)[1]*(y)[1]) +#define __builtin_sumdotpus2(x, y, z) ((z)+(x)[0]*(y)[0] + (x)[1]*(y)[1]) + +#define __builtin_dotp4(x, y) ( (x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3]) +#define __builtin_dotpu4(x, y) ( (x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3]) +#define __builtin_dotpus4(x, y) ( (x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3]) + +#define __builtin_sumdotp4(x, y, z) ((z)+(x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3]) +#define __builtin_sumdotpu4(x, y, z) ((z)+(x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3]) +#define __builtin_sumdotpus4(x, y, z) ((z)+(x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3]) + + +/* Position of the most significant bit of x */ +#define __FL1(x) (31 - __builtin_clz((x))) + +/* Number of sign bits */ +static inline unsigned int __builtin_clb(unsigned int x) { + int result = 0; + while (x) { + if (x & 1) result++; + x >>= 1; + } + return result; +} + +/* Bit Extraction */ +#define __builtin_bitextract(x, size, off) (((((x)>>(off))&((unsigned int)(1<<(size))-1))<<(32-(size)))>>(32-(size))) +#define __builtin_bitextractu(x, size, off) (((x)>>(off))&((unsigned int)(1<<(size))-1)) + +/* Bit insertion */ +#define __builtin_bitinsert(dst, src, size, off) (((dst) & ~(((1<<(size))-1)<<(off))) | (((src) & ((1<<(size))-1))<<(off))) +#define __builtin_bitinsert_r(dst, src, size, off) (((dst) & 
~(((1<<(size))-1)<<(off))) | (((src) & ((1<<(size))-1))<<(off)))
+
+/* 1 bit rotation to the right, 32 bits input */
+#define __builtin_rotr(x) ((((x)>>1)&0x7FFFFFFF) | ((x)<<31))
+
+/* Add with normalization and rounding */
+#define __builtin_addroundnormu(x, y, scale) ((unsigned int)((x) + (y) + (1<<((scale)-1)))>>(scale))
+#define __builtin_addroundnorm(x, y, scale) ((int)((x) + (y) + (1<<((scale)-1)))>>(scale))
+
+/* Normalization and rounding */
+#define __builtin_roundnormu(x, scale) ((unsigned int)((x) + (1<<((scale)-1)))>>(scale))
+#define __builtin_roundnorm(x, scale) ((int)((x) + (1<<((scale)-1)))>>(scale))
+
+#endif
+
+#endif
diff --git a/kernel/bench.c b/kernel/bench.c
index c460070..d51fed8 100644
--- a/kernel/bench.c
+++ b/kernel/bench.c
@@ -163,6 +163,24 @@ void perf_print_all(void) {
   printf("Perf ST EXT CYC: %d\n", cpu_perf_get(CSR_PCER_ST_EXT_CYC));
   printf("Perf TCDM CONT: %d\n", cpu_perf_get(CSR_PCER_TCDM_CONT));
   printf("Perf CSR HAZARD: [Not Implemented]\n");
+#elif defined(__cv32e40p__)
+  // Counters that are not implemented read as 0. Only counters 0 (MCYCLE), 2 (MINSTRET) and 3 (the configurable counter) are implemented.
+ printf("MCYCLE :%d\n", cpu_perf_get(0)); + printf("Perf reg 1 : not implemented\n") ; + printf("MINSTR :%d\n", cpu_perf_get(2)); + printf("Perf reg 3 :%d\n", cpu_perf_get(3)); + printf("Perf reg 4 :%d\n", cpu_perf_get(4)); + printf("Perf reg 5 :%d\n", cpu_perf_get(5)); + printf("Perf reg 6 :%d\n", cpu_perf_get(6)); + printf("Perf reg 7 :%d\n", cpu_perf_get(7)); + printf("Perf reg 8 :%d\n", cpu_perf_get(8)); + printf("Perf reg 9 :%d\n", cpu_perf_get(9)); + printf("Perf reg 10:%d\n", cpu_perf_get(10)); + printf("Perf reg 11:%d\n", cpu_perf_get(11)); + printf("Perf reg 12:%d\n", cpu_perf_get(12)); + printf("Perf reg 13:%d\n", cpu_perf_get(13)); + printf("Perf reg 14:%d\n", cpu_perf_get(14)); + printf("Perf reg 15:%d\n", cpu_perf_get(15)); #elif defined( __riscv__ ) printf("Perf CYCLES: %d\n", cpu_perf_get(0)); printf("Perf INSTR: %d\n", cpu_perf_get(1)); @@ -229,7 +247,7 @@ void illegal_insn_handler_c(void) { #ifndef __ariane__ unsigned int exception_address, insn; -#if defined( __riscv__ ) || defined( __ibex__) +#if defined( __riscv__ ) || defined( __ibex__) || defined(__cv32e40p__) asm("csrr %0, 0x341" : "=r" (exception_address) : ); #else exception_address = hal_spr_read(SPR_EPCR_BASE); diff --git a/rules/pulpos/targets/pulpissimo.mk b/rules/pulpos/targets/pulpissimo.mk index 782b300..25ed391 100644 --- a/rules/pulpos/targets/pulpissimo.mk +++ b/rules/pulpos/targets/pulpissimo.mk @@ -4,12 +4,18 @@ PULP_CFLAGS += -D__ibex__ -U__riscv__ -UARCHI_CORE_HAS_PULPV2 -DRV_ISA_RV32 PULP_ARCH_CFLAGS ?= -march=rv32imc PULP_ARCH_LDFLAGS ?= -march=rv32imc PULP_ARCH_OBJDFLAGS ?= -Mmarch=rv32imc +else ifdef USE_CV32E40P +PULP_LDFLAGS += +PULP_CFLAGS += -D__cv32e40p__ -U__riscv__ -UARCHI_CORE_HAS_PULPV2 +PULP_ARCH_CFLAGS ?= -march=rv32imcxgap9 +PULP_ARCH_LDFLAGS ?= -march=rv32imcxgap9 +PULP_ARCH_OBJDFLAGS ?= -Mmarch=rv32imcxgap9 else PULP_LDFLAGS += PULP_CFLAGS += -D__riscv__ -PULP_ARCH_CFLAGS ?= -march=rv32imcxgap9 +PULP_ARCH_CFLAGS ?= -march=rv32imcxgap9 PULP_ARCH_LDFLAGS ?= 
-march=rv32imcxgap9 -PULP_ARCH_OBJDFLAGS ?= -Mmarch=rv32imcxgap9 +PULP_ARCH_OBJDFLAGS ?= -Mmarch=rv32imcxgap9 endif PULP_CFLAGS += -fdata-sections -ffunction-sections -include chips/pulpissimo/config.h -I$(PULPRT_HOME)/include/chips/pulpissimo @@ -54,4 +60,4 @@ ifeq '$(platform)' 'fpga' CONFIG_IO_UART=1 endif -include $(PULPRT_HOME)/rules/pulpos/default_rules.mk \ No newline at end of file +include $(PULPRT_HOME)/rules/pulpos/default_rules.mk