# Patch summary: make the cluster timer helpers and the boot path work for a cluster ID other than 0.
#
#   include/bench/bench.h        start_timer/stop_timer/reset_timer/get_time take an `int cid` and pass it
#                                to timer_base_cl() instead of the hard-coded cluster 0.
#   include/hal/timer/timer_v2.h timer_base_cl() now uses its `cid` argument in
#                                ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR() instead of 0.
#   kernel/bench.c               callers pass `(int) get_cluster_id()`; under RTL_SDK, print_summary() derives
#                                its pointer from pos_l1_base instead of the literal 0x10001000.
#   kernel/chips/pulp_cluster/link.ld
#                                adds the symbols __l2_data_end, __heap_sram and __data_l1; __l2_shared_end is
#                                now __l2_data_end + SIZEOF(.l1cluster_g) + SIZEOF(.bss_l1).
#   kernel/crt0.S                drops the stores to 0x10080000; the branch to sync_loop now tests only the
#                                core-offset bits of a0, so a non-zero cluster ID does not send PE0 to sync_loop.
#                                The mask is 0x1f, matching the 5-bit core field used elsewhere in this file
#                                (`andi a1, a0, 0x1f`, `srli a0, a0, 5`); a 4-bit mask would misclassify core 16.
#
# NOTE(review): include/data/data.h switches L1_GLOBAL_DATA to section "__data_l1", while the link.ld hunk below
#   still gathers `*(.data_l1)` and introduces `__data_l1` only as a symbol. Confirm that objects placed in the
#   renamed section are still collected into .l1cluster_g rather than left as an orphan section.
# NOTE(review): this copy of the patch has its line breaks collapsed; re-export it (e.g. `git format-patch`)
#   before trying to apply it.
diff --git a/include/bench/bench.h b/include/bench/bench.h index 9698f50..cbf6935 100755 --- a/include/bench/bench.h +++ b/include/bench/bench.h @@ -37,24 +37,24 @@ static inline int get_time_hi() {return *(volatile int*) GET_TIME_HI_ADDR;} #if !defined(ARCHI_HAS_FC) -static inline void start_timer() +static inline void start_timer(int cid) { - timer_start(timer_base_cl(0, 0, 1)); + timer_start(timer_base_cl(cid, 0, 1)); } -static inline void stop_timer() +static inline void stop_timer(int cid) { - timer_conf_set(timer_base_cl(0, 0, 1), 0); + timer_conf_set(timer_base_cl(cid, 0, 1), 0); } -static inline void reset_timer() +static inline void reset_timer(int cid) { - timer_reset(timer_base_cl(0, 0, 1)); + timer_reset(timer_base_cl(cid, 0, 1)); } -static inline int get_time() +static inline int get_time(int cid) { - return timer_count_get(timer_base_cl(0, 0, 1)); + return timer_count_get(timer_base_cl(cid, 0, 1)); } #else diff --git a/include/data/data.h b/include/data/data.h index 8d8acbd..3e59ce6 100644 --- a/include/data/data.h +++ b/include/data/data.h @@ -39,7 +39,7 @@ #define PI_L2 __attribute__((section(".l2_data"))) #define L2_DATA PI_L2 -#define L1_GLOBAL_DATA __attribute__((section(".data_l1"))) +#define L1_GLOBAL_DATA __attribute__((section("__data_l1"))) #define L1_DATA L1_GLOBAL_DATA #ifdef USE_CLUSTER diff --git a/include/hal/timer/timer_v2.h b/include/hal/timer/timer_v2.h index d6923f1..6961a2a 100644 --- a/include/hal/timer/timer_v2.h +++ b/include/hal/timer/timer_v2.h @@ -39,7 +39,7 @@ static inline unsigned int timer_base_fc(int id, int sub_id) static inline unsigned int timer_base_cl(int cid, int id, int sub_id) { - return ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(0) + ARCHI_TIMER_OFFSET + id * ARCHI_TIMER_SIZE + sub_id * 4; + return ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_TIMER_OFFSET + id * ARCHI_TIMER_SIZE + sub_id * 4; } #else diff --git a/kernel/bench.c b/kernel/bench.c index 8bc820d..e8f6378 100644 --- a/kernel/bench.c +++ 
b/kernel/bench.c @@ -29,7 +29,7 @@ void bench_disable_printf(void) { void bench_timer_start(void) { if (get_core_id()==0) - start_timer(); + start_timer((int) get_cluster_id()); #ifdef PROFILE perf_start(); #endif @@ -37,7 +37,7 @@ void bench_timer_start(void) { void bench_timer_stop(void) { if (get_core_id()==0) - stop_timer(); + stop_timer((int) get_cluster_id()); #ifdef PROFILE perf_stop(); @@ -47,8 +47,8 @@ void bench_timer_stop(void) { void bench_timer_reset(void) { if (get_core_id()==0) { - stop_timer(); - reset_timer(); + stop_timer((int) get_cluster_id()); + reset_timer((int) get_cluster_id()); } #ifdef PROFILE perf_reset(); @@ -79,7 +79,7 @@ void print_result(testcase_t *test, testresult_t *result) void print_summary(unsigned int errors) { #ifdef RTL_SDK - volatile int* ptr = (int*)(0x10001000+get_core_id()*4*2); + volatile int* ptr = (int*)(pos_l1_base+get_core_id()*4*2); ptr[1] = errors; #endif @@ -110,7 +110,7 @@ void run_benchmark(testcase_t *test, testresult_t *result) test->test(result, bench_timer_start, bench_timer_stop); - result->time = get_time(); + result->time = get_time((int) get_cluster_id()); } int run_suite(testcase_t *tests) diff --git a/kernel/chips/pulp_cluster/link.ld b/kernel/chips/pulp_cluster/link.ld index 534bb11..bdad1f7 100644 --- a/kernel/chips/pulp_cluster/link.ld +++ b/kernel/chips/pulp_cluster/link.ld @@ -221,12 +221,16 @@ SECTIONS . = ALIGN(4); } > L2 + __l2_data_end = .; + .l1cluster_g : { . = ALIGN(4); + __heap_sram = .; *(.heapsram) *(.heapsram.*) *(.l1cluster_g) *(.l1cluster_g.*) + __data_l1 = .; *(.data_l1) *(.data_l1.*) . = ALIGN(4); @@ -245,7 +249,7 @@ SECTIONS __l1_end = ALIGN(4); - __l2_shared_end = LOADADDR(.bss_l1) + SIZEOF(.bss_l1); + __l2_shared_end = __l2_data_end + SIZEOF(.l1cluster_g) + SIZEOF(.bss_l1); diff --git a/kernel/crt0.S b/kernel/crt0.S index c6c755a..fc846a0 100644 --- a/kernel/crt0.S +++ b/kernel/crt0.S @@ -30,9 +30,10 @@ pos_init_entry: # PEs from 1 to 7 will go to sync_loop and wait. 
PE0 will reach them # later after the pos_init_start. Then, they'll set up their stack into # the L1 and jump to cluster_entry_stub - li t0, 0x10080000 - sw x0, 0(t0) - bnez a0, sync_loop + # We check if the offset of the core is zero, so that even if the cluster + # ID is not zero, the execution does not break here + andi a2, a0, 0x1f + bnez a2, sync_loop #else srli a0, a0, 5 #ifdef ARCHI_CL_BOOT @@ -76,9 +77,6 @@ pos_init_entry: #ifdef ARCHI_NO_FC csrr a0, 0xF14 andi a1, a0, 0x1f - li t0, 0x10080000 - li t1, 0x1 - sw t1, 0(t0) j pe_start #endif