pqc-accelerate/HLS_Codes/polymult.cpp

1091 lines
25 KiB
C++

// polymult.cpp (Set A, with all inner pragmas kept, and .user/.id/.dest removed)
#include "ntt.h"
coeff_t q = 3329;
coeff_t inv_n = 3303;
//double_coeff_t v = 20159;
/*coeff_t mod(double_coeff_t A)
{
#pragma HLS inline OFF
//double_coeff_t v = (double_coeff_t) ((1<<26) + 1664)/q;
double_coeff_t t = (v * A + (1 << 25)) >> 26;
t = t * q;
coeff_t val;
if (A < t)
val = A - t + q;
else
val = A - t;
return val;
}*/
ap_uint<13> m = 5039;
coeff_t mod(double_coeff_t A)
{
#pragma HLS pipeline II = 1
coeff_t val;
ap_uint<36> t123 = m * A;
ap_uint<12> t = (t123 >> 24);
ap_uint<24> ta = t * q;
ap_uint<24> c = A - ta;
if (c > q)
val = (coeff_t) (c - q);
else
val = (coeff_t) c;
return val;
}
coeff_t modadd(coeff_t x, coeff_t y)
{
#pragma HLS inline
coeff_t w = x + y;
return (coeff_t)(w - (w < q ? (coeff_t)0 : q));
}
coeff_t modsub(coeff_t x, coeff_t y)
{
#pragma HLS inline
coeff_t s = x + (x > y ? (coeff_t)0 : q);
return (coeff_t)(s - y);
}
void butterfly_unit_dif(coeff_t w, coeff_t a, coeff_t b, coeff_t &x, coeff_t &y)
{
#pragma HLS pipeline II = 1
x = modadd(a, b);
y = modsub(a, b);
y = mod(w * y);
}
void butterfly_unit_dit(coeff_t w, coeff_t a, coeff_t b, coeff_t &x, coeff_t &y)
{
#pragma HLS pipeline II = 1
coeff_t wb = mod(w * b);
x = modadd(a, wb);
y = modsub(a, wb);
}
void delay_cycle()
{
#ifdef __SYNTHESIS__
ap_wait_n(1);
#endif
}
void ntt_stage1 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeff = 1729;
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 64; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 1; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 64; k++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeff;
butterfly_unit_dit(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
delay_cycle();
}
for (int i = 0; i < 64; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
}
}
}
void ntt_stage2 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[2] = {2580, 3289};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 32; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 2; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 32; k++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[j];
butterfly_unit_dit(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
delay_cycle();
}
for (int i = 0; i < 32; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 1)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void ntt_stage3 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[4] = {2642, 630, 1897, 848};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 16; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 4; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 16; k++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[j];
butterfly_unit_dit(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
delay_cycle();
}
for (int i = 0; i < 16; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 3)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void ntt_stage4 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[8] = {1062, 1919, 193, 797, 2786, 3260, 569, 1746};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 8; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 8; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
int ind = 1;
for (int k = 0; k < 8; k++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[j];
butterfly_unit_dit(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
delay_cycle();
}
for (int i = 0; i < 8; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 7)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void ntt_stage5 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[16] = {296, 2447, 1339, 1476, 3046, 56, 2240, 1333,
1426, 2094, 535, 2882, 2393, 2879, 1974, 821};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 4; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 16; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
int ind = 1;
for (int k = 0; k < 4; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[j];
butterfly_unit_dit(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
delay_cycle();
}
for (int i = 0; i < 4; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 15)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void ntt_stage6 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[32] = {289, 331, 3253, 1756, 1197, 2304, 2277, 2055,
650, 1977, 2513, 632, 2865, 33, 1320, 1915,
2319, 1435, 807, 452, 1438, 2868, 1534, 2402,
2647, 2617, 1481, 648, 2474, 3110, 1227, 910};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 2; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 32; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
int ind = 1;
for (int k = 0; k < 2; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[j];
butterfly_unit_dit(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
delay_cycle();
}
for (int i = 0; i < 2; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 31)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void ntt_stage7 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS inline off
#pragma HLS DEPENDENCE variable = fifo inter RAW false
coeff_t twiddle_coeffs[64] = {17, 2761, 583, 2649, 1637, 723, 2288, 1100,
1409, 2662, 3281, 233, 756, 2156, 3015, 3050,
1703, 1651, 2789, 1789, 1847, 952, 1461, 2687,
939, 2308, 2437, 2388, 733, 2337, 268, 641,
1584, 2298, 2037, 3220, 375, 2549, 2090, 1645,
1063, 319, 2773, 757, 2099, 561, 2466, 2594,
2804, 1092, 403, 1026, 1143, 2150, 2775, 886,
1722, 1212, 1874, 1029, 2110, 2935, 885, 2154};
int x, y;
coeff_t u, t, it, bf1, bf2;
u = a.read();
for (int j = 0; j < 64; j++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
t = a.read();
butterfly_unit_dit(twiddle_coeffs[j], u, t, bf1, bf2);
b.write(bf1);
b.write(bf2);
if (j < 63)
u = a.read();
}
}
void intt_stage1 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
coeff_t twiddle_coeffs[64] = {1175, 2444, 394, 1219, 2300, 1455, 2117, 1607,
2443, 554, 1179, 2186, 2303, 2926, 2237, 525,
735, 863, 2768, 1230, 2572, 556, 3010, 2266,
1684, 1239, 780, 2954, 109, 1292, 1031, 1745,
2688, 3061, 992, 2596, 941, 892, 1021, 2390,
642, 1868, 2377, 1482, 1540, 540, 1678, 1626,
279, 314, 1173, 2573, 3096, 48, 667, 1920,
2229, 1041, 2606, 1692, 680, 2746, 568, 3312};
#pragma HLS inline off
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t u, t, it, bf1, bf2;
u = a.read();
for (int j = 0; j < 64; j++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
t = a.read();
butterfly_unit_dif(twiddle_coeffs[j], u, t, bf1, bf2);
b.write(bf1);
b.write(bf2);
if (j < 63)
u = a.read();
}
}
void intt_stage2 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[32] = {2419, 2102, 219, 855, 2681, 1848, 712, 682,
927, 1795, 461, 1891, 2877, 2522, 1894, 1010,
1414, 2009, 3296, 464, 2697, 816, 1352, 2679,
1274, 1052, 1025, 2132, 1573, 76, 2998, 3040};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 2; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
int ind = 0;
int count = 0;
for (int j = 0; j < 32; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 2; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[ind];
butterfly_unit_dif(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
count++;
if (count % 2 == 0)
ind++;
delay_cycle();
}
for (int i = 0; i < 2; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 31)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void intt_stage3 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[16] = {2508, 1355, 450, 936, 447, 2794, 1235, 1903,
1996, 1089, 3273, 283, 1853, 1990, 882, 3033};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
int m = 4;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 4; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
int ind = 0;
int count = 0;
for (int j = 0; j < 16; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 4; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[ind];
butterfly_unit_dif(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
count++;
if (count % 4 == 0)
ind++;
delay_cycle();
}
for (int i = 0; i < 4; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 15)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void intt_stage4 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[8] = {1583, 2760, 69, 543, 2532, 3136, 1410, 2267};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 8; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
int ind = 0;
int count = 0;
for (int j = 0; j < 8; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 8; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[ind];
butterfly_unit_dif(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
count++;
if (count % 8 == 0)
ind++;
delay_cycle();
}
for (int i = 0; i < 8; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 7)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void intt_stage5 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[4] = {2481, 1432, 2699, 687};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 16; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
int ind = 0;
int count = 0;
for (int j = 0; j < 4; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 16; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[ind];
butterfly_unit_dif(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
count++;
if (count % 16 == 0)
ind++;
delay_cycle();
}
for (int i = 0; i < 16; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 3)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void intt_stage6 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS dataflow
coeff_t twiddle_coeffs[2] = {40, 749};
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, tf;
for (int i = 0; i < 32; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
int ind = 0;
int count = 0;
for (int j = 0; j < 2; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 32; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = twiddle_coeffs[ind];
butterfly_unit_dif(tf, a_, b_, bf1, bf2);
b.write(bf1);
fifo[iter] = bf2;
iter++;
count++;
if (count == 32)
ind++;
delay_cycle();
}
for (int i = 0; i < 32; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
if (j < 1)
{
it = a.read();
fifo[i + 64] = it;
}
}
}
}
void intt_stage7 (hls::stream<coeff_t> &a, hls::stream<coeff_t> &b, coeff_t fifo[])
{
#pragma HLS inline off
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int x, y;
coeff_t a_, b_, it, bf1, bf2, bfn1, bfn2, tf;
for (int i = 0; i < 64; i++)
{
#pragma HLS pipeline
it = a.read();
fifo[i + 64] = it;
}
for (int j = 0; j < 1; j++)
{
#pragma HLS DEPENDENCE variable = fifo inter RAW false
int iter = 0;
for (int k = 0; k < 64; k = k + 1)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
a_ = fifo[iter + 64];
b_ = a.read();
tf = 1600;
butterfly_unit_dif(tf, a_, b_, bf1, bf2);
bfn1 = mod(bf1 * inv_n);
bfn2 = mod(bf2 * inv_n);
b.write(bfn1);
fifo[iter] = bfn2;
iter++;
delay_cycle();
}
for (int i = 0; i < 64; i++)
{
#pragma HLS pipeline II = 1
#pragma HLS DEPENDENCE variable = fifo inter RAW false
b.write(fifo[i]);
delay_cycle();
}
}
}
void read_inputs (hls::stream<coeff_t_stream> &input, hls::stream<coeff_t> &se, hls::stream<coeff_t> &so)
{
coeff_t_stream x;
coeff_t a;
int i;
for (i=0; i<Nt; i++)
{
#pragma HLS pipeline II = 1
x = input.read();
a = x.value;
if (i%2 == 0)
se.write(a);
else
so.write(a);
}
}
void write_outputs (hls::stream<coeff_t> &se, hls::stream<coeff_t> &so, hls::stream<coeff_t_stream> &output)
{
coeff_t a1, a0;
coeff_t_stream y;
int i;
y.last = 0;
for (i=0; i<N; i++)
{
#pragma HLS pipeline II = 1
a0 = se.read();
a1 = so.read();
y.value = a0;
output.write(y);
y.value = a1;
if (i == N-1)
y.last = 1;
output.write(y);
}
}
void ct_ntt (hls::stream<coeff_t_stream> &input, hls::stream<coeff_t_stream> &output)
{
#pragma HLS dataflow
hls::stream<coeff_t> s0o("s0o"), s1o("s1o"), s2o("s2o"), s3o("s3o"),
s4o("s4o"), s5o("s5o"), s6o("s6o"), s7o("s7o"),
s0e("s0e"), s1e("s1e"), s2e("s2e"), s3e("s3e"),
s4e("s4e"), s5e("s5e"), s6e("s6e"), s7e("s7e");
coeff_t fo7[65], fo6[66], fo5[68], fo4[72], fo3[80], fo2[96], fo1[128];
coeff_t fe7[65], fe6[66], fe5[68], fe4[72], fe3[80], fe2[96], fe1[128];
coeff_t_stream x, y;
#pragma HLS STREAM variable = s7o depth = 1
#pragma HLS STREAM variable = s6o depth = 2
#pragma HLS STREAM variable = s5o depth = 4
#pragma HLS STREAM variable = s4o depth = 8
#pragma HLS STREAM variable = s3o depth = 16
#pragma HLS STREAM variable = s2o depth = 32
#pragma HLS STREAM variable = s1o depth = 64
#pragma HLS STREAM variable = s0o depth = 128
#pragma HLS STREAM variable = s7e depth = 1
#pragma HLS STREAM variable = s6e depth = 2
#pragma HLS STREAM variable = s5e depth = 4
#pragma HLS STREAM variable = s4e depth = 8
#pragma HLS STREAM variable = s3e depth = 16
#pragma HLS STREAM variable = s2e depth = 32
#pragma HLS STREAM variable = s1e depth = 64
#pragma HLS STREAM variable = s0e depth = 128
read_inputs(input, s0e, s0o);
ntt_stage1 (s0e, s1e, fe1);
ntt_stage1 (s0o, s1o, fo1);
ntt_stage2 (s1e, s2e, fe2);
ntt_stage2 (s1o, s2o, fo2);
ntt_stage3 (s2e, s3e, fe3);
ntt_stage3 (s2o, s3o, fo3);
ntt_stage4 (s3e, s4e, fe4);
ntt_stage4 (s3o, s4o, fo4);
ntt_stage5 (s4e, s5e, fe5);
ntt_stage5 (s4o, s5o, fo5);
ntt_stage6 (s5e, s6e, fe6);
ntt_stage6 (s5o, s6o, fo6);
ntt_stage7 (s6e, s7e, fe7);
ntt_stage7 (s6o, s7o, fo7);
write_outputs(s7e, s7o, output);
}
void gs_intt (hls::stream<coeff_t_stream> &input, hls::stream<coeff_t_stream> &output)
{
#pragma HLS dataflow
hls::stream<coeff_t> s0o("s0o"), s1o("s1o"), s2o("s2o"), s3o("s3o"),
s4o("s4o"), s5o("s5o"), s6o("s6o"), s7o("s7o"),
s0e("s0e"), s1e("s1e"), s2e("s2e"), s3e("s3e"),
s4e("s4e"), s5e("s5e"), s6e("s6e"), s7e("s7e");
coeff_t fo7[128], fo6[96], fo5[80], fo4[72], fo3[68], fo2[66], fo1[65];
coeff_t fe7[128], fe6[96], fe5[80], fe4[72], fe3[68], fe2[66], fe1[65];
coeff_t_stream x, y;
#pragma HLS STREAM variable = s7o depth = 1
#pragma HLS STREAM variable = s6o depth = 2
#pragma HLS STREAM variable = s5o depth = 4
#pragma HLS STREAM variable = s4o depth = 8
#pragma HLS STREAM variable = s3o depth = 16
#pragma HLS STREAM variable = s2o depth = 32
#pragma HLS STREAM variable = s1o depth = 64
#pragma HLS STREAM variable = s0o depth = 128
#pragma HLS STREAM variable = s7e depth = 1
#pragma HLS STREAM variable = s6e depth = 2
#pragma HLS STREAM variable = s5e depth = 4
#pragma HLS STREAM variable = s4e depth = 8
#pragma HLS STREAM variable = s3e depth = 16
#pragma HLS STREAM variable = s2e depth = 32
#pragma HLS STREAM variable = s1e depth = 64
#pragma HLS STREAM variable = s0e depth = 128
read_inputs(input, s0e, s0o);
intt_stage1 (s0e, s1e, fe1);
intt_stage1 (s0o, s1o, fo1);
intt_stage2 (s1e, s2e, fe2);
intt_stage2 (s1o, s2o, fo2);
intt_stage3 (s2e, s3e, fe3);
intt_stage3 (s2o, s3o, fo3);
intt_stage4 (s3e, s4e, fe4);
intt_stage4 (s3o, s4o, fo4);
intt_stage5 (s4e, s5e, fe5);
intt_stage5 (s4o, s5o, fo5);
intt_stage6 (s5e, s6e, fe6);
intt_stage6 (s5o, s6o, fo6);
intt_stage7 (s6e, s7e, fe7);
intt_stage7 (s6o, s7o, fo7);
write_outputs(s7e, s7o, output);
}
void stream_split (hls::stream<coeff_t_stream_big> &input,
hls::stream<coeff_t_stream> &input1,
hls::stream<coeff_t_stream> &input2)
{
coeff_t_stream_big x;
double_coeff_t a;
coeff_t_stream x1, x2;
coeff_t a1, a2;
int i;
for (i=0; i<Nt; i++)
{
#pragma HLS pipeline II = 1
x = input.read();
a = x.value;
a1 = a(double_coeff_t::width - 1, coeff_t::width);
a2 = a(coeff_t::width - 1, 0);
if (i == Nt-1)
{
x1.last = 1;
x2.last = 1;
}
else
{
x1.last = 0;
x2.last = 0;
}
x1.value = a1;
x2.value = a2;
input1.write(x1);
input2.write(x2);
}
}
void point_wise_mult (hls::stream<coeff_t_stream> &input1,
hls::stream<coeff_t_stream> &input2,
hls::stream<coeff_t_stream> &output)
{
coeff_t_stream xe, xo, ye, yo, z;
coeff_t ae, be, ce, ao, bo, co, c1, c2, c2s, c3, c4;
int i;
coeff_t pm_factors[128] = {17, 3312, 2761, 568, 583, 2746, 2649, 680,
1637, 1692, 723, 2606, 2288, 1041, 1100, 2229,
1409, 1920, 2662, 667, 3281, 48, 233, 3096,
756, 2573, 2156, 1173, 3015, 314, 3050, 279,
1703, 1626, 1651, 1678, 2789, 540, 1789, 1540,
1847, 1482, 952, 2377, 1461, 1868, 2687, 642,
939, 2390, 2308, 1021, 2437, 892, 2388, 941,
733, 2596, 2337, 992, 268, 3061, 641, 2688,
1584, 1745, 2298, 1031, 2037, 1292, 3220, 109,
375, 2954, 2549, 780, 2090, 1239, 1645, 1684,
1063, 2266, 319, 3010, 2773, 556, 757, 2572,
2099, 1230, 561, 2768, 2466, 863, 2594, 735,
2804, 525, 1092, 2237, 403, 2926, 1026, 2303,
1143, 2186, 2150, 1179, 2775, 554, 886, 2443,
1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300,
2110, 1219, 2935, 394, 885, 2444, 2154, 1175};
z.last = 0;
for (i=0; i<N; i++)
{
#pragma HLS pipeline II = 1
xe = input1.read();
xo = input1.read();
ye = input2.read();
yo = input2.read();
ao = xo.value;
bo = yo.value;
ae = xe.value;
be = ye.value;
c1 = mod (ae * be);
c2 = mod (ao * bo);
c2s = mod (c2 * pm_factors[i]);
c3 = mod (ae * bo);
c4 = mod (ao * be);
ce = modadd (c1, c2s);
co = modadd (c3, c4);
z.value = ce;
output.write(z);
if (i == N-1)
z.last = 1;
z.value = co;
output.write(z);
}
}
// -----------------------------------------------------------------------------
// AXI4-Stream <-> internal stream conversion helpers (only at top level)
// -----------------------------------------------------------------------------
static void axis_to_internal_input(hls::stream<coeff_axis_big_t> &axis_in,
hls::stream<coeff_t_stream_big> &int_in)
{
coeff_axis_big_t a;
coeff_t_stream_big x;
for (int i = 0; i < Nt; i++)
{
#pragma HLS pipeline II = 1
a = axis_in.read();
x.value = (double_coeff_t)a.data;
x.last = a.last;
int_in.write(x);
// Optional: break on TLAST if you want to be robust to shorter packets
if (a.last)
break;
}
}
static void internal_to_axis_output(hls::stream<coeff_t_stream> &int_out,
hls::stream<coeff_axis_t> &axis_out)
{
coeff_t_stream x;
coeff_axis_t a;
for (int i = 0; i < Nt; i++)
{
#pragma HLS pipeline II = 1
x = int_out.read();
a.data = (ap_uint<16>)x.value;
a.last = x.last;
// Mark all bytes valid; side channels are disabled in this ap_axiu config
a.keep = -1;
a.strb = -1;
axis_out.write(a);
if (x.last)
break;
}
}
// -----------------------------------------------------------------------------
// Top-level function with AXI4-Stream ports (for DMA) and internal NTT pipeline
// -----------------------------------------------------------------------------
int poly_mult (hls::stream<coeff_axis_big_t> &input,
hls::stream<coeff_axis_t> &output)
{
#pragma HLS INTERFACE axis register port=input
#pragma HLS INTERFACE axis register port=output
#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS
#pragma HLS dataflow
// Internal streams using the original coeff_t_stream{,_big} types
hls::stream<coeff_t_stream_big> in_internal("in_internal");
hls::stream<coeff_t_stream> input1("input1"), input2("input2");
hls::stream<coeff_t_stream> middle1("middle1"), middle2("middle2");
hls::stream<coeff_t_stream> middle3("middle3"), out_internal("out_internal");
axis_to_internal_input(input, in_internal);
stream_split(in_internal, input1, input2);
ct_ntt(input1, middle1);
ct_ntt(input2, middle2);
point_wise_mult(middle1, middle2, middle3);
gs_intt(middle3, out_internal);
internal_to_axis_output(out_internal, output);
return 0;
}