diff --git a/PYNQ-zcu104_Files/Dilithium.ipynb b/PYNQ-zcu104_Files/Dilithium.ipynb new file mode 100644 index 0000000..bbfeef1 --- /dev/null +++ b/PYNQ-zcu104_Files/Dilithium.ipynb @@ -0,0 +1,1566 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "33632a5e", + "metadata": {}, + "outputs": [], + "source": [ + "# oho A\n", + "# Install and import liboqs-python (software Dilithium)\n", + "# If you don't have liboqs / liboqs-python yet, uncomment the next lines.\n", + "# On a ZCU104 this will compile on ARM; it may take a while.\n", + "#\n", + "#!git clone --depth=1 https://github.com/open-quantum-safe/liboqs-python.git\n", + "#%cd liboqs-python\n", + "#!pip install .\n", + "#%cd -" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6093010b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enabled signature mechanisms:\n", + "('Dilithium2', 'Dilithium3', 'Dilithium5', 'ML-DSA-44', 'ML-DSA-65', 'ML-DSA-87', 'Falcon-512', 'Falcon-1024', 'Falcon-padded-512', 'Falcon-padded-1024', 'SPHINCS+-SHA2-128f-simple', 'SPHINCS+-SHA2-128s-simple', 'SPHINCS+-SHA2-192f-simple', 'SPHINCS+-SHA2-192s-simple', 'SPHINCS+-SHA2-256f-simple', 'SPHINCS+-SHA2-256s-simple', 'SPHINCS+-SHAKE-128f-simple', 'SPHINCS+-SHAKE-128s-simple', 'SPHINCS+-SHAKE-192f-simple', 'SPHINCS+-SHAKE-192s-simple', 'SPHINCS+-SHAKE-256f-simple', 'SPHINCS+-SHAKE-256s-simple', 'MAYO-1', 'MAYO-2', 'MAYO-3', 'MAYO-5', 'cross-rsdp-128-balanced', 'cross-rsdp-128-fast', 'cross-rsdp-128-small', 'cross-rsdp-192-balanced', 'cross-rsdp-192-fast', 'cross-rsdp-192-small', 'cross-rsdp-256-balanced', 'cross-rsdp-256-fast', 'cross-rsdp-256-small', 'cross-rsdpg-128-balanced', 'cross-rsdpg-128-fast', 'cross-rsdpg-128-small', 'cross-rsdpg-192-balanced', 'cross-rsdpg-192-fast', 'cross-rsdpg-192-small', 'cross-rsdpg-256-balanced', 'cross-rsdpg-256-fast', 'cross-rsdpg-256-small', 'OV-Is', 'OV-Ip', 'OV-III', 'OV-V', 'OV-Is-pkc', 'OV-Ip-pkc', 'OV-III-pkc', 'OV-V-pkc', 'OV-Is-pkc-skc', 'OV-Ip-pkc-skc', 'OV-III-pkc-skc', 'OV-V-pkc-skc', 'SNOVA_24_5_4', 'SNOVA_24_5_4_SHAKE', 'SNOVA_24_5_4_esk', 'SNOVA_24_5_4_SHAKE_esk', 'SNOVA_37_17_2', 'SNOVA_25_8_3', 'SNOVA_56_25_2', 'SNOVA_49_11_3', 'SNOVA_37_8_4', 'SNOVA_24_5_5', 'SNOVA_60_10_4', 'SNOVA_29_6_5')\n", + "Using algorithm: ML-DSA-44\n", + "Software-only Dilithium verify OK? True\n" + ] + } + ], + "source": [ + "# Cell 1: \n", + "\n", + "\n", + "import numpy as np\n", + "import math\n", + "import time\n", + "from pynq import Overlay\n", + "from pynq import allocate\n", + "from pynq import MMIO\n", + "\n", + "import oqs\n", + "\n", + "# Check which signature mechanisms are available\n", + "print(\"Enabled signature mechanisms:\")\n", + "print(oqs.get_enabled_sig_mechanisms())\n", + "\n", + "# Prefer the ML-DSA name; fall back to legacy Dilithium2 if needed\n", + "if \"ML-DSA-44\" in oqs.get_enabled_sig_mechanisms():\n", + " DILITHIUM_ALG = \"ML-DSA-44\"\n", + "elif \"Dilithium2\" in oqs.get_enabled_sig_mechanisms():\n", + " DILITHIUM_ALG = \"Dilithium2\"\n", + "else:\n", + " raise RuntimeError(\"No Dilithium / ML-DSA-44 implementation enabled in liboqs.\")\n", + "\n", + "print(\"Using algorithm:\", DILITHIUM_ALG)\n", + "\n", + "# Simple software-only keygen / sign / verify\n", + "message = b\"Hello from ZCU104 + Dilithium\"\n", + "\n", + "with oqs.Signature(DILITHIUM_ALG) as signer:\n", + " public_key = signer.generate_keypair()\n", + " secret_key = signer.export_secret_key()\n", + "\n", + " signature = signer.sign(message)\n", + " ok = signer.verify(message, signature, public_key)\n", + "\n", + "print(\"Software-only Dilithium verify OK?\", ok)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d32bcf5f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8913f4a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "088eb969", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 19388\n", + "drwxr-xr-x 3 root root 4096 Dec 13 15:53 .\n", + "drwxrwxrwx 9 xilinx xilinx 4096 Dec 13 15:57 ..\n", + "-rw-r--r-- 1 root root 19311209 Dec 13 15:53 base.bit\n", + "-rw-r--r-- 1 root root 527952 Dec 13 15:52 base.hwh\n", + "drwxr-xr-x 2 root root 4096 Dec 13 15:23 .ipynb_checkpoints\n", + "Sat Dec 13 03:57:25 PM UTC 2025\n" + ] + } + ], + "source": [ + "#oho B\n", + "!ls -la /home/xilinx/jupyter_notebooks/kyber_ntt_and_dilithium_ntt\n", + "!date\n", + "# Loading the kyber-ntt_and_dilithium_ntt bit file to configure the PL\n", + "ol = Overlay('/home/xilinx/jupyter_notebooks/kyber_ntt_and_dilithium_ntt/base.bit')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "fc922ce1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['axi_dma_0', 'poly_mult_0', 'poly_mult_dil_0', 'axi_dma_1', 'ps_e_0'])\n", + "poly_mult_dil_0 @ 0x80020000\n", + "send _max_size: 16383\n", + "recv _max_size: 16383\n", + "{'C_DLYTMR_RESOLUTION': '125', 'C_ENABLE_MULTI_CHANNEL': '0', 'C_FAMILY': 'zynquplus', 'C_INCLUDE_MM2S': '1', 'C_INCLUDE_MM2S_DRE': '0', 'C_INCLUDE_MM2S_SF': '1', 'C_INCLUDE_S2MM': '1', 'C_INCLUDE_S2MM_DRE': '0', 'C_INCLUDE_S2MM_SF': '1', 'C_INCLUDE_SG': '0', 'C_INCREASE_THROUGHPUT': '0', 'C_MICRO_DMA': '0', 'C_MM2S_BURST_SIZE': '16', 'C_M_AXIS_MM2S_CNTRL_TDATA_WIDTH': '32', 'C_M_AXIS_MM2S_TDATA_WIDTH': '64', 'C_M_AXI_MM2S_ADDR_WIDTH': '32', 'C_M_AXI_MM2S_DATA_WIDTH': '64', 'C_M_AXI_S2MM_ADDR_WIDTH': '32', 'C_M_AXI_S2MM_DATA_WIDTH': '32', 'C_M_AXI_SG_ADDR_WIDTH': '32', 'C_M_AXI_SG_DATA_WIDTH': '32', 'C_NUM_MM2S_CHANNELS': '1', 'C_NUM_S2MM_CHANNELS': '1', 'C_PRMRY_IS_ACLK_ASYNC': '0', 'C_S2MM_BURST_SIZE': '16', 'C_SG_INCLUDE_STSCNTRL_STRM': '0', 'C_SG_LENGTH_WIDTH': '14', 'C_SG_USE_STSAPP_LENGTH': '0', 'C_S_AXIS_S2MM_STS_TDATA_WIDTH': '32', 'C_S_AXIS_S2MM_TDATA_WIDTH': '32', 'C_S_AXI_LITE_ADDR_WIDTH': '10', 'C_S_AXI_LITE_DATA_WIDTH': '32', 'Component_Name': 'base_axi_dma_1_0', 'c_addr_width': '32', 'c_dlytmr_resolution': '125', 'c_enable_multi_channel': '0', 'c_include_mm2s': '1', 'c_include_mm2s_dre': '0', 'c_include_mm2s_sf': '1', 'c_include_s2mm': '1', 'c_include_s2mm_dre': '0', 'c_include_s2mm_sf': '1', 'c_include_sg': '0', 'c_increase_throughput': '0', 'c_m_axi_mm2s_data_width': '64', 'c_m_axi_s2mm_data_width': '32', 'c_m_axis_mm2s_tdata_width': '64', 'c_micro_dma': '0', 'c_mm2s_burst_size': '16', 'c_num_mm2s_channels': '1', 'c_num_s2mm_channels': '1', 'c_prmry_is_aclk_async': '0', 'c_s2mm_burst_size': '16', 'c_s_axis_s2mm_tdata_width': '32', 'c_sg_include_stscntrl_strm': '0', 'c_sg_length_width': '14', 'c_sg_use_stsapp_length': '0', 'c_single_interface': '0', 'EDK_IPTYPE': 'PERIPHERAL', 'C_BASEADDR': '0x80030000', 'C_HIGHADDR': '0x8003FFFF', 'ADDR_WIDTH': '10', 'ARUSER_WIDTH': '0', 'AWUSER_WIDTH': '0', 'BUSER_WIDTH': '0', 'CLK_DOMAIN': 'base_ps_e_0_0_pl_clk0', 'DATA_WIDTH': '32', 'FREQ_HZ': '99999001', 'HAS_BRESP': '1', 'HAS_BURST': '0', 'HAS_CACHE': '0', 'HAS_LOCK': '0', 'HAS_PROT': '0', 'HAS_QOS': '0', 'HAS_REGION': '0', 'HAS_RRESP': '1', 'HAS_WSTRB': '0', 'ID_WIDTH': '0', 'INSERT_VIP': '0', 'MAX_BURST_LENGTH': '1', 'NUM_READ_OUTSTANDING': '1', 'NUM_READ_THREADS': '1', 'NUM_WRITE_OUTSTANDING': '1', 'NUM_WRITE_THREADS': '1', 'PHASE': '0.0', 'PROTOCOL': 'AXI4LITE', 'READ_WRITE_MODE': 'READ_WRITE', 'RUSER_BITS_PER_BYTE': '0', 'RUSER_WIDTH': '0', 'SUPPORTS_NARROW_BURST': '0', 'WUSER_BITS_PER_BYTE': '0', 'WUSER_WIDTH': '0', 'HAS_TKEEP': '1', 'HAS_TLAST': '1', 'HAS_TREADY': '1', 'HAS_TSTRB': '0', 'LAYERED_METADATA': 'undef', 'TDATA_NUM_BYTES': '8', 'TDEST_WIDTH': '0', 'TID_WIDTH': '0', 'TUSER_WIDTH': '0'}\n" + ] + } + ], + "source": [ + "#oho E\n", + "#oho inquiry for ZCU104 Board\n", + "\n", + "# Discover the real base address from the .hwh\n", + "print(ol.ip_dict.keys()) # just to see exact IP names\n", + "\n", + "poly_name = 'poly_mult_dil_0' # adjust if Vivado named it differently\n", + "poly_info = ol.ip_dict[poly_name]\n", + "print(\"poly_mult_dil_0 @\", hex(poly_info['phys_addr']))\n", + "mmio = MMIO(poly_info['phys_addr'], poly_info['addr_range'])\n", + "\n", + "# oho !!! dma_1 gehört zu dilithium accelerator, not dma_0 !\n", + "dma = ol.axi_dma_1\n", + "dma_send = dma.sendchannel\n", + "dma_recv = dma.recvchannel\n", + "\n", + "\n", + "# 1) What PYNQ thinks the max dma channel size is:\n", + "print(\"send _max_size:\", dma.sendchannel._max_size)\n", + "print(\"recv _max_size:\", dma.recvchannel._max_size)\n", + "\n", + "# 2) versus: Dump the raw IP parameters from the .hwh:\n", + "print(ol.ip_dict['axi_dma_1']['parameters'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dafae649", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3572bb8e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1db1b137", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# 4. Dilithium ring parameters\n", + "Q = 8380417\n", + "N = 256\n", + "\n", + "def center_mod_q(x):\n", + " \"\"\"Map integer x to centered representative in (-Q/2, Q/2].\"\"\"\n", + " x = int(x) % Q\n", + " if x > Q // 2:\n", + " x -= Q\n", + " return x\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "af55b05f", + "metadata": {}, + "outputs": [], + "source": [ + "def hw_poly_mult(a, b):\n", + " \"\"\"\n", + " Hardware polynomial multiplication in R_q[X]/(X^N + 1).\n", + "\n", + " a, b: 1D numpy arrays of length N with integer entries (Python ints or np.int32/int64).\n", + " Returns: numpy array length N, centered mod Q.\n", + " \"\"\"\n", + " a = np.asarray(a, dtype=np.int64)\n", + " b = np.asarray(b, dtype=np.int64)\n", + " assert a.shape == (N,)\n", + " assert b.shape == (N,)\n", + "\n", + " # Allocate DMA buffers\n", + " # Input: N 64-bit words, each packing two 32-bit coefficients.\n", + " in_buf = allocate(shape=(N,), dtype=np.uint64)\n", + " # Output: N 32-bit coefficients\n", + " out_buf = allocate(shape=(N,), dtype=np.uint32)\n", + "\n", + " # Pack two 32-bit coeffs into one 64-bit word\n", + " # Convention: low 32 bits = a[i], high 32 bits = b[i]\n", + " for i in range(N):\n", + " a_i = int(a[i] % Q) & 0xFFFFFFFF\n", + " b_i = int(b[i] % Q) & 0xFFFFFFFF\n", + " word = ((b_i << 32) | a_i) # pure Python ints, no NumPy ufuncs\n", + " in_buf[i] = np.uint64(word)\n", + "\n", + "\n", + " \n", + " # oho vorbild kyber \n", + " # Start IP core (ap_start = 1 at control register 0x00)\n", + " mmio.write(0x00, 0x1)\n", + " \n", + " \n", + " \n", + " # Start DMA transfers\n", + " dma.sendchannel.transfer(in_buf)\n", + " dma.recvchannel.transfer(out_buf)\n", + " dma.sendchannel.wait()\n", + " dma.recvchannel.wait()\n", + "\n", + " # Convert back to centered coefficients\n", + " c = np.empty(N, dtype=np.int64)\n", + " for i in range(N):\n", + " c[i] = center_mod_q(out_buf[i])\n", + "\n", + " # Optional: free buffers (PYNQ also cleans them up via GC, but explicit is nice on small boards)\n", + " in_buf.freebuffer()\n", + " out_buf.freebuffer()\n", + "\n", + " return c\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "cb8a287b", + "metadata": {}, + "outputs": [], + "source": [ + "# Cell 3: Software negacyclic polynomial multiplication in R_q[X]/(X^N + 1)\n", + "\n", + "def sw_poly_mult(a, b):\n", + " \"\"\"\n", + " O(N^2) negacyclic polynomial multiplication:\n", + " c(X) = a(X) * b(X) mod (X^N + 1, Q)\n", + " \"\"\"\n", + " a = np.asarray(a, dtype=np.int64)\n", + " b = np.asarray(b, dtype=np.int64)\n", + " assert a.shape == (N,)\n", + " assert b.shape == (N,)\n", + "\n", + " c = np.zeros(N, dtype=np.int64)\n", + "\n", + " for i in range(N):\n", + " acc = 0\n", + " for j in range(N):\n", + " k = i - j\n", + " if k >= 0:\n", + " acc += int(a[j]) * int(b[k])\n", + " else:\n", + " # Wrap with a minus sign: X^N = -1\n", + " k += N\n", + " acc -= int(a[j]) * int(b[k])\n", + " c[i] = center_mod_q(acc)\n", + "\n", + " return c\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1e0e420", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "7dfe972f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max |c_sw - c_hw| mod Q = 0\n", + "Number of mismatching coefficients: 0\n", + "HW matches SW golden for this test vector.\n" + ] + } + ], + "source": [ + "# Cell 4: Compare HW vs SW polynomial multiplication\n", + "\n", + "from numpy.random import default_rng\n", + "\n", + "rng = default_rng(12345)\n", + "\n", + "# Generate random centered coefficients\n", + "a = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + "b = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + "\n", + "c_sw = sw_poly_mult(a, b)\n", + "\n", + "c_hw = hw_poly_mult(a, b)\n", + "\n", + "# Compare modulo Q (in case of small representation differences)\n", + "diff = (c_sw - c_hw) % Q\n", + "max_diff = int(np.max(np.abs(diff)))\n", + "num_mism = int(np.count_nonzero(diff))\n", + "\n", + "print(\"Max |c_sw - c_hw| mod Q =\", max_diff)\n", + "print(\"Number of mismatching coefficients:\", num_mism)\n", + "\n", + "if num_mism == 0:\n", + " print(\"HW matches SW golden for this test vector.\")\n", + "else:\n", + " print(\"Mismatch: investigate HLS core / packing / reduction.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa590582", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a7991e6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "99023a64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Software Dilithium using liboqs ===\n", + "Keygen time (SW): 0.001 s\n", + "Sign time (SW): 0.005 s\n", + "Verify time (SW): 0.001 s\n", + "Verification OK? True\n", + "\n", + "=== Hardware-accelerated poly_mult_dil ===\n", + "Software poly_mult time: 0.282400 s\n", + "Hardware poly_mult time (incl. DMA): 0.009862 s\n", + "Mismatch count: 0\n" + ] + } + ], + "source": [ + "# Cell 5: Tie it together\n", + "\n", + "import time\n", + "\n", + "# 1. Software Dilithium keygen/sign/verify (liboqs)\n", + "message = b\"Hardware-software co-design demo on ZCU104\"\n", + "\n", + "with oqs.Signature(DILITHIUM_ALG) as signer:\n", + " print(\"\\n=== Software Dilithium using liboqs ===\")\n", + " t0 = time.time()\n", + " public_key = signer.generate_keypair()\n", + " secret_key = signer.export_secret_key()\n", + " t1 = time.time()\n", + " print(\"Keygen time (SW): %.3f s\" % (t1 - t0))\n", + "\n", + " t0 = time.time()\n", + " signature = signer.sign(message)\n", + " t1 = time.time()\n", + " print(\"Sign time (SW): %.3f s\" % (t1 - t0))\n", + "\n", + " t0 = time.time()\n", + " ok = signer.verify(message, signature, public_key)\n", + " t1 = time.time()\n", + " print(\"Verify time (SW): %.3f s\" % (t1 - t0))\n", + " print(\"Verification OK?\", ok)\n", + "\n", + "# 2. Hardware-accelerated polynomial multiplication demo\n", + "print(\"\\n=== Hardware-accelerated poly_mult_dil ===\")\n", + "\n", + "a = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + "b = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + "\n", + "t0 = time.time()\n", + "c_sw = sw_poly_mult(a, b)\n", + "t1 = time.time()\n", + "print(\"Software poly_mult time: %.6f s\" % (t1 - t0))\n", + "\n", + "t0 = time.time()\n", + "c_hw = hw_poly_mult(a, b)\n", + "t1 = time.time()\n", + "print(\"Hardware poly_mult time (incl. DMA): %.6f s\" % (t1 - t0))\n", + "\n", + "diff = (c_sw - c_hw) % Q\n", + "num_mism = int(np.count_nonzero(diff))\n", + "print(\"Mismatch count:\", num_mism)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33dcd211", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36fb8ddb", + "metadata": {}, + "outputs": [], + "source": [ + "###########################################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac4e49fd", + "metadata": {}, + "outputs": [], + "source": [ + "## Kyber-Notebook analoge Test Verfahren" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9ec9ac1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "818b97e0", + "metadata": {}, + "outputs": [], + "source": [ + "# oho D_dil\n", + "# 512-point NTT / INTT in pure Python, matching the HLS core\n", + "# and a SW NTT-based polynomial multiplication in R_q[X]/(X^256 + 1).\n", + "\n", + "import numpy as np\n", + "\n", + "# Ring parameters (keep them in sync with your hardware)\n", + "Q = 8380417\n", + "N = 256\n", + "N2 = 2 * N\n", + "\n", + "# These are exactly the twiddle constants we used in the HLS core\n", + "NTT_WLEN = [\n", + " 8380416, 4808194, 4614810, 2883726,\n", + " 6250525, 7044481, 3241972, 6644104,\n", + " 1921994\n", + "]\n", + "\n", + "NTT_WLEN_INV = [\n", + " 8380416, 3572223, 3761513, 5234739,\n", + " 3764867, 3227876, 6621070, 6125690,\n", + " 527981\n", + "]\n", + "\n", + "INV_NTT512 = 8364049 # 512^{-1} mod Q\n", + "\n", + "def mod_q(x: int) -> int:\n", + " \"\"\"Reduce integer x modulo Q into [0, Q).\"\"\"\n", + " r = x % Q\n", + " if r < 0:\n", + " r += Q\n", + " return r\n", + "\n", + "def modadd(a: int, b: int) -> int:\n", + " s = a + b\n", + " if s >= Q:\n", + " s -= Q\n", + " return s\n", + "\n", + "def modsub(a: int, b: int) -> int:\n", + " d = a - b\n", + " if d < 0:\n", + " d += Q\n", + " return d\n", + "\n", + "def mul_mod(a: int, b: int) -> int:\n", + " return mod_q(a * b)\n", + "\n", + "def ntt_512_py(a, invert: bool = False):\n", + " \"\"\"\n", + " In-place iterative radix-2 NTT of size 512, pure Python.\n", + "\n", + " a: iterable of length 512, entries in Z_Q.\n", + " invert = False: forward NTT\n", + " invert = True: inverse NTT (includes multiply by 512^{-1} mod Q)\n", + " \"\"\"\n", + " # Copy into a plain Python list of ints\n", + " a = [mod_q(int(x)) for x in a]\n", + " n2 = N2\n", + "\n", + " # Bit-reversal permutation\n", + " j = 0\n", + " for i in range(1, n2):\n", + " bit = n2 >> 1\n", + " while j & bit:\n", + " j ^= bit\n", + " bit >>= 1\n", + " j ^= bit\n", + " if i < j:\n", + " a[i], a[j] = a[j], a[i]\n", + "\n", + " length = 2\n", + " stage = 0\n", + " while length <= n2:\n", + " wlen = NTT_WLEN_INV[stage] if invert else NTT_WLEN[stage]\n", + " half = length >> 1\n", + "\n", + " for i in range(0, n2, length):\n", + " w = 1\n", + " for k in range(half):\n", + " u = a[i + k]\n", + " v = mul_mod(a[i + k + half], w)\n", + " a[i + k] = modadd(u, v)\n", + " a[i + k + half] = modsub(u, v)\n", + " w = mul_mod(w, wlen)\n", + "\n", + " length <<= 1\n", + " stage += 1\n", + "\n", + " if invert:\n", + " # Multiply by 512^{-1} mod Q\n", + " for i in range(n2):\n", + " a[i] = mul_mod(a[i], INV_NTT512)\n", + "\n", + " return a\n", + "\n", + "def poly_mul_sw_ntt(a, b):\n", + " \"\"\"\n", + " SW NTT-based polynomial multiplication:\n", + " c(X) = a(X) * b(X) mod (X^256 + 1, Q)\n", + " using the same 512-pt NTT+fold as the HLS core.\n", + "\n", + " a, b: 1D arrays of length N (can be centered ints).\n", + " Returns: length-N numpy array, centered mod Q.\n", + " \"\"\"\n", + " a = np.asarray(a, dtype=np.int64)\n", + " b = np.asarray(b, dtype=np.int64)\n", + " assert a.shape == (N,) and b.shape == (N,)\n", + "\n", + " # Map inputs into [0, Q)\n", + " A = [mod_q(int(x)) for x in a] + [0] * (N2 - N)\n", + " B = [mod_q(int(x)) for x in b] + [0] * (N2 - N)\n", + "\n", + " # Forward NTT\n", + " A = ntt_512_py(A, invert=False)\n", + " B = ntt_512_py(B, invert=False)\n", + "\n", + " # Pointwise multiply in NTT domain\n", + " C = [mul_mod(A[i], B[i]) for i in range(N2)]\n", + "\n", + " # Inverse NTT\n", + " C = ntt_512_py(C, invert=True)\n", + "\n", + " # Negacyclic fold: c[k] = C[k] - C[k+N] mod Q\n", + " c = np.empty(N, dtype=np.int64)\n", + " for k in range(N):\n", + " val = modsub(C[k], C[k + N])\n", + " # Use your existing centered representation helper if you have it\n", + " # Otherwise: center in (-Q/2, Q/2]\n", + " if 'center_mod_q' in globals():\n", + " c[k] = center_mod_q(val)\n", + " else:\n", + " # fallback centered mapping\n", + " r = val % Q\n", + " if r > Q // 2:\n", + " r -= Q\n", + " c[k] = r\n", + "\n", + " return c\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12e59835", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f676296e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "liboqs available, using algorithm: ML-DSA-44\n" + ] + } + ], + "source": [ + "# oho F_dil – liboqs setup for Dilithium\n", + "\n", + "HAS_OQS = False\n", + "DILITHIUM_ALG = None\n", + "oqs_signer = None\n", + "oqs_pk = None\n", + "\n", + "try:\n", + " import oqs\n", + "\n", + " enabled = oqs.get_enabled_sig_mechanisms()\n", + " # Prefer ML-DSA-44 if present, else fall back to Dilithium2\n", + " if \"ML-DSA-44\" in enabled:\n", + " DILITHIUM_ALG = \"ML-DSA-44\"\n", + " elif \"Dilithium2\" in enabled:\n", + " DILITHIUM_ALG = \"Dilithium2\"\n", + " else:\n", + " print(\"No Dilithium / ML-DSA scheme enabled in liboqs; skipping liboqs benchmarks.\")\n", + " if DILITHIUM_ALG is not None:\n", + " oqs_signer = oqs.Signature(DILITHIUM_ALG)\n", + " oqs_pk = oqs_signer.generate_keypair() # one keypair reused for all timings\n", + " # sk = oqs_signer.export_secret_key() # not needed explicitly here\n", + " HAS_OQS = True\n", + " print(\"liboqs available, using algorithm:\", DILITHIUM_ALG)\n", + "except ImportError:\n", + " print(\"liboqs-python (oqs) not installed; skipping liboqs benchmarks.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91689702", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4aaa17d9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Naive vs SW NTT mismatches: 0\n", + "Naive vs HW mismatches: 0\n", + "t_naive = 0.286735 s\n", + "t_sw_ntt = 0.081213 s\n", + "t_hw_ntt = 0.008756 s\n", + "Speedup naive / SW NTT: 3.53 x\n", + "Speedup SW NTT / HW NTT: 9.28 x\n" + ] + } + ], + "source": [ + "# oho E_dil\n", + "# Compare SW naive vs SW NTT vs HW NTT for Dilithium ring multiplication.\n", + "\n", + "import time\n", + "from numpy.random import default_rng\n", + "\n", + "rng = default_rng(12345)\n", + "\n", + "# Alias your existing naive golden for clarity\n", + "def poly_mul_sw_naive(a, b):\n", + " # This assumes you already have sw_poly_mult(a,b) as the O(N^2) golden.\n", + " return sw_poly_mult(a, b)\n", + "\n", + "def benchmark_once():\n", + " # Random centered coefficients in (-Q/2, Q/2]\n", + " a = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + " b = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + "\n", + " # 1) Naive SW (O(N^2))\n", + " t0 = time.perf_counter()\n", + " c_naive = poly_mul_sw_naive(a, b)\n", + " t1 = time.perf_counter()\n", + " t_naive = t1 - t0\n", + "\n", + " # 2) SW NTT (same algorithm as HW, but in Python)\n", + " t0 = time.perf_counter()\n", + " c_sw_ntt = poly_mul_sw_ntt(a, b)\n", + " t1 = time.perf_counter()\n", + " t_sw_ntt = t1 - t0\n", + "\n", + " # 3) HW NTT (HLS core via DMA)\n", + " t0 = time.perf_counter()\n", + " c_hw = hw_poly_mult(a, b)\n", + " t1 = time.perf_counter()\n", + " t_hw = t1 - t0\n", + "\n", + " # Correctness checks vs naive, modulo Q\n", + " diff_sw_ntt = (c_naive - c_sw_ntt) % Q\n", + " diff_hw = (c_naive - c_hw) % Q\n", + "\n", + " print(\"Naive vs SW NTT mismatches:\",\n", + " int(np.count_nonzero(diff_sw_ntt)))\n", + " print(\"Naive vs HW mismatches:\",\n", + " int(np.count_nonzero(diff_hw)))\n", + "\n", + " print(f\"t_naive = {t_naive:.6f} s\")\n", + " print(f\"t_sw_ntt = {t_sw_ntt:.6f} s\")\n", + " print(f\"t_hw_ntt = {t_hw:.6f} s\")\n", + "\n", + " if t_sw_ntt > 0 and t_hw > 0:\n", + " print(\"Speedup naive / SW NTT: %.2f x\" % (t_naive / t_sw_ntt))\n", + " print(\"Speedup SW NTT / HW NTT: %.2f x\" % (t_sw_ntt / t_hw))\n", + "\n", + "benchmark_once()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d500e63b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "bee9cd02", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Naive vs SW NTT mismatches: 0\n", + "Naive vs HW mismatches: 0\n", + "t_naive = 0.293554 s\n", + "t_sw_ntt = 0.081380 s\n", + "t_hw_ntt = 0.008733 s\n", + "Speedup naive / SW NTT: 3.61 x\n", + "Speedup SW NTT / HW NTT: 9.32 x\n", + "liboqs Dilithium sign+verify OK? True, time: 0.002986 s\n" + ] + } + ], + "source": [ + "# oho G_dil – benchmark: SW naive vs SW NTT vs HW NTT vs liboqs Dilithium\n", + "\n", + "import time\n", + "from numpy.random import default_rng\n", + "\n", + "rng = default_rng(12345)\n", + "\n", + "# Alias: your existing naive golden\n", + "def poly_mul_sw_naive(a, b):\n", + " # This assumes sw_poly_mult(a,b) is the O(N^2) negacyclic multiply you already defined.\n", + " return sw_poly_mult(a, b)\n", + "\n", + "def benchmark_once():\n", + " # Random centered coefficients in (-Q/2, Q/2]\n", + " a = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + " b = rng.integers(low=-Q//2, high=Q//2, size=N, dtype=np.int64)\n", + "\n", + " # 1) Naive SW (O(N^2))\n", + " t0 = time.perf_counter()\n", + " c_naive = poly_mul_sw_naive(a, b)\n", + " t1 = time.perf_counter()\n", + " t_naive = t1 - t0\n", + "\n", + " # 2) SW NTT (same algorithm as HW, but in Python)\n", + " t0 = time.perf_counter()\n", + " c_sw_ntt = poly_mul_sw_ntt(a, b)\n", + " t1 = time.perf_counter()\n", + " t_sw_ntt = t1 - t0\n", + "\n", + " # 3) HW NTT (HLS core via DMA)\n", + " t0 = time.perf_counter()\n", + " c_hw = hw_poly_mult(a, b)\n", + " t1 = time.perf_counter()\n", + " t_hw = t1 - t0\n", + "\n", + " # Correctness checks vs naive, modulo Q\n", + " diff_sw_ntt = (c_naive - c_sw_ntt) % Q\n", + " diff_hw = (c_naive - c_hw) % Q\n", + "\n", + " print(\"Naive vs SW NTT mismatches:\",\n", + " int(np.count_nonzero(diff_sw_ntt)))\n", + " print(\"Naive vs HW mismatches:\",\n", + " int(np.count_nonzero(diff_hw)))\n", + "\n", + " print(f\"t_naive = {t_naive:.6f} s\")\n", + " print(f\"t_sw_ntt = {t_sw_ntt:.6f} s\")\n", + " print(f\"t_hw_ntt = {t_hw:.6f} s\")\n", + "\n", + " if t_sw_ntt > 0 and t_hw > 0:\n", + " print(\"Speedup naive / SW NTT: %.2f x\" % (t_naive / t_sw_ntt))\n", + " print(\"Speedup SW NTT / HW NTT: %.2f x\" % (t_sw_ntt / t_hw))\n", + "\n", + " # 4) liboqs Dilithium sign+verify (full scheme, not just one poly mult)\n", + " if HAS_OQS and oqs_signer is not None:\n", + " # Use some message derived from the polynomials just to keep it tied together\n", + " msg = a.tobytes()[:32] # first 32 bytes as a \"message\"\n", + " t0 = time.perf_counter()\n", + " sig = oqs_signer.sign(msg)\n", + " ok = oqs_signer.verify(msg, sig, oqs_pk)\n", + " t1 = time.perf_counter()\n", + " t_oqs = t1 - t0\n", + " print(f\"liboqs Dilithium sign+verify OK? {ok}, time: {t_oqs:.6f} s\")\n", + " else:\n", + " print(\"liboqs Dilithium not available in this environment.\")\n", + "\n", + "benchmark_once()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ffbb055", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f592b8e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d26c9518", + "metadata": {}, + "outputs": [], + "source": [ + "#########################################################\n", + "# Fair comparison\n", + "#########################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "963d4b71", + "metadata": {}, + "outputs": [], + "source": [ + "# oho H_dil – simple R_q vector/matrix utilities and pluggable poly_mul backend\n", + "\n", + "import numpy as np\n", + "\n", + "Q = 8380417\n", + "N = 256\n", + "\n", + "def poly_mul_naive(a, b):\n", + " # Your existing O(N^2) golden\n", + " return sw_poly_mult(a, b)\n", + "\n", + "def poly_mul_sw_ntt_backend(a, b):\n", + " # Your Python NTT-based multiply from the previous cells\n", + " return poly_mul_sw_ntt(a, b)\n", + "\n", + "def poly_mul_hw_backend(a, b):\n", + " # Your FPGA-based multiply via DMA\n", + " return hw_poly_mult(a, b)\n", + "\n", + "def poly_add(a, b):\n", + " a = np.asarray(a, dtype=np.int64)\n", + " b = np.asarray(b, dtype=np.int64)\n", + " c = (a + b) % Q\n", + " # center if you prefer centered reps; else leave mod Q\n", + " return np.array([center_mod_q(int(x)) for x in c], dtype=np.int64)\n", + "\n", + "def poly_sub(a, b):\n", + " a = np.asarray(a, dtype=np.int64)\n", + " b = np.asarray(b, dtype=np.int64)\n", + " c = (a - b) % Q\n", + " return np.array([center_mod_q(int(x)) for x in c], dtype=np.int64)\n", + "\n", + "def sample_poly_small(eta=2):\n", + " # Very crude: coefficients uniform in [-eta, eta]\n", + " return np.random.randint(-eta, eta+1, size=N, dtype=np.int64)\n", + "\n", + "def sample_poly_uniform():\n", + " # Uniform in [0, Q)\n", + " return np.random.randint(0, Q, size=N, dtype=np.int64)\n", + "\n", + "def vec_poly_add(v1, v2):\n", + " assert len(v1) == len(v2)\n", + " return [poly_add(v1[i], v2[i]) for i in range(len(v1))]\n", + "\n", + "def vec_poly_sub(v1, v2):\n", + " assert len(v1) == len(v2)\n", + " return [poly_sub(v1[i], v2[i]) for i in range(len(v1))]\n", + "\n", + "def mat_poly_mul_vec(A, x, poly_mul_func):\n", + " \"\"\"\n", + " A: list of k rows, each row is list of l polynomials (shape k x l).\n", + " x: list of l polynomials (length l).\n", + " poly_mul_func: function(a,b) -> polynomial of length N.\n", + " Returns: list of k polynomials (A * x).\n", + " \"\"\"\n", + " k = len(A)\n", + " l = len(A[0])\n", + " assert len(x) == l\n", + " result = []\n", + " for i in range(k):\n", + " acc = np.zeros(N, dtype=np.int64)\n", + " for j in range(l):\n", + " acc = poly_add(acc, poly_mul_func(A[i][j], x[j]))\n", + " result.append(acc)\n", + " return result\n", + "\n", + "def vec_poly_mul_scalar(v, c_poly, poly_mul_func):\n", + " \"\"\"\n", + " v: list of polynomials\n", + " c_poly: polynomial\n", + " returns: [v[i] * c_poly] via poly_mul_func\n", + " \"\"\"\n", + " return [poly_mul_func(v[i], c_poly) for i in range(len(v))]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "425937d6", + "metadata": {}, + "outputs": [], + "source": [ + "# oho I_dil – toy Dilithium-style keygen / sign / verify with pluggable poly_mul\n", + "\n", + "# Parameters mimicking Dilithium-II\n", + "K = 4 # rows of A\n", + "L = 4 # cols of A\n", + "\n", + "def toy_dil_keygen(poly_mul_func):\n", + " \"\"\"\n", + " Keygen-like flow:\n", + " - Sample A (KxL matrix of polynomials)\n", + " - Sample s1 (L small polys), s2 (K small polys)\n", + " - t = A*s1 + s2\n", + " Returns (pk, sk) where:\n", + " pk = (A, t)\n", + " sk = (s1, s2)\n", + " \"\"\"\n", + " # Sample A\n", + " A = [[sample_poly_uniform() for _ in range(L)] for _ in range(K)]\n", + " # Secret vectors\n", + " s1 = [sample_poly_small() for _ in range(L)]\n", + " s2 = [sample_poly_small() for _ in range(K)]\n", + " # t = A*s1 + s2\n", + " As1 = mat_poly_mul_vec(A, s1, poly_mul_func)\n", + " t = vec_poly_add(As1, s2)\n", + " pk = (A, t)\n", + " sk = (s1, s2)\n", + " return pk, sk\n", + "\n", + "def toy_dil_sign(poly_mul_func, sk, pk, msg_bytes):\n", + " \"\"\"\n", + " Sign-like flow:\n", + " - Sample y (L small polys)\n", + " - w = A*y\n", + " - c = hash-like dummy poly derived from msg (here: simple deterministic map)\n", + " - z = y + c*s1\n", + " - r = w - c*s2\n", + " Returns a toy signature (z, c, r).\n", + " \"\"\"\n", + " A, t = pk\n", + " s1, s2 = sk\n", + "\n", + " # Sample y\n", + " y = [sample_poly_small() for _ in range(L)]\n", + " # w = A*y\n", + " w = mat_poly_mul_vec(A, y, poly_mul_func)\n", + "\n", + " # Dummy \"hash\" c: a polynomial derived deterministically from msg_bytes\n", + " # This is NOT secure; it's just to inject msg into the flow.\n", + " np.random.seed(int.from_bytes(msg_bytes[:4], \"little\", signed=False))\n", + " c_poly = sample_poly_small(eta=1)\n", + "\n", + " # z = y + c*s1\n", + " cs1 = vec_poly_mul_scalar(s1, c_poly, poly_mul_func)\n", + " z = vec_poly_add(y, cs1)\n", + "\n", + " # r = w - c*s2\n", + " cs2 = vec_poly_mul_scalar(s2, c_poly, poly_mul_func)\n", + " r = vec_poly_sub(w, cs2)\n", + "\n", + " sig = (z, c_poly, r)\n", + " return sig\n", + "\n", + "def toy_dil_verify(poly_mul_func, pk, msg_bytes, sig):\n", + " \"\"\"\n", + " Verify-like flow:\n", + " - Reconstruct w' = A*z - c*t\n", + " - Check that w' is \"small-ish\" in some toy sense\n", + " Returns True/False.\n", + " \"\"\"\n", + " A, t = pk\n", + " z, c_poly, r = sig\n", + "\n", + " # Recompute w' = A*z - c*t\n", + " Az = mat_poly_mul_vec(A, z, poly_mul_func)\n", + " ct = vec_poly_mul_scalar(t, c_poly, poly_mul_func)\n", + " w_prime = vec_poly_sub(Az, ct)\n", + "\n", + " # Compare w' and r (toy condition): they should be \"close\"\n", + " # Here just check exact equality; in real Dilithium you'd have norm bounds.\n", + " for wp, rv in zip(w_prime, r):\n", + " if not np.array_equal((wp % Q), (rv % Q)):\n", + " return False\n", + " return True\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "8ea91187", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Toy Dilithium-style flows (same harness, different poly_mul) ===\n", + "\n", + "[toy-naive] message: b'Test message for toy Dilithium'\n", + "[toy-naive] signature preview:\n", + " z[0]: [6, 12, 7, 17, ...]\n", + " c : [0, -1, 1, -1, ...]\n", + " r[0]: [-657352, 4065526, -3395326, 1460624, ...]\n", + "[toy-naive] keygen: 4.116052 s\n", + "[toy-naive] sign: 6.019188 s\n", + "[toy-naive] verify: 5.229953 s\n", + "[toy-naive] verify OK? True\n", + "\n", + "[toy-sw-ntt] message: b'Test message for toy Dilithium'\n", + "[toy-sw-ntt] signature preview:\n", + " z[0]: [6, 12, 7, 17, ...]\n", + " c : [0, -1, 1, -1, ...]\n", + " r[0]: [-657352, 4065526, -3395326, 1460624, ...]\n", + "[toy-sw-ntt] keygen: 1.292702 s\n", + "[toy-sw-ntt] sign: 1.921250 s\n", + "[toy-sw-ntt] verify: 1.606101 s\n", + "[toy-sw-ntt] verify OK? True\n", + "\n", + "[toy-hw-ntt] message: b'Test message for toy Dilithium'\n", + "[toy-hw-ntt] signature preview:\n", + " z[0]: [6, 12, 7, 17, ...]\n", + " c : [0, -1, 1, -1, ...]\n", + " r[0]: [-657352, 4065526, -3395326, 1460624, ...]\n", + "[toy-hw-ntt] keygen: 0.154008 s\n", + "[toy-hw-ntt] sign: 0.222962 s\n", + "[toy-hw-ntt] verify: 0.184250 s\n", + "[toy-hw-ntt] verify OK? True\n", + "\n", + "Speedups (toy flows, sign only):\n", + " naive / sw-ntt: 3.13 x\n", + " sw-ntt / hw-ntt: 8.62 x\n", + "\n", + "=== liboqs real Dilithium (C implementation) ===\n", + "\n", + "[liboqs] message: b'Test message for liboqs Dilithium'\n", + "[liboqs] signature preview: len=2420, first 16 bytes: 12954eb436346ddf827f047c121627f6\n", + "[liboqs] keygen: 0.001172 s\n", + "[liboqs] sign: 0.001966 s\n", + "[liboqs] verify: 0.000734 s\n", + "[liboqs] verify OK? True\n" + ] + }, + { + "data": { + "text/plain": [ + "(0.0011719209996954305, 0.0019655719997899723, 0.0007336500002566027)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# oho J_dil – end-to-end benchmark: toy flows vs liboqs, with message & signature previews\n", + "\n", + "import time\n", + "import binascii\n", + "\n", + "def preview_poly(poly, n=4):\n", + " \"\"\"Return a short string preview of the first n coefficients of a polynomial.\"\"\"\n", + " poly = np.asarray(poly, dtype=np.int64)\n", + " coeffs = \", \".join(str(int(x)) for x in poly[:n])\n", + " return f\"[{coeffs}{', ...' if poly.size > n else ''}]\"\n", + "\n", + "def preview_sig_toy(sig, n=4):\n", + " \"\"\"Preview toy signature (z, c_poly, r) in a compact way.\"\"\"\n", + " z, c_poly, r = sig\n", + " # z: list of L polys, r: list of K polys\n", + " z0 = z[0] if len(z) > 0 else np.zeros(N, dtype=np.int64)\n", + " r0 = r[0] if len(r) > 0 else np.zeros(N, dtype=np.int64)\n", + " return {\n", + " \"z[0]\": preview_poly(z0, n),\n", + " \"c\": preview_poly(c_poly, n),\n", + " \"r[0]\": preview_poly(r0, n),\n", + " }\n", + "\n", + "def preview_sig_liboqs(sig_bytes, n=16):\n", + " \"\"\"Preview liboqs signature as first n bytes in hex.\"\"\"\n", + " sig = bytes(sig_bytes)\n", + " prefix = sig[:n]\n", + " return f\"len={len(sig)}, first {n} bytes: {binascii.hexlify(prefix).decode()}\"\n", + "\n", + "def run_toy_flow(poly_mul_func, label):\n", + " msg = b\"Test message for toy Dilithium\"\n", + " print(f\"\\n[{label}] message:\", msg)\n", + "\n", + " t0 = time.perf_counter()\n", + " pk, sk = toy_dil_keygen(poly_mul_func)\n", + " t1 = time.perf_counter()\n", + "\n", + " sig = toy_dil_sign(poly_mul_func, sk, pk, msg)\n", + " t2 = time.perf_counter()\n", + "\n", + " ok = toy_dil_verify(poly_mul_func, pk, msg, sig)\n", + " t3 = time.perf_counter()\n", + "\n", + " # Signature preview\n", + " sig_preview = preview_sig_toy(sig, n=4)\n", + "\n", + " print(f\"[{label}] signature preview:\")\n", + " print(f\" z[0]: {sig_preview['z[0]']}\")\n", + " print(f\" c : {sig_preview['c']}\")\n", + " print(f\" r[0]: {sig_preview['r[0]']}\")\n", + "\n", + " print(f\"[{label}] keygen: {t1 - t0:.6f} s\")\n", + " print(f\"[{label}] sign: {t2 - t1:.6f} s\")\n", + " print(f\"[{label}] verify: {t3 - t2:.6f} s\")\n", + " print(f\"[{label}] verify OK? {ok}\")\n", + " return (t1 - t0), (t2 - t1), (t3 - t2)\n", + "\n", + "def run_liboqs_flow():\n", + " if not HAS_OQS or oqs_signer is None:\n", + " print(\"[liboqs] not available.\")\n", + " return None\n", + "\n", + " msg = b\"Test message for liboqs Dilithium\"\n", + " print(\"\\n[liboqs] message:\", msg)\n", + "\n", + " t0 = time.perf_counter()\n", + " with oqs.Signature(DILITHIUM_ALG) as signer:\n", + " pk = signer.generate_keypair()\n", + " sk = signer.export_secret_key()\n", + " t1 = time.perf_counter()\n", + "\n", + " sig = signer.sign(msg)\n", + " t2 = time.perf_counter()\n", + "\n", + " ok = signer.verify(msg, sig, pk)\n", + " t3 = time.perf_counter()\n", + "\n", + " sig_preview = preview_sig_liboqs(sig, n=16)\n", + " print(f\"[liboqs] signature preview: {sig_preview}\")\n", + " print(f\"[liboqs] keygen: {t1 - t0:.6f} s\")\n", + " print(f\"[liboqs] sign: {t2 - t1:.6f} s\")\n", + " print(f\"[liboqs] verify: {t3 - t2:.6f} s\")\n", + " print(f\"[liboqs] verify OK? {ok}\")\n", + " return (t1 - t0), (t2 - t1), (t3 - t2)\n", + "\n", + "print(\"=== Toy Dilithium-style flows (same harness, different poly_mul) ===\")\n", + "\n", + "tkg_n, ts_n, tv_n = run_toy_flow(poly_mul_naive, \"toy-naive\")\n", + "tkg_s, ts_s, tv_s = run_toy_flow(poly_mul_sw_ntt_backend, \"toy-sw-ntt\")\n", + "tkg_h, ts_h, tv_h = run_toy_flow(poly_mul_hw_backend, \"toy-hw-ntt\")\n", + "\n", + "print(\"\\nSpeedups (toy flows, sign only):\")\n", + "print(\" naive / sw-ntt: %.2f x\" % (ts_n / ts_s))\n", + "print(\" sw-ntt / hw-ntt: %.2f x\" % (ts_s / ts_h))\n", + "\n", + "print(\"\\n=== liboqs real Dilithium (C implementation) ===\")\n", + "run_liboqs_flow()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3071d384", + "metadata": {}, + "outputs": [], + "source": [ + "#######################\n", + "# Interoperability Test\n", + "#######################" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "bacd163d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[interop] generating pk, sk with naive backend\n", + "\n", + "[interop] sign=naive, verify=sw-ntt\n", + " sign backend: poly_mul_naive\n", + " verify backend: poly_mul_sw_ntt_backend\n", + " sign time: 6.004031 s\n", + " verify time: 1.602216 s\n", + " verify OK? True\n", + "\n", + "[interop] sign=sw-ntt, verify=naive\n", + " sign backend: poly_mul_sw_ntt_backend\n", + " verify backend: poly_mul_naive\n", + " sign time: 1.920178 s\n", + " verify time: 5.216772 s\n", + " verify OK? True\n", + "\n", + "[interop] sign=hw-ntt, verify=sw-ntt\n", + " sign backend: poly_mul_hw_backend\n", + " verify backend: poly_mul_sw_ntt_backend\n", + " sign time: 0.222985 s\n", + " verify time: 1.599958 s\n", + " verify OK? True\n", + "\n", + "[interop] sign=sw-ntt, verify=hw-ntt\n", + " sign backend: poly_mul_sw_ntt_backend\n", + " verify backend: poly_mul_hw_backend\n", + " sign time: 1.918987 s\n", + " verify time: 0.183846 s\n", + " verify OK? True\n", + "\n", + "[interop] sign=naive, verify=hw-ntt\n", + " sign backend: poly_mul_naive\n", + " verify backend: poly_mul_hw_backend\n", + " sign time: 5.994883 s\n", + " verify time: 0.183779 s\n", + " verify OK? True\n", + "\n", + "[interop] sign=hw-ntt, verify=naive\n", + " sign backend: poly_mul_hw_backend\n", + " verify backend: poly_mul_naive\n", + " sign time: 0.225005 s\n", + " verify time: 5.256011 s\n", + " verify OK? True\n", + "\n", + "[interop] summary:\n", + " naive ↔ sw-ntt: True\n", + " hw-ntt ↔ sw-ntt: True\n", + " hw-ntt ↔ naive: True\n" + ] + } + ], + "source": [ + "# oho K_dil – interoperability tests between poly_mul backends\n", + "\n", + "import time\n", + "\n", + "def run_interop_case(label, poly_mul_sign, poly_mul_verify, pk, sk, msg):\n", + " \"\"\"Sign with one backend, verify with another.\"\"\"\n", + " print(f\"\\n[interop] {label}\")\n", + " t0 = time.perf_counter()\n", + " sig = toy_dil_sign(poly_mul_sign, sk, pk, msg)\n", + " t1 = time.perf_counter()\n", + " ok = toy_dil_verify(poly_mul_verify, pk, msg, sig)\n", + " t2 = time.perf_counter()\n", + " print(f\" sign backend: {poly_mul_sign.__name__}\")\n", + " print(f\" verify backend: {poly_mul_verify.__name__}\")\n", + " print(f\" sign time: {t1 - t0:.6f} s\")\n", + " print(f\" verify time: {t2 - t1:.6f} s\")\n", + " print(f\" verify OK? {ok}\")\n", + " return ok\n", + "\n", + "# Fix a message and a single keypair (so only backends differ)\n", + "msg = b\"Interop test message for toy Dilithium\"\n", + "print(\"[interop] generating pk, sk with naive backend\")\n", + "pk, sk = toy_dil_keygen(poly_mul_naive)\n", + "\n", + "# 1) Sign by SW naive, verify by SW NTT and vice versa\n", + "ok_1a = run_interop_case(\n", + " \"sign=naive, verify=sw-ntt\",\n", + " poly_mul_naive,\n", + " poly_mul_sw_ntt_backend,\n", + " pk, sk, msg,\n", + ")\n", + "ok_1b = run_interop_case(\n", + " \"sign=sw-ntt, verify=naive\",\n", + " poly_mul_sw_ntt_backend,\n", + " poly_mul_naive,\n", + " pk, sk, msg,\n", + ")\n", + "\n", + "# 2) Sign by HW NTT, verify by SW NTT and vice versa\n", + "ok_2a = run_interop_case(\n", + " \"sign=hw-ntt, verify=sw-ntt\",\n", + " poly_mul_hw_backend,\n", + " poly_mul_sw_ntt_backend,\n", + " pk, sk, msg,\n", + ")\n", + "ok_2b = run_interop_case(\n", + " \"sign=sw-ntt, verify=hw-ntt\",\n", + " poly_mul_sw_ntt_backend,\n", + " poly_mul_hw_backend,\n", + " pk, sk, msg,\n", + ")\n", + "\n", + "# Also useful: cross all three backends for sanity\n", + "ok_3a = run_interop_case(\n", + " \"sign=naive, verify=hw-ntt\",\n", + " poly_mul_naive,\n", + " poly_mul_hw_backend,\n", + " pk, sk, msg,\n", + ")\n", + "ok_3b = run_interop_case(\n", + " \"sign=hw-ntt, verify=naive\",\n", + " poly_mul_hw_backend,\n", + " poly_mul_naive,\n", + " pk, sk, msg,\n", + ")\n", + "\n", + "print(\"\\n[interop] summary:\")\n", + "print(\" naive ↔ sw-ntt:\", ok_1a and ok_1b)\n", + "print(\" hw-ntt ↔ sw-ntt:\", ok_2a and ok_2b)\n", + "print(\" hw-ntt ↔ naive:\", ok_3a and ok_3b)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98b90350", + "metadata": {}, + "outputs": [], + "source": [ + "######################################\n", + "# Throughput Testing\n", + "######################################" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "509500db", + "metadata": {}, + "outputs": [], + "source": [ + "# oho L_dil – reusable-buffer variant of the HW poly multiply for throughput\n", + "\n", + "from pynq import allocate\n", + "\n", + "def make_hw_poly_mult_reusable():\n", + " \"\"\"\n", + " Returns a callable hw_poly_mult_reuse(a, b) that uses preallocated\n", + " DMA buffers under the hood, for high-throughput benchmarking.\n", + " \"\"\"\n", + " in_buf = allocate(shape=(N,), dtype=np.uint64)\n", + " out_buf = allocate(shape=(N,), dtype=np.uint32)\n", + "\n", + " def hw_poly_mult_reuse(a, b):\n", + " a = np.asarray(a, dtype=np.int64)\n", + " b = np.asarray(b, dtype=np.int64)\n", + " assert a.shape == (N,) and b.shape == (N,)\n", + "\n", + " # Pack two 32-bit coeffs into one 64-bit word\n", + " # Convention: low 32 bits = a[i], high 32 bits = b[i]\n", + " for i in range(N):\n", + " a_i = int(a[i] % Q) & 0xFFFFFFFF\n", + " b_i = int(b[i] % Q) & 0xFFFFFFFF\n", + " word = ((b_i << 32) | a_i)\n", + " in_buf[i] = np.uint64(word)\n", + "\n", + " # Start hardware core (ap_start = 1)\n", + " mmio.write(0x00, 0x01)\n", + "\n", + " # Kick DMA transfers\n", + " dma.sendchannel.transfer(in_buf)\n", + " dma.recvchannel.transfer(out_buf)\n", + " dma.sendchannel.wait()\n", + " dma.recvchannel.wait()\n", + "\n", + " # We don't need to return the result for pure throughput measurement,\n", + " # but it's useful to keep it functional.\n", + " c = np.empty(N, dtype=np.int64)\n", + " for i in range(N):\n", + " c[i] = center_mod_q(int(out_buf[i]))\n", + " return c\n", + "\n", + " return hw_poly_mult_reuse, in_buf, out_buf\n", + "\n", + "# Create a reusable HW function once\n", + "hw_poly_mult_reuse, hw_in_buf, hw_out_buf = make_hw_poly_mult_reusable()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "f9b43699", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Throughput benchmark with num_ops = 10 ===\n", + "[naive] total time: 2.871808 s, ops/s: 3.48\n", + "[sw-ntt] total time: 0.811113 s, ops/s: 12.33\n", + "[hw-ntt] total time: 0.076152 s, ops/s: 131.32\n", + "\n", + "Throughput ratios (ops/s):\n", + " sw-ntt / naive: 3.54 x\n", + " hw-ntt / sw-ntt: 10.65 x\n", + " hw-ntt / naive: 37.71 x\n", + "=== Throughput benchmark with num_ops = 100 ===\n", + "[naive] total time: 28.092749 s, ops/s: 3.56\n", + "[sw-ntt] total time: 8.087545 s, ops/s: 12.36\n", + "[hw-ntt] total time: 0.763630 s, ops/s: 130.95\n", + "\n", + "Throughput ratios (ops/s):\n", + " sw-ntt / naive: 3.47 x\n", + " hw-ntt / sw-ntt: 10.59 x\n", + " hw-ntt / naive: 36.79 x\n", + "=== Throughput benchmark with num_ops = 1000 ===\n", + "[naive] total time: 275.062409 s, ops/s: 3.64\n", + "[sw-ntt] total time: 77.406778 s, ops/s: 12.92\n", + "[hw-ntt] total time: 7.654999 s, ops/s: 130.63\n", + "\n", + "Throughput ratios (ops/s):\n", + " sw-ntt / naive: 3.55 x\n", + " hw-ntt / sw-ntt: 10.11 x\n", + " hw-ntt / naive: 35.93 x\n" + ] + } + ], + "source": [ + "# oho M_dil – throughput benchmark: many independent multiplies\n", + "\n", + "import time\n", + "from numpy.random import default_rng\n", + "\n", + "rng = default_rng(123456)\n", + "\n", + "def throughput_benchmark(num_ops=100):\n", + " print(f\"=== Throughput benchmark with num_ops = {num_ops} ===\")\n", + "\n", + " # Pre-generate inputs so all backends see the same workload\n", + " A_batch = rng.integers(low=-Q//2, high=Q//2, size=(num_ops, N), dtype=np.int64)\n", + " B_batch = rng.integers(low=-Q//2, high=Q//2, size=(num_ops, N), dtype=np.int64)\n", + "\n", + " # 1) Naive SW (O(N^2))\n", + " t0 = time.perf_counter()\n", + " for k in range(num_ops):\n", + " _ = poly_mul_sw_naive(A_batch[k], B_batch[k])\n", + " t1 = time.perf_counter()\n", + " t_naive_total = t1 - t0\n", + " naive_ops_per_s = num_ops / t_naive_total\n", + "\n", + " print(f\"[naive] total time: {t_naive_total:.6f} s, ops/s: {naive_ops_per_s:.2f}\")\n", + "\n", + " # 2) SW NTT (Python 512-NTT)\n", + " t0 = time.perf_counter()\n", + " for k in range(num_ops):\n", + " _ = poly_mul_sw_ntt(A_batch[k], B_batch[k])\n", + " t1 = time.perf_counter()\n", + " t_sw_ntt_total = t1 - t0\n", + " sw_ntt_ops_per_s = num_ops / t_sw_ntt_total\n", + "\n", + " print(f\"[sw-ntt] total time: {t_sw_ntt_total:.6f} s, ops/s: {sw_ntt_ops_per_s:.2f}\")\n", + "\n", + " # 3) HW NTT (FPGA via reusable DMA buffers)\n", + " t0 = time.perf_counter()\n", + " for k in range(num_ops):\n", + " _ = hw_poly_mult_reuse(A_batch[k], B_batch[k])\n", + " t1 = time.perf_counter()\n", + " t_hw_total = t1 - t0\n", + " hw_ops_per_s = num_ops / t_hw_total\n", + "\n", + " print(f\"[hw-ntt] total time: {t_hw_total:.6f} s, ops/s: {hw_ops_per_s:.2f}\")\n", + "\n", + " # Aggregate ratios\n", + " print(\"\\nThroughput ratios (ops/s):\")\n", + " print(\" sw-ntt / naive: %.2f x\" % (sw_ntt_ops_per_s / naive_ops_per_s))\n", + " print(\" hw-ntt / sw-ntt: %.2f x\" % (hw_ops_per_s / sw_ntt_ops_per_s))\n", + " print(\" hw-ntt / naive: %.2f x\" % (hw_ops_per_s / naive_ops_per_s))\n", + "\n", + "# Example: varying load\n", + "throughput_benchmark(num_ops=10)\n", + "throughput_benchmark(num_ops=100)\n", + "# next line: Do you want to wait so long here ?\n", + "#throughput_benchmark(num_ops=1000)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61736ef2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/PYNQ-zcu104_Files/Kyber.ipynb b/PYNQ-zcu104_Files/Kyber.ipynb new file mode 100644 index 0000000..258a8f8 --- /dev/null +++ b/PYNQ-zcu104_Files/Kyber.ipynb @@ -0,0 +1,2563 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "163cf7a1", + "metadata": {}, + "outputs": [], + "source": [ + "#oho A\n", + "\n", + "import numpy as np\n", + "import math\n", + "import time\n", + "from pynq import Overlay\n", + "from pynq import allocate\n", + "from pynq import MMIO\n", + "\n", + "\n", + "# Kyber-v2 parameters\n", + "q = 3329\n", + "n2 = 256\n", + "n = 128\n", + "inv_n = 3303\n", + "psin = 17\n", + "inv_psin = 1175\n", + "k = 2\n", + "eta1 = 2\n", + "eta2 = 3\n", + "du = 10\n", + "dv = 4" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3abcc796", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 19224\n", + "drwxr-xr-x 3 root root 4096 Dec 5 17:26 .\n", + "drwxrwxrwx 9 xilinx xilinx 4096 Dec 13 13:00 ..\n", + "-rw-r--r-- 1 root root 19311209 Dec 5 17:26 base.bit\n", + "-rw-r--r-- 1 root root 359733 Dec 5 17:26 base.hwh\n", + "drwxr-xr-x 2 root root 4096 Nov 2 16:11 .ipynb_checkpoints\n", + "Sat Dec 13 01:00:16 PM UTC 2025\n" + ] + } + ], + "source": [ + "#oho B\n", + "!ls -la /home/xilinx/jupyter_notebooks/kyber-ntt\n", + "!date\n", + "\n", + "# Loading the kyber-ntt bit file to configure the PL\n", + "ol = Overlay('/home/xilinx/jupyter_notebooks/kyber-ntt/base.bit')" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "18296772", + "metadata": {}, + "outputs": [], + "source": [ + "#oho C\n", + "# Function to perform bit-reversal\n", + "def bitReverse(num, logn):\n", + " rev_num = 0\n", + " for i in range(logn):\n", + " if (num >> i) & 1:\n", + " rev_num |= 1 << (logn - 1 - i)\n", + " return rev_num\n", + "\n", + "# Function to generate twiddle factors (for both forward and inverse NTT)\n", + "def gen_tf(psin, inv_psin, n, q):\n", + " positions = [bitReverse(x, int(np.log2(n))) for x in range(n)]\n", + " tmp1, tmp2 = [], []\n", + " psis, inv_psis = [], []\n", + " psi = 1\n", + " inv_psi = 1\n", + " for x in range(n):\n", + " tmp1.append(psi)\n", + " tmp2.append(inv_psi)\n", + " psi = psi * psin % q\n", + " inv_psi = inv_psi * inv_psin % q\n", + " for x in range(n):\n", + " val = tmp1[positions[x]]\n", + " inv_val = tmp2[positions[x]]\n", + " psis.append(val)\n", + " inv_psis.append(inv_val)\n", + " return psis, inv_psis\n", + "\n", + "# Function to generate scaling factors for point wise multiplication\n", + "def gen_pwmf(psin, n, q):\n", + " pwmf = []\n", + " for i in range(n):\n", + " val = (psin**(2*bitReverse(i, int(np.log2(n))) + 1))%q\n", + " pwmf.append(val)\n", + " return pwmf\n", + "\n", + "# Functions to generate Centered Binomial Distribution\n", + "def _cbd(n, eta):\n", + " i = 0\n", + " while i < eta:\n", + " p1 = np.random.randint(0, 2, n)\n", + " if i == 0:\n", + " p = p1\n", + " else:\n", + " p = p + p1\n", + " i = i + 1\n", + " return p\n", + "\n", + "def cbd(n, eta):\n", + " a = _cbd(n, eta)\n", + " b = _cbd(n, eta)\n", + " return a - b\n", + " \n", + "def cbd_vector(n, eta, k):\n", + " result = []\n", + "\n", + " for i in range(k):\n", + " result.append(cbd(n, eta))\n", + "\n", + " return np.squeeze(np.array(result, dtype=np.int16))\n", + "\n", + "# Compression function\n", + "def compress(x, q, d):\n", + " q1 = 2**d\n", + " x = np.round(q1 / q * x).astype(np.int16)\n", + " x = np.remainder(x, q1)\n", + " return x\n", + "\n", + "# De-compression function\n", + "def decompress(x, q, d):\n", + " q1 = 2**d\n", + " x = np.round(q / q1 * x).astype(np.int16)\n", + " x = np.remainder(x, q)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "299a54df", + "metadata": {}, + "outputs": [], + "source": [ + "#oho D\n", + "# NTT / INTT - all in SW\n", + "\n", + "# 128 point Forward NTT using Cooley-Tukey (TC) algorithm\n", + "def ct_ntt(a, psis, q, n):\n", + " t = n\n", + " m = 1\n", + " while m < n:\n", + " t = t // 2\n", + " for i in range(m):\n", + " j1 = 2 * i * t\n", + " j2 = j1 + t - 1\n", + " S = psis[m + i]\n", + " for j in range(j1, j2 + 1):\n", + " U = a[j]\n", + " V = a[j + t] * S\n", + " a[j] = (U + V) % q\n", + " a[j + t] = (U - V) % q\n", + " m = 2 * m\n", + " return a\n", + " \n", + "# 128 point Inverse NTT using Gentleman-Sande (GS) algorithm\n", + "def gs_intt(a, inv_psis, q, n, inv_n):\n", + " t = 1\n", + " m = n\n", + " while m > 1:\n", + " j1 = 0\n", + " h = m // 2\n", + " for i in range(h):\n", + " j2 = j1 + t - 1\n", + " S = inv_psis[h + i]\n", + " for j in range(j1, j2 + 1):\n", + " U = a[j]\n", + " V = a[j + t]\n", + " a[j] = (U + V) % q\n", + " a[j + t] = (U - V) * S % q\n", + " j1 = j1 + 2 * t\n", + " t = 2 * t\n", + " m = m // 2\n", + " for i in range(n):\n", + " a[i] = a[i] * inv_n % q\n", + " return a\n", + "\n", + "# 256 point NTT using two 128 point NTTs\n", + "def ntt_256(x, psis, q, n):\n", + " xe, xo = [], []\n", + " for i in range(n2):\n", + " if i%2 == 0:\n", + " xe.append(x[i])\n", + " else:\n", + " xo.append(x[i])\n", + " ye = ct_ntt(xe, psis, q, n)\n", + " yo = ct_ntt(xo, psis, q, n)\n", + " return ye, yo\n", + "\n", + "# 256 point INTT using two 128 point INTTs\n", + "def intt_256(ye, yo, inv_psis, q, n, inv_n):\n", + " ze = gs_intt(ye, inv_psis, q, n, inv_n)\n", + " zo = gs_intt(yo, inv_psis, q, n, inv_n)\n", + " z = []\n", + " for i in range(n):\n", + " z.append(ze[i])\n", + " z.append(zo[i])\n", + " return z\n", + "\n", + "# Point-wise multiplication in NTT domain\n", + "def point_wise_mult(y1e, y1o, y2e, y2o, pwmf):\n", + " y3e, y3o = [], []\n", + " for i in range(n):\n", + " y3e.append(((y1e[i] * y2e[i]) % q + (((y1o[i] * y2o[i]) % q) * pwmf[i]) % q) % q)\n", + " y3o.append(((y1e[i] * y2o[i]) % q + (y1o[i] * y2e[i]) % q) % q)\n", + " return y3e, y3o" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "44f2dd33", + "metadata": {}, + "outputs": [], + "source": [ + "###################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "844faabe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['axi_dma_0', 'poly_mult_0', 'poly_mult_dil_0', 'axi_dma_1', 'ps_e_0'])\n", + "poly_mult_dil_0 @ 0x80020000\n" + ] + } + ], + "source": [ + "#oho E\n", + "#oho inquiry for ZCU104 Board\n", + "\n", + "# Discover the real base address from the .hwh\n", + "print(ol.ip_dict.keys()) # just to see exact IP names\n", + "\n", + "poly_name = 'poly_mult_0' # adjust if Vivado named it differently\n", + "poly_info = ol.ip_dict[poly_name]\n", + "print(\"poly_mult_0 @\", hex(poly_info['phys_addr']))\n", + "\n", + "mmio = MMIO(poly_info['phys_addr'], poly_info['addr_range'])\n", + "\n", + "dma = ol.axi_dma_0\n", + "dma_send = dma.sendchannel\n", + "dma_recv = dma.recvchannel" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "af023c05", + "metadata": {}, + "outputs": [], + "source": [ + "####################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "5a6323d2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "n2 = 256\n", + "Number of mismatches: 0\n", + "Hardware poly_mult matches test_case.h vectors.\n" + ] + } + ], + "source": [ + "#oho F\n", + "# Golden Test for Golden Record analog Vitis HLS testbench\n", + "# pm_test.cpp Equivalent (== HW Simulator with golden testvestors in Vitis HLS)\n", + "\n", + "import numpy as np\n", + "from pynq import allocate\n", + "\n", + "# -------------------------------------------------------------------\n", + "# 1. Test vectors from test_case.h\n", + "# Paste the FULL arrays from the header in place of the \"...\" parts\n", + "# -------------------------------------------------------------------\n", + "input1_vals = np.array(\n", + " [\n", + " 1477, 218, 784, 251, 747, 1051, 1924, 133, 2953, 1295, 2989, 1519, 1701, 1874, 2806, 423, 2883, 327, 47, 2525, 1508, 214, 2998, 217, 1852, 2624, 2286, 3039, 3076, 1213, 1808, 2554, 1129, 1353, 2690, 2839, 1778, 2752, 1378, 601, 914, 2335, 2497, 1139, 2611, 129, 1318, 1570, 3190, 1868, 940, 2901, 2626, 2473, 3195, 2621, 2436, 3046, 1018, 1139, 1729, 3021, 2064, 945, 690, 1700, 1836, 1943, 2333, 2131, 1618, 1741, 2639, 2653, 301, 2013, 2744, 2406, 2995, 2463, 2366, 1495, 442, 224, 1349, 11, 2342, 1712, 2847, 1578, 2654, 2734, 3131, 1245, 1862, 527, 2400, 2043, 1360, 451, 573, 898, 2018, 3100, 161, 284, 1949, 362, 755, 2916, 1288, 1616, 876, 1682, 853, 2772, 2956, 1101, 2, 214, 2589, 211, 1025, 610, 1225, 2118, 224, 1296, 2612, 2634, 2056, 3227, 1712, 1258, 552, 1345, 786, 2124, 2915, 1226, 1233, 2654, 2786, 2636, 2234, 727, 2444, 199, 600, 2262, 3221, 915, 63, 318, 74, 2396, 1690, 2390, 1711, 414, 10, 2298, 1082, 1419, 3151, 1723, 2744, 3274, 2518, 2954, 1208, 2941, 2089, 3288, 1370, 783, 2517, 3190, 3069, 2505, 2840, 1427, 1670, 3091, 655, 96, 1935, 880, 2511, 876, 2371, 341, 196, 2849, 919, 161, 603, 2993, 2903, 1721, 139, 3326, 1876, 379, 2508, 2094, 1929, 430, 1033, 2604, 1955, 1333, 2274, 3312, 2604, 1585, 2317, 3230, 3068, 2905, 3268, 2844, 1023, 2824, 1731, 643, 820, 462, 2975, 314, 2218, 2011, 649, 383, 874, 2181, 866, 1192, 2914, 2290, 1820, 1572, 1030, 3076, 1526, 2760, 12, 529, 1242, 560, 2723, 2894, 1097, 778, 1495, 371\n", + " ],\n", + " dtype=np.int16,\n", + ")\n", + "\n", + "input2_vals = np.array(\n", + " [\n", + " 2960, 3124, 509, 485, 2525, 385, 608, 2893, 2423, 1802, 2556, 1090, 775, 2059, 898, 864, 2459, 1116, 551, 188, 3262, 2728, 3134, 2451, 427, 858, 1927, 830, 2688, 2388, 2818, 1418, 3298, 24, 2491, 1448, 1153, 178, 2489, 2126, 1772, 669, 1238, 633, 1919, 2222, 2673, 1918, 2202, 3312, 208, 976, 2267, 107, 2905, 1137, 2921, 2471, 2796, 1313, 485, 1982, 1557, 1203, 2930, 241, 3089, 890, 2193, 179, 952, 2057, 2444, 1378, 1466, 1362, 1808, 2343, 1532, 2651, 727, 3254, 1328, 1604, 967, 2418, 1266, 1826, 684, 2869, 3149, 1874, 1691, 1507, 339, 2473, 102, 3153, 969, 1551, 548, 3059, 2841, 1369, 148, 2510, 2025, 1369, 1579, 2474, 1093, 527, 1416, 981, 2320, 2305, 227, 2173, 812, 1703, 2952, 17, 1129, 2223, 1894, 959, 73, 339, 553, 1466, 1065, 617, 1749, 1896, 1838, 1771, 3092, 297, 996, 198, 521, 567, 3256, 2783, 1044, 2644, 744, 2986, 3178, 1522, 942, 2045, 236, 1866, 853, 2303, 2383, 3095, 418, 2752, 2105, 2896, 3081, 3067, 1696, 978, 102, 1961, 3120, 2741, 1029, 885, 2852, 2659, 2815, 3032, 2358, 3252, 1195, 3304, 878, 70, 3069, 2726, 2455, 182, 108, 2868, 1744, 1697, 1060, 1803, 1752, 829, 2434, 862, 2287, 2860, 352, 634, 2626, 1920, 2425, 239, 831, 2527, 1190, 1469, 2602, 1711, 2185, 1403, 3189, 1188, 2649, 2079, 2215, 790, 409, 2413, 627, 2268, 2507, 2102, 1727, 1146, 2711, 355, 1143, 1225, 430, 82, 3015, 2699, 642, 863, 241, 450, 440, 338, 365, 2621, 3022, 204, 149, 2986, 2191, 1793, 3085, 2128, 373, 290, 835, 580, 2530, 1948\n", + " ],\n", + " dtype=np.int16,\n", + ")\n", + "\n", + "output_vals = np.array(\n", + " [\n", + " 2762, 3061, 1101, 3267, 2744, 1349, 182, 1761, 3089, 751, 137, 368, 1461, 2956, 493, 1653, 2617, 721, 356, 3034, 2234, 1556, 809, 2290, 1597, 457, 811, 259, 685, 2478, 319, 2519, 1049, 837, 644, 2571, 1029, 2997, 762, 1710, 2110, 1099, 2513, 1038, 2176, 1938, 3214, 261, 1604, 2474, 5, 1211, 2816, 2848, 2286, 3146, 1777, 1630, 2412, 1457, 889, 671, 822, 2369, 1409, 2059, 1121, 1871, 303, 1178, 2241, 1827, 2046, 628, 2869, 749, 1666, 895, 580, 1770, 2082, 3123, 1192, 520, 168, 2461, 1032, 163, 1421, 2792, 2148, 1735, 220, 1896, 2887, 2163, 357, 2301, 1830, 163, 1812, 805, 1850, 2017, 2313, 1205, 2226, 703, 866, 1708, 1426, 1920, 2911, 267, 3134, 629, 2120, 2022, 2847, 2945, 2967, 1977, 1449, 2028, 1381, 2738, 1098, 2977, 2217, 2060, 710, 845, 2807, 509, 2512, 2444, 2355, 550, 2965, 2517, 1802, 1755, 1065, 1938, 388, 2365, 776, 2453, 1799, 1532, 384, 2266, 1071, 2063, 2858, 1414, 663, 2886, 2734, 209, 1061, 2142, 841, 1081, 977, 799, 2661, 588, 3222, 2140, 2383, 3044, 394, 231, 1090, 917, 1840, 3002, 2315, 1182, 2744, 2815, 2612, 2586, 970, 3301, 3028, 2890, 1849, 269, 2936, 1525, 3102, 3144, 1605, 2746, 1556, 537, 2918, 2549, 976, 250, 2137, 492, 729, 392, 1115, 2422, 2100, 2317, 1636, 1743, 1279, 1393, 2079, 2874, 2148, 233, 1469, 3143, 2109, 1211, 2318, 1138, 2979, 1383, 125, 1995, 1614, 1435, 2216, 782, 671, 662, 988, 2826, 2162, 605, 2955, 2478, 2375, 1449, 2307, 1921, 1285, 2208, 2422, 1035, 2765, 923, 2138, 3053, 812, 146, 1175, 61\n", + " ],\n", + " dtype=np.int16,\n", + ")\n", + "\n", + "# HLS core expects Nt = 256 words, and each word packs two 16-bit coeffs\n", + "n2 = len(input1_vals)\n", + "assert n2 == len(input2_vals) == len(output_vals), \"Vector length mismatch\"\n", + "print(\"n2 =\", n2)\n", + "assert n2 == 256, \"HLS core expects Nt = 256\"\n", + "\n", + "\n", + "# -------------------------------------------------------------------\n", + "# 2. Hardware poly_mult wrapper matching pm_test.cpp packing\n", + "# -------------------------------------------------------------------\n", + "def poly_mul_hw_golden(x1: np.ndarray, x2: np.ndarray) -> np.ndarray:\n", + " \"\"\"\n", + " Hardware polynomial multiplication:\n", + " - x1, x2: length-n2 arrays of 16-bit coefficients (numpy.int16)\n", + " - Returns: length-n2 array of 16-bit coeffs (numpy.int16)\n", + "\n", + " Packing is exactly like pm_test.cpp:\n", + " data[15:0] = input1_vals[i]\n", + " data[31:16] = input2_vals[i]\n", + " \"\"\"\n", + "\n", + " assert x1.shape == (n2,)\n", + " assert x2.shape == (n2,)\n", + "\n", + " # Allocate DMA buffers\n", + " input_buffer = allocate(shape=(n2,), dtype=np.int32)\n", + " output_buffer = allocate(shape=(n2,), dtype=np.int16)\n", + "\n", + " # Pack two 16-bit coeffs into one 32-bit word: [x2 | x1]\n", + " x1_32 = x1.astype(np.int32)\n", + " x2_32 = x2.astype(np.int32)\n", + " packed = (x1_32 & 0xFFFF) | ((x2_32 & 0xFFFF) << 16)\n", + " input_buffer[:] = packed\n", + "\n", + " # Start IP core (ap_start = 1 at control register 0x00)\n", + " mmio.write(0x00, 0x1)\n", + "\n", + " # Launch DMA transfers\n", + " dma_send.transfer(input_buffer)\n", + " dma_recv.transfer(output_buffer)\n", + " dma_send.wait()\n", + " dma_recv.wait()\n", + "\n", + " # Copy result out\n", + " y = np.array(output_buffer, dtype=np.int16)\n", + "\n", + " # Free buffers (depending on your Pynq version, freebuffer() may exist)\n", + " try:\n", + " input_buffer.freebuffer()\n", + " output_buffer.freebuffer()\n", + " except AttributeError:\n", + " del input_buffer, output_buffer\n", + "\n", + " return y\n", + "\n", + "\n", + "# -------------------------------------------------------------------\n", + "# 3. Run hardware test against golden output_vals from test_case.h\n", + "# -------------------------------------------------------------------\n", + "hw_out = poly_mul_hw_golden(input1_vals, input2_vals)\n", + "\n", + "diff = np.where(hw_out != output_vals)[0]\n", + "print(\"Number of mismatches:\", diff.size)\n", + "\n", + "if diff.size > 0:\n", + " print(\"First mismatches (idx, hw, expected):\")\n", + " for idx in diff[:20]:\n", + " print(f\"{idx:3d}: hw={int(hw_out[idx])}, expected={int(output_vals[idx])}\")\n", + "else:\n", + " print(\"Hardware poly_mult matches test_case.h vectors.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "c4dc2c5a", + "metadata": {}, + "outputs": [], + "source": [ + "############################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "5c5f4f54", + "metadata": {}, + "outputs": [], + "source": [ + "############################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "3a2c0a39", + "metadata": {}, + "outputs": [], + "source": [ + "############################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "bc452d9d", + "metadata": {}, + "outputs": [], + "source": [ + "# oho G-1\n", + "# Naive polynomial multiplication under mod (x^n2 + 1) in Software\n", + "# [i.e. negative wrapped convolution, quadratic cost O(n^2)]\n", + "def poly_mul_sw_naive(x1, x2):\n", + " \"\"\"\n", + " Schoolbook polynomial multiplication in R_q = Z_q[X]/(X^n2 + 1).\n", + "\n", + " Convention:\n", + " if i + j < n2: res[i+j] += x1[i] * x2[j]\n", + " else: res[i+j-n2] -= x1[i] * x2[j]\n", + "\n", + " This matches the (X^n + 1) / \"negacyclic\" convention.\n", + " \"\"\"\n", + "\n", + " # ensure proper types and shapes\n", + " a = np.asarray(x1, dtype=np.int64)\n", + " b = np.asarray(x2, dtype=np.int64)\n", + "\n", + " assert a.shape == (n2,), f\"a.shape={a.shape}, expected ({n2},)\"\n", + " assert b.shape == (n2,), f\"b.shape={b.shape}, expected ({n2},)\"\n", + "\n", + " res = np.zeros(n2, dtype=np.int64)\n", + "\n", + " for i in range(n2):\n", + " ai = int(a[i])\n", + " for j in range(n2):\n", + " t = ai * int(b[j])\n", + " k = i + j\n", + " if k < n2:\n", + " res[k] += t\n", + " else:\n", + " res[k - n2] -= t\n", + "\n", + " res %= q\n", + " return res.astype(np.int16)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "e3ba800f", + "metadata": {}, + "outputs": [], + "source": [ + "# oho G-2\n", + "# encrypt-decrypt declarations in SW using *naive* polynomial multiplication\n", + "\n", + "# Kyber PKE functions entirely in SW (schoolbook poly mult)\n", + "\n", + "# Key generation function (to be performed by server)\n", + "def key_gen_naive():\n", + " a = np.random.randint(q, size=(k, k, n2))\n", + " s = cbd_vector(n2, eta1, k)\n", + " e = cbd_vector(n2, eta1, k)\n", + "\n", + " b0 = (poly_mul_sw_naive(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_sw_naive(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_sw_naive(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_sw_naive(a[1,1], s[1]) + e[1]) % q\n", + "\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23])\n", + " return s, a, b\n", + "\n", + "# Encryption function (to be performed by client)\n", + "def encrypt_naive(a, b, m):\n", + " r = cbd_vector(n2, eta1, k)\n", + " e1 = cbd_vector(n2, eta2, k)\n", + " e2 = cbd(n2, eta2)\n", + "\n", + " u0 = (poly_mul_sw_naive(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_sw_naive(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_sw_naive(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_sw_naive(a[1,1], r[1]) + e1[1]) % q\n", + "\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23])\n", + "\n", + " v0 = np.array(poly_mul_sw_naive(b[0], r[0]))\n", + " v1 = np.array(poly_mul_sw_naive(b[1], r[1]))\n", + " v = (v0 + v1 + e2 + m) % q\n", + "\n", + " u = compress(u, q, du)\n", + " v = compress(v, q, dv)\n", + " return u, v\n", + "\n", + "# Decryption function (to be performed by server)\n", + "def decrypt_naive(s, u, v):\n", + " u_dec = decompress(u, q, du)\n", + " v_dec = decompress(v, q, dv)\n", + "\n", + " p0 = np.array(poly_mul_sw_naive(s[0], u_dec[0]))\n", + " p1 = np.array(poly_mul_sw_naive(s[1], u_dec[1]))\n", + " p = (p0 + p1) % q\n", + " d = (v_dec - p) % q\n", + " return d\n" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "bd77ef1d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Actual message :\n", + " [0 0 1 1 0 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1\n", + " 0 1 1 0 0 0 0 0 1 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1\n", + " 0 0 0 0 0 1 0 0 0 1 1 0 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1\n", + " 0 0 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 1 1 0 0 1 0 1 1 0 0 0 0 1 1 0\n", + " 0 1 0 0 1 0 1 1 0 0 0 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0\n", + " 1 1 1 1 0 1 0 1 0 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0\n", + " 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 0 1]\n", + "Decrypted message :\n", + " [0 0 1 1 0 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1\n", + " 0 1 1 0 0 0 0 0 1 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1\n", + " 0 0 0 0 0 1 0 0 0 1 1 0 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1\n", + " 0 0 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 1 1 0 0 1 0 1 1 0 0 0 0 1 1 0\n", + " 0 1 0 0 1 0 1 1 0 0 0 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0\n", + " 1 1 1 1 0 1 0 1 0 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0\n", + " 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 0 1]\n", + "Actual message and decrypted message are the same!\n", + "\n", + "Time taken by SW(naive) only = 4.159699201583862 seconds\n" + ] + } + ], + "source": [ + "# oho G-3\n", + "# Full SW Run Encrypt-Decrypt using *naive* polynomial multiplication\n", + "\n", + "# (No NTT precomputation needed, but harmless if psis/pwmf already exist)\n", + "\n", + "start_sw_naive = time.time()\n", + "\n", + "# Randomly generated binary message, m\n", + "m = np.random.randint(2, size=(n2,))\n", + "ms = decompress(m, q, 1)\n", + "\n", + "# Generating private key (s) and public keys (a,b) with naive poly-mult\n", + "s, a, b = key_gen_naive()\n", + "\n", + "# Encrypting the message using public keys to provide cipher texts (u,v)\n", + "u, v = encrypt_naive(a, b, ms)\n", + "\n", + "# Decrypt the cipher using private key to obtain back the message (d)\n", + "d = decrypt_naive(s, u, v)\n", + "\n", + "# Decoding the decrypted message\n", + "md = []\n", + "for x in d:\n", + " if x > math.floor(q/4) and x < math.floor(3*q/4):\n", + " md.append(1)\n", + " else:\n", + " md.append(0)\n", + "md = np.array(md)\n", + "\n", + "end_sw_naive = time.time()\n", + "\n", + "# Comparison and printing results\n", + "print(\"Actual message :\\n\", m)\n", + "print(\"Decrypted message :\\n\", md)\n", + "\n", + "if (list(m) == list(md)):\n", + " print(\"Actual message and decrypted message are the same!\")\n", + "else:\n", + " print(\"There is mismatch ....\")\n", + "\n", + "print()\n", + "print(\"Time taken by SW(naive) only =\", end_sw_naive - start_sw_naive, \"seconds\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "7497a808", + "metadata": {}, + "outputs": [], + "source": [ + "######################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "b632edb3", + "metadata": {}, + "outputs": [], + "source": [ + "#oho H-1\n", + "# Polynomial multiplication under mod (x^n + 1) in Software\n", + "# NTT/INTT version (not schoolbook)\n", + "# [i.e negative wrapped convolution]\n", + "def poly_mul_sw(x1, x2):\n", + "\n", + " y1e, y1o = ntt_256(x1, psis, q, n)\n", + " y2e, y2o = ntt_256(x2, psis, q, n)\n", + "\n", + " y3e, y3o = point_wise_mult(y1e, y1o, y2e, y2o, pwmf)\n", + "\n", + " z = intt_256(y3e, y3o, inv_psis, q, n, inv_n)\n", + "\n", + " return z" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "25429654", + "metadata": {}, + "outputs": [], + "source": [ + "#oho H-2\n", + "# encrypt-decrypt declarations in SW (NTT Version)\n", + "\n", + "# Kyber PKE functions entirely in SW\n", + "# Key generation function (to be performed by server)\n", + "def key_gen():\n", + " a = np.random.randint(q, size=(k,k,n2))\n", + " s = cbd_vector(n2, eta1, k)\n", + " e = cbd_vector(n2, eta1, k)\n", + " b0 = (poly_mul_sw(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_sw(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_sw(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_sw(a[1,1], s[1]) + e[1]) % q\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23])\n", + " return s, a, b\n", + "\n", + "# Encryption function (to be performed by client)\n", + "def encrypt(a, b, m):\n", + " r = cbd_vector(n2, eta1, k)\n", + " e1 = cbd_vector(n2, eta2, k)\n", + " e2 = cbd(n2, eta2)\n", + " u0 = (poly_mul_sw(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_sw(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_sw(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_sw(a[1,1], r[1]) + e1[1]) % q\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23])\n", + " v0 = np.array(poly_mul_sw(b[0], r[0]))\n", + " v1 = np.array(poly_mul_sw(b[1], r[1]))\n", + " v = (v0 + v1 + e2 + m) % q\n", + " u = compress(u, q, du)\n", + " v = compress(v, q, dv)\n", + " return u, v\n", + "\n", + "# Decryption function (to be performed by server)\n", + "def decrypt(s, u, v):\n", + " u = decompress(u, q, du)\n", + " v = decompress(v, q, dv)\n", + " p0 = np.array(poly_mul_sw(s[0], u[0]))\n", + " p1 = np.array(poly_mul_sw(s[1], u[1]))\n", + " p = (p0 + p1) % q\n", + " d = (v - p) % q\n", + " return d" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "70c0b30e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Actual message :\n", + " [0 1 1 0 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1 0 0 0 1 1 1 1 0 0 0 1 1\n", + " 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 1 1 1\n", + " 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 1 1 1 0 0 0 1 1 1 1 1 1 0 1\n", + " 1 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 0\n", + " 1 0 1 1 1 0 0 1 0 0 1 1 0 1 0 1 1 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1\n", + " 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1 1 0 0\n", + " 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0]\n", + "Decrypted message :\n", + " [0 1 1 0 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1 0 0 0 1 1 1 1 0 0 0 1 1\n", + " 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 1 1 1\n", + " 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 1 1 1 0 0 0 1 1 1 1 1 1 0 1\n", + " 1 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 0\n", + " 1 0 1 1 1 0 0 1 0 0 1 1 0 1 0 1 1 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1\n", + " 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1 1 0 0\n", + " 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0]\n", + "Actual message and decrypted message are the same!\n", + "\n", + "Time taken by SW(NTT) only = 0.37024855613708496 seconds\n" + ] + } + ], + "source": [ + "#oho H-3\n", + "# Full SW Run Encrypt-Decrypt (NTT Version)\n", + "\n", + "# Get pre-computed factors\n", + "psis, inv_psis = gen_tf(psin, inv_psin, n, q)\n", + "pwmf = gen_pwmf(psin, n, q)\n", + "\n", + "start_sw = time.time()\n", + "\n", + "# Randomly generated binary message, m\n", + "m = np.random.randint(2, size=(n2,))\n", + "ms = decompress(m, q, 1)\n", + "\n", + "# Generating private key (s) and publik keys (a,b)\n", + "s, a, b = key_gen()\n", + "\n", + "# Encrypting the message using public keys to provide cipher texts (u,v)\n", + "u, v = encrypt(a, b, ms)\n", + "\n", + "# Decrypt the cipher using private key to obtain back the message (d)\n", + "d = decrypt(s, u, v)\n", + "\n", + "# Decoding the decrypted message\n", + "md = []\n", + "for i in d:\n", + " if i > math.floor(q/4) and i < math.floor(3*q/4):\n", + " md.append(1)\n", + " else:\n", + " md.append(0)\n", + "md = np.array(md)\n", + "\n", + "end_sw = time.time()\n", + "\n", + "# Comparision and printing results\n", + "print(\"Actual message :\\n\", m)\n", + "print(\"Decrypted message :\\n\", md)\n", + "\n", + "if (list(m) == list(md)):\n", + " print(\"Actual message and decrypted message are the same!\")\n", + "else:\n", + " print(\"There is mismatch ....\")\n", + "\n", + "print()\n", + "print(\"Time taken by SW(NTT) only =\", end_sw - start_sw, \"seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "75735ba1", + "metadata": {}, + "outputs": [], + "source": [ + "##############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "fcce2793", + "metadata": {}, + "outputs": [], + "source": [ + "# oho I-1\n", + "\n", + "# HW declaration poly-mult !\n", + "\n", + "import numpy as np\n", + "from pynq import allocate\n", + "\n", + "# n2 should be 256 for this HLS core\n", + "n2 = 256\n", + "\n", + "def poly_mul_hw(x1: np.ndarray, x2: np.ndarray) -> np.ndarray:\n", + " \"\"\"\n", + " Hardware polynomial multiplication:\n", + " - x1, x2: length-n2 arrays of 16-bit coefficients (numpy.int16 / any int)\n", + " - Returns: length-n2 array of 16-bit coeffs (numpy.int16)\n", + "\n", + " Packing matches the Vitis C testbench (pm_test.cpp):\n", + " TDATA[15:0] = x1[i]\n", + " TDATA[31:16] = x2[i]\n", + " \"\"\"\n", + "\n", + " x1 = np.asarray(x1)\n", + " x2 = np.asarray(x2)\n", + "\n", + " assert x1.shape == (n2,), f\"x1 shape {x1.shape} != ({n2},)\"\n", + " assert x2.shape == (n2,), f\"x2 shape {x2.shape} != ({n2},)\"\n", + "\n", + " # Allocate DMA buffers\n", + " input_buffer = allocate(shape=(n2,), dtype=np.int32)\n", + " output_buffer = allocate(shape=(n2,), dtype=np.int16)\n", + "\n", + " # Pack two 16-bit coeffs into one 32-bit word: [x2 | x1]\n", + " x1_32 = x1.astype(np.int32)\n", + " x2_32 = x2.astype(np.int32)\n", + " packed = (x1_32 & 0xFFFF) | ((x2_32 & 0xFFFF) << 16)\n", + " input_buffer[:] = packed\n", + "\n", + " # Start IP core (ap_start = 1 at control register 0x00)\n", + " mmio.write(0x00, 0x1)\n", + "\n", + " # Launch DMA transfers\n", + " dma_send.transfer(input_buffer)\n", + " dma_recv.transfer(output_buffer)\n", + " dma_send.wait()\n", + " dma_recv.wait()\n", + "\n", + " # Copy result out\n", + " y = np.array(output_buffer, dtype=np.int16)\n", + "\n", + " # Free buffers\n", + " try:\n", + " input_buffer.freebuffer()\n", + " output_buffer.freebuffer()\n", + " except AttributeError:\n", + " del input_buffer, output_buffer\n", + "\n", + " return y\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "309918cd", + "metadata": {}, + "outputs": [], + "source": [ + "# oho I-2\n", + "# encrypt-Decrypt Declarations in HW\n", + "\n", + "# ------------------------------------------------------------------\n", + "# Fixed noise profile that is known to work with your HW poly_mul:\n", + "# - compression: ON (original compress/decompress with du,dv)\n", + "# - noise scaled down deterministically by 1/4 in keygen & encrypt\n", + "# ------------------------------------------------------------------\n", + "\n", + "NOISE_DIV_KEY = 4 # *** added: scale s,e in keygen by 1/4 ***\n", + "NOISE_DIV_ENC = 4 # *** added: scale r,e1,e2 in encrypt by 1/4 ***\n", + "\n", + "def scale_noise(x, div):\n", + " # *** added: helper to reduce noise magnitude while preserving sign ***\n", + " x = np.asarray(x, dtype=np.int32)\n", + " if div == 1:\n", + " return x.astype(np.int16)\n", + " return (np.sign(x) * (np.abs(x) // div)).astype(np.int16)\n", + "\n", + "# Kyber PKE function-declarations with PolyMult in Hardware (and rest in SW)\n", + "\n", + "# Key generation function (to be performed by server)\n", + "def key_gen2():\n", + " # *** changed: explicit int16 dtype for a ***\n", + " a = np.random.randint(q, size=(k, k, n2), dtype=np.int16)\n", + "\n", + " # original CBD noise\n", + " s_raw = cbd_vector(n2, eta1, k)\n", + " e_raw = cbd_vector(n2, eta1, k)\n", + "\n", + " # *** added: scale noise for HW robustness ***\n", + " s = scale_noise(s_raw, NOISE_DIV_KEY)\n", + " e = scale_noise(e_raw, NOISE_DIV_KEY)\n", + "\n", + " b0 = (poly_mul_hw(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_hw(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_hw(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_hw(a[1,1], s[1]) + e[1]) % q\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23], dtype=np.int16)\n", + " return s, a, b\n", + "\n", + "# Encryption function (to be performed by client)\n", + "def encrypt2(a, b, m):\n", + " # original CBD noise\n", + " r_raw = cbd_vector(n2, eta1, k)\n", + " e1_raw = cbd_vector(n2, eta2, k)\n", + " e2_raw = cbd(n2, eta2)\n", + "\n", + " # *** added: scale noise for HW robustness ***\n", + " r = scale_noise(r_raw, NOISE_DIV_ENC)\n", + " e1 = scale_noise(e1_raw, NOISE_DIV_ENC)\n", + " e2 = scale_noise(e2_raw, NOISE_DIV_ENC)\n", + "\n", + " u0 = (poly_mul_hw(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_hw(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_hw(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_hw(a[1,1], r[1]) + e1[1]) % q\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23], dtype=np.int16)\n", + "\n", + " v0 = np.array(poly_mul_hw(b[0], r[0]), dtype=np.int16)\n", + " v1 = np.array(poly_mul_hw(b[1], r[1]), dtype=np.int16)\n", + " v = (v0 + v1 + e2 + m) % q\n", + "\n", + " # keep your original compression (du, dv from cell A)\n", + " u_c = compress(u, q, du)\n", + " v_c = compress(v, q, dv)\n", + " return u_c, v_c\n", + "\n", + "# Decryption function (to be performed by server)\n", + "def decrypt2(s, u, v):\n", + " # original compression/decompression path\n", + " u_dec = decompress(u, q, du)\n", + " v_dec = decompress(v, q, dv)\n", + "\n", + " p0 = np.array(poly_mul_hw(s[0], u_dec[0]), dtype=np.int16)\n", + " p1 = np.array(poly_mul_hw(s[1], u_dec[1]), dtype=np.int16)\n", + " p = (p0 + p1) % q\n", + " d = (v_dec - p) % q\n", + " return d\n" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "87afda4a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Actual message :\n", + " [1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 1 1 0 1 0 1 0\n", + " 1 1 1 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 0 0 1 1 1 1 0 0 0 1 0 1 0 0 1 0 0 0\n", + " 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1 0 0 1 0 1 0 1 0 1 1 1 0 0\n", + " 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 1 1 1\n", + " 1 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1\n", + " 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 1 0 0\n", + " 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 1 1 1 0]\n", + "Decrypted message :\n", + " [1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 1 1 0 1 0 1 0\n", + " 1 1 1 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 0 0 1 1 1 1 0 0 0 1 0 1 0 0 1 0 0 0\n", + " 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1 0 0 1 0 1 0 1 0 1 1 1 0 0\n", + " 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 1 1 1\n", + " 1 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1\n", + " 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 1 0 0\n", + " 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 1 1 1 0]\n", + "Actual message and decrypted message are the same!\n", + "\n", + "Time taken by HW-SW = 0.0352785587310791 seconds\n" + ] + } + ], + "source": [ + "# oho I-3\n", + "# Full HW RUN mit Zeitnahme\n", + "\n", + "# Get pre-computed factors\n", + "psis, inv_psis = gen_tf(psin, inv_psin, n, q)\n", + "pwmf = gen_pwmf(psin, n, q)\n", + "\n", + "start_hw = time.time()\n", + "\n", + "# Randomly generated binary message, m\n", + "m = np.random.randint(2, size=(n2,))\n", + "ms = decompress(m, q, 1)\n", + "\n", + "# Generating private key (s) and publik keys (a,b)\n", + "s, a, b = key_gen2()\n", + "\n", + "# Encrypting the message using public keys to provide cipher texts (u,v)\n", + "u, v = encrypt2(a, b, ms)\n", + "\n", + "# Decrypt the cipher using private key to obtain back the message (d)\n", + "d = decrypt2(s, u, v)\n", + "\n", + "# Decoding the decrypted message\n", + "md = []\n", + "for i in d:\n", + " if i > math.floor(q/4) and i < math.floor(3*q/4):\n", + " md.append(1)\n", + " else:\n", + " md.append(0)\n", + "md = np.array(md)\n", + "\n", + "end_hw = time.time()\n", + "\n", + "# Comparision and printing results\n", + "print(\"Actual message :\\n\", m)\n", + "print(\"Decrypted message :\\n\", md)\n", + "\n", + "if (list(m) == list(md)):\n", + " print(\"Actual message and decrypted message are the same!\")\n", + "else:\n", + " print(\"There is mismatch ....\")\n", + " \n", + "print()\n", + "print(\"Time taken by HW-SW =\", end_hw - start_hw, \"seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "c5879ca3", + "metadata": {}, + "outputs": [], + "source": [ + "###################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "1b207fc9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Speed-up factor SW(naive) vs SW(NTT) = 11.234882979648214\n", + "Speed-up factor HW vs SW(NTT) = 10.49500233156945\n" + ] + } + ], + "source": [ + "# oho J\n", + "# Zeitvergleich SW(naive vs. SW (NTT) && SW(NTT) vs. HW\n", + "SF1 = (end_sw_naive - start_sw_naive)/(end_sw - start_sw)\n", + "print(\"Speed-up factor SW(naive) vs SW(NTT) =\", SF1)\n", + "SF2 = (end_sw - start_sw)/(end_hw - start_hw)\n", + "print(\"Speed-up factor HW vs SW(NTT) =\", SF2)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "7199a8be", + "metadata": {}, + "outputs": [], + "source": [ + "###################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "d9b4e9e8", + "metadata": {}, + "outputs": [], + "source": [ + "###################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "c0723f32", + "metadata": {}, + "outputs": [], + "source": [ + "###################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "b56bbcb8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Load test config: N_POLYS=32, N_ITERS=2000, BATCH_BUFS=4\n", + "Running one sanity check vs your original poly_mul_hw...\n", + " OK: poly_mul_hw_fast matches poly_mul_hw on one sample.\n", + "\n", + "HW load test finished.\n", + "Total ops: 2000\n", + "Total time: 1.034378 s\n", + "Throughput: 1933.53 poly_mult/s\n" + ] + } + ], + "source": [ + "# oho Last-Test 1\n", + "# HW\n", + "# Load test for HW poly_mult: many back-to-back calls with preallocated buffers\n", + "\n", + "import numpy as np\n", + "import time\n", + "from pynq import allocate\n", + "\n", + "# ----------------------------------------------------------------------\n", + "# Configuration knobs for the load test\n", + "# ----------------------------------------------------------------------\n", + "N_POLYS = 32 # size of the pool of random input polynomials\n", + "N_ITERS = 2000 # total number of HW poly_mult invocations\n", + "BATCH_BUFS = 4 # number of DMA buffer pairs to rotate over (>=1)\n", + "\n", + "print(f\"Load test config: N_POLYS={N_POLYS}, N_ITERS={N_ITERS}, BATCH_BUFS={BATCH_BUFS}\")\n", + "\n", + "# Sanity\n", + "assert n2 == 256, f\"Expected n2=256, got {n2}\"\n", + "\n", + "# ----------------------------------------------------------------------\n", + "# Random input pool (kept in BRAM/DDR)\n", + "# ----------------------------------------------------------------------\n", + "A_pool = np.random.randint(q, size=(N_POLYS, n2), dtype=np.int16)\n", + "B_pool = np.random.randint(q, size=(N_POLYS, n2), dtype=np.int16)\n", + "\n", + "# ----------------------------------------------------------------------\n", + "# Pre-allocate DMA buffers we will *reuse* (important for realistic load)\n", + "# ----------------------------------------------------------------------\n", + "input_buffers = [allocate(shape=(n2,), dtype=np.int32) for _ in range(BATCH_BUFS)]\n", + "output_buffers = [allocate(shape=(n2,), dtype=np.int16) for _ in range(BATCH_BUFS)]\n", + "\n", + "def poly_mul_hw_fast(a: np.ndarray, b: np.ndarray, buf_idx: int) -> np.ndarray:\n", + " \"\"\"\n", + " Minimal-overhead HW polymult using pre-allocated buffers.\n", + " - a, b: int16 polynomials of length n2\n", + " - buf_idx: which buffer pair to use [0..BATCH_BUFS-1]\n", + " Returns: int16 polynomial of length n2\n", + " \"\"\"\n", + "\n", + " a = np.asarray(a, dtype=np.int16).reshape(n2,)\n", + " b = np.asarray(b, dtype=np.int16).reshape(n2,)\n", + "\n", + " ibuf = input_buffers[buf_idx]\n", + " obuf = output_buffers[buf_idx]\n", + "\n", + " # Pack two 16-bit coeffs into one 32-bit word: [b | a]\n", + " a32 = a.astype(np.int32)\n", + " b32 = b.astype(np.int32)\n", + " ibuf[:] = (a32 & 0xFFFF) | ((b32 & 0xFFFF) << 16)\n", + "\n", + " # Start IP core (ap_start = 1 at control register 0x00)\n", + " mmio.write(0x00, 0x1)\n", + "\n", + " # Launch DMA transfers (blocking until completion)\n", + " dma_send.transfer(ibuf)\n", + " dma_recv.transfer(obuf)\n", + " dma_send.wait()\n", + " dma_recv.wait()\n", + "\n", + " return np.array(obuf, dtype=np.int16)\n", + "\n", + "\n", + "# ----------------------------------------------------------------------\n", + "# Optional: one quick correctness sanity check against existing poly_mul_hw\n", + "# (comment this out if you don't want it)\n", + "# ----------------------------------------------------------------------\n", + "try:\n", + " from math import isfinite # just to avoid NameError for \"try\" block\n", + "\n", + " print(\"Running one sanity check vs your original poly_mul_hw...\")\n", + " test_a = A_pool[0]\n", + " test_b = B_pool[0]\n", + " y_fast = poly_mul_hw_fast(test_a, test_b, buf_idx=0)\n", + " y_orig = poly_mul_hw(test_a, test_b) # your existing wrapper from earlier cells\n", + "\n", + " if np.array_equal(y_fast % q, y_orig % q):\n", + " print(\" OK: poly_mul_hw_fast matches poly_mul_hw on one sample.\")\n", + " else:\n", + " diff = np.where((y_fast % q) != (y_orig % q))[0]\n", + " print(f\" WARNING: mismatch on sanity check, first diff index = {int(diff[0])}\")\n", + "except Exception as e:\n", + " print(\"Sanity check skipped due to error:\", e)\n", + "\n", + "\n", + "# ----------------------------------------------------------------------\n", + "# Actual load test\n", + "# ----------------------------------------------------------------------\n", + "t0 = time.time()\n", + "\n", + "for it in range(N_ITERS):\n", + " # Rotate through input pool and buffer pool\n", + " idx = it % N_POLYS\n", + " buf_idx = it % BATCH_BUFS\n", + "\n", + " a = A_pool[idx]\n", + " b = B_pool[idx]\n", + "\n", + " _ = poly_mul_hw_fast(a, b, buf_idx) # discard result; we only care about throughput\n", + "\n", + "t1 = time.time()\n", + "\n", + "elapsed = t1 - t0\n", + "ops_per_sec = N_ITERS / elapsed if elapsed > 0 else float('inf')\n", + "\n", + "print(f\"\\nHW load test finished.\")\n", + "print(f\"Total ops: {N_ITERS}\")\n", + "print(f\"Total time: {elapsed:.6f} s\")\n", + "print(f\"Throughput: {ops_per_sec:.2f} poly_mult/s\")\n", + "\n", + "# ----------------------------------------------------------------------\n", + "# Cleanup: free DMA buffers (if your Pynq version supports freebuffer)\n", + "# ----------------------------------------------------------------------\n", + "for buf in input_buffers + output_buffers:\n", + " try:\n", + " buf.freebuffer()\n", + " except AttributeError:\n", + " pass\n" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "44500fb9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SW NTT load test config: N_ITERS_SW=2000\n", + "Sanity check: poly_mul_sw deterministic on identical inputs.\n", + "\n", + "SW NTT load test finished.\n", + "Total ops: 2000\n", + "Total time: 70.849309 s\n", + "Throughput: 28.23 poly_mult/s\n" + ] + } + ], + "source": [ + "# oho SW_LOAD_NTT\n", + "# Load test for NTT-based poly_mul_sw (NOT the naive schoolbook version)\n", + "\n", + "import numpy as np\n", + "import time\n", + "\n", + "# --------------------------------------------------------------------\n", + "# Config knobs for the SW NTT load test\n", + "# --------------------------------------------------------------------\n", + "N_ITERS_SW = 2000 # number of poly_mult operations to run in the loop\n", + "\n", + "print(f\"SW NTT load test config: N_ITERS_SW={N_ITERS_SW}\")\n", + "\n", + "# Sanity: ensure psis / inv_psis / pwmf exist (from your oho C / D)\n", + "try:\n", + " _ = psis[0]\n", + " _ = inv_psis[0]\n", + " _ = pwmf[0]\n", + "except NameError as e:\n", + " raise RuntimeError(\n", + " \"psis, inv_psis, pwmf must be defined before this cell \"\n", + " \"(run your gen_tf/gen_pwmf cell).\"\n", + " ) from e\n", + "\n", + "# --------------------------------------------------------------------\n", + "# Prepare random input polynomials\n", + "# --------------------------------------------------------------------\n", + "# We pre-generate inputs so the measured time is dominated by the NTT work,\n", + "# not by RNG calls.\n", + "a_sw = np.random.randint(q, size=(N_ITERS_SW, n2), dtype=np.int16)\n", + "b_sw = np.random.randint(q, size=(N_ITERS_SW, n2), dtype=np.int16)\n", + "\n", + "# --------------------------------------------------------------------\n", + "# Sanity check: poly_mul_sw is deterministic for same input\n", + "# (and we are indeed calling the NTT-based version)\n", + "# --------------------------------------------------------------------\n", + "z0 = np.array(poly_mul_sw(a_sw[0].copy(), b_sw[0].copy()), dtype=np.int16)\n", + "z1 = np.array(poly_mul_sw(a_sw[0].copy(), b_sw[0].copy()), dtype=np.int16)\n", + "\n", + "if not np.array_equal(z0 % q, z1 % q):\n", + " print(\"WARNING: poly_mul_sw appears non-deterministic on repeated calls.\")\n", + "else:\n", + " print(\"Sanity check: poly_mul_sw deterministic on identical inputs.\")\n", + "\n", + "# --------------------------------------------------------------------\n", + "# Load test: run N_ITERS_SW NTT-based multiplications\n", + "# --------------------------------------------------------------------\n", + "start_sw_load = time.time()\n", + "\n", + "for i in range(N_ITERS_SW):\n", + " # NOTE: we pass the arrays directly. poly_mul_sw/ct_ntt mutate their\n", + " # inputs, but we never reuse a_sw[i] or b_sw[i] later, so this is fine\n", + " # and avoids extra copy overhead in the hot loop.\n", + " _ = poly_mul_sw(a_sw[i], b_sw[i])\n", + "\n", + "end_sw_load = time.time()\n", + "\n", + "elapsed_sw = end_sw_load - start_sw_load\n", + "throughput_sw = N_ITERS_SW / elapsed_sw\n", + "\n", + "print(\"\\nSW NTT load test finished.\")\n", + "print(f\"Total ops: {N_ITERS_SW}\")\n", + "print(f\"Total time: {elapsed_sw:.6f} s\")\n", + "print(f\"Throughput: {throughput_sw:.2f} poly_mult/s\")\n", + "\n", + "# If you stored the HW throughput in a variable (say hw_throughput),\n", + "# you can compare directly, e.g.:\n", + "# print(f\\\"HW throughput: {hw_throughput:.2f} poly_mult/s\\\")\n", + "# print(f\\\"HW / SW speedup (NTT-based SW): {hw_throughput / throughput_sw:.2f}x\\\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "2e06ff20", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Matplotlib is building the font cache; this may take a moment.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== PolyMult HW vs SW NTT benchmark ===\n", + "N_ITERS : 2000\n", + "\n", + "Hardware (NTT accelerator via DMA):\n", + " Total time : 3.083 s\n", + " Throughput : 648.67 poly_mult/s\n", + " Latency per op : 1.542 ms\n", + "\n", + "Software (NTT in Python/NumPy):\n", + " Total time : 71.589 s\n", + " Throughput : 27.94 poly_mult/s\n", + " Latency per op : 35.795 ms\n", + "\n", + "Speedup (HW / SW NTT): 23.2×\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEICAYAAABbOlNNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAkF0lEQVR4nO3debwU1Zn/8c8XiWLiHlFZRRNGwVEQb8QtUaMZMCbRn1swzgSVDFnMhIkxRjNONAuOcVzHicmgE0Pc0cTAJE6UYBCjuACiKGpkBAVRwYXNBUSe3x91Wqubvpe6cPt2w/2+X69+ddepU1VPd1f3U3XO6WpFBGZmZiWd6h2AmZk1FicGMzMr48RgZmZlnBjMzKyME4OZmZVxYjAzszJODDUiaZ6kI9txexdIumED1zFZ0lfaKqYN0d6vX0fTSO+1NR4nhnVIX1BvS1oh6RVJ10naqg3Xf6qkkHRZRfmxqfxX67HOPmnZzi3U2eBEsrFKr83HW1H/VEl/qVI+T9KRkjqn/WP/3LxT0nYqy57e8GfQ2Bp535L0fUlz0/u1QNKtqfxkSbMr6k5spuycZtYdkmZJ6pQr+4mkX0n6ZNrmCklvprql6TXpVpqOVKc0/clavBYtcWIo5vMRsRUwCPgEcF4br///gC9WfJF/GfhrG2+nzbSUdDqaiFgNTAUOzRV/Cni6StmUdgytzbXH+16rbUgaDvwDcGT6PDcBk9Lse4F+krrmYhgAfLii7EBafg+7A8MqCyPivojYKm13r1S8XSrrlG6l+QADStMRcd8GPfH14MTQChHxIvC/wN8CSPqCpCclLUmn5v0ql5G0i6S3JH00V7afpMWSPpSKXgZmAUPS/B2Ag4AJuWUOk7SgYt3NNbeUdtwl6YjjwIrlhgLfJ0tGKyQ9lpu9q6T7JS2XdLekHdMypbOQEZJeAO6R1EnSeZKel7RI0q8lbVskXklbShor6Q1JT0k6u7I+MFDS45KWSrpVUpf8utPR36tpvafktlPWTJI/4pdUem0eS8/9i1Vev/UxheyLv+STwE+rlFX9UpF0tKRHJS2TNF/SBRXzD5H0QNrX5ks6NZVvKenS9B4slfQXSVumeQfklnlM0mHNBS/p9PQ+vCHpLkm75uaFpDMkPQs8m8quTHEskzS9dFTb3L4lqbukCZJelzRH0j/m1n+BpNsl3SBpGXBqRWwHSHpZ0ma5sv8n6fH0eH9J01Isr6ji7DvnE8BdEfF/ABHxckSMSY8XAs/xwfs1CHiSLGHkyzoB05p7HYGLgR9qIz9wcmJoBUm9gM8Cj0r6G+Bm4J+BrsCdwP9I2jy/TES8DEwGTsoV/z1wS0S8myv7NdlZAmRHHOOBlesZamlHLh2RTK2I6Y/AhcCtaf6A3OwvAacBOwGbA2dVrPtQoB9ZEjs13Q4Hdge2Av6zYIznA33Scp8he00qnQQMBXYD9qH8C2MXYEegBzAcGCNpj3VtNCJKr03piKzUlLBE0iEFY69mCnBwSpY7Ah8BxgH758r2pPmjzTfJ3v/tgKOBr0s6NsXWm+yA5CqyfW0gMDMtdwmwH9mBxA7A2cAaST2APwA/SeVnAb9ROvrNS9v5PnBcWv99ZPt23rHAYKB/mn4kxbEDcBNwm6QuLexbNwMLyI6oTwAulHREbv3HALen539jfsMR8WB6fT6dK/5S2i7AlcCVEbEN8DGy172aB4EvS/qupKZ8oknyyf1T6XX4S0XZgxGxqpn1A/wWWCu5bWycGIr5naQlZDvJvWQ7/heBP0TExPQFfwmwJdkHtNJY0hdf2hlPBq6vqHMHcFg64v4yWaKoh+si4q8R8TbZB2xgxfwLIuLNNP8U4LKIeC4iVgDnAsMKHi2dBFwYEW9ExALgP6rU+Y+IWBgRrwP/UyWWf42IlRFxL9mX4EmVKygqIraLiLX6EXIOSMnj/RvQOzf/IeDDwN5kZwZ/iYi3gLm5sucj4oVmtj85ImZFxJqIeJzsi7TUDHUK8KeIuDki3o2I1yJiprK27NOBURHxYkS8FxEPRMRKsv3tzoi4M61zItmR7merbP6rwL9FxFOpWexCsrO1XXN1/i0iXk/vOxFxQ4pjdURcCmwBVE3M6YDqEOB7EfFORMwEriVr1imZGhG/S7G+XWU1N5N9bpC0dXoepeT1LvBxSTtGxIqUSKq9xjcA/0R2UHMvsEjl/QX5s4NPkiWG+yrK7q227vxmgH8FfiBpi3XUbVhODMUcm744do2Ib6QdtzvwfKlCRKwB5pMdwVYaD/SXVDo6XhoRD+crpHX+gaz/YseIuL9Gz2VdXs49fovsLCBvfu5x2WuQHncGdi6wne4V65pfpU5LsbwREW9WbLt7ge2urwfTPvD+DXj/Sz4i3gEeJvsSKR1twgdHnC32L0gaLOnPypoYlwJfIzsjAuhF1g9VaUegSzPzdgVOrEhkhwDdmql7Za7e64Ao35fL3h9J30lNT0vTMtvm4q3UHXg9Ipbnyp5vaf1V3AQcl75sjwNmRERp3xsB/A3wtKRHJH2uuZVExI0RcSTZmcnXgB9JGpJmTwH2kbQ9cABZsnoa6JbKDqFAH1FE3Em2b4xcV91G5cSw/haSfaAAkCSyD/CLlRXTl8Y4siO/f2Dts4WSXwPfaWb+m2RHpKXtbUZ22l9NkUvmru9ldfPLlb0GZEfQq4FXWHe8LwE9c9O9WhnH9pI+UrHthelx2bbJmp3aQ6kponS0CR8ccTbbv5DcRNan1CsitgV+QfblDNmX5seqLPMq8E4z8+YD11cks49ExEXN1P1qRd0tI+KBXJ333/fUn/A9sjO07VOSXJqLt3LfWgjskI70S3pT/llpcX+MiNlkyeQoypuRiIhnI+JksubPnwK3V+wb1db3bkTcBjxO6jOMiOdSrCOBF9JZMGQDC0aSHZhUPRup4jzgXyjfDzcaTgzrbxxwtKQjlHUif4esT+CBZur/mqzd8QtAc0P57iU7o7iqyry/Al2UdVJ+iGzHa+5UdTGwhqz9vjmvAH2UG1q3Hm4Gvi1pN2VDeEtty6sLxDsOOFfS9qk9/Jvrsf0fSto8fVF9Drgtlc8kO7r8sLJhqSMqlnuFll+b9TWFrL+lF1Aa5vgX4DCyZrCWEsPWZEfV7ygb4vql3LwbgSMlnaRsaOxHJQ1MZ6m/BC5LnbubSTowHVXfAHxe0pBU3kVZp33PtTfNL8jei70AJG0r6cR1xLqabD/rLOkHwDa5+WX7VkTMJ/tc/FuKYx+y9+RGWucm4Ftkibb0XiPp7yV1Ta/HklT8XuXCygYhHC1p69TvcxTZCKGHctXuA87kg8QO2Xt4JjCtmWautUTEZLIBJcMLPreG4sSwniLiGbJ23KvIjtw+TzastWrHVGoaWkN2CjyvmToREZNSm3rlvKXAN8jaZl8kOyquHMVTqvsWMBq4PzUPHFClWumD9ZqkGc0+0Zb9kuzsZgpZW/o7ZG24ReL9UZqeC/yJrOOxNZ3tLwNvkB3h3Qh8LZ32A1wOrCL7ghrL2l9AFwBj02tzEoDaZrz4A2RNKg9FZH90EhGvkX2BLoqIZ1tY9htkzRrLgR+Q60BN/RKfJTv4eJ0s8ZU6dc8i+wJ6JM37KdApfRkfQ9apvJjsrOC7VPnMR8QdablblI0KeoLsyLw5d5F1hv+V7Cj+HcqbgqrtWyeTDTZYSNafdn7q92iNm8mS7D0R8WqufCjwpKQVZB3Rw9JZeqVlZK/HC2QJ5GLg6xV9S/eSnXnky+5LZa0danweWef8RkfhP+ppN5LuAW6KiGvrHUujkfR1sg/0oQXqHgbcEBHVjn7NbAP5jKGdSPoE2TjoW+sdSyOQ1E1SaXjnHmRHw3fUOy4zy0aQWI1JGks2DnxUxciMjmxz4L/IfqOwBLgFuLqeAZlZxk1JZmZWxk1JZmZWZqNvStpxxx2jT58+9Q7DzGyjMn369FcjoupvoTb6xNCnTx+mTWvpmlZmZlZJ0vPNzXNTUgeyZMkSTjjhBPbcc0/69evH1KkfXFvvkksuQRKvvpoND1+1ahWnnXYae++9NwMGDGDy5Ml1itrM2ttGf8ZgxY0aNYqhQ4dy++23s2rVKt566y0A5s+fz8SJE+nd+4Nrwl1zzTUAzJo1i0WLFnHUUUfxyCOP0KmTjyXMNnX+lHcQy5YtY8qUKYwYkV0dYvPNN2e77bYD4Nvf/jYXX3wx2eWeMrNnz+aII7KrIu+0005st912brIz6yCcGDqI5557jq5du3Laaaex77778pWvfIU333yTCRMm0KNHDwYMGFBWf8CAAYwfP57Vq1czd+5cpk+fzvz567oAppltCtyU1EGsXr2aGTNmcNVVVzF48GBGjRrFBRdcwJQpU7j77rvXqn/66afz1FNP0dTUxK677spBBx1E587eXcw6Ap8xdBA9e/akZ8+eDB48GIATTjiBGTNmMHfuXAYMGECfPn1YsGABgwYN4uWXX6Zz585cfvnlzJw5k/Hjx7NkyRL69u1b52dhZu3BiaGD2GWXXejVqxfPPPMMAJMmTWLQoEEsWrSIefPmMW/ePHr27MmMGTPYZZddeOutt3jzzex/cCZOnEjnzp3p379/S5sws02E2wY6kKuuuopTTjmFVatWsfvuu3Pdddc1W3fRokUMGTKETp060aNHD66/vrn/FjKzTc1Gf62kpqam8GgZM7PWkTQ9IpqqzevQZwx9zvlDvUOwBjbvoqPrHYJZXbiPwczMyjgxmJlZGScGMzMr48RgZmZlnBjMzKyME4OZmZVxYjAzszJODGZmVsaJwczMyjgxmJlZGScGMzMrU/PEIGk7SbdLelrSU5IOlLSDpImSnk332+fqnytpjqRnJA2pdXxmZlauPc4YrgT+GBF7AgOAp4BzgEkR0ReYlKaR1B8YBuwFDAWulrRZO8RoZmZJTRODpG2ATwH/DRARqyJiCXAMMDZVGwscmx4fA9wSESsjYi4wB9i/ljGamVm5Wp8x7A4sBq6T9KikayV9BNg5Il4CSPc7pfo9gPw/zi9IZWUkjZQ0TdK0xYsX1/YZmJl1MLVODJ2BQcDPI2Jf4E1Ss1EzVKVsrX8SiogxEdEUEU1du3Ztm0jNzAyofWJYACyIiIfS9O1kieIVSd0A0v2iXP1eueV7AgtrHKOZmeXUNDFExMvAfEl7pKIjgNnABGB4KhsOjE+PJwDDJG0haTegL/BwLWM0M7Ny7fHXnv8E3Chpc+A54DSyhDRO0gjgBeBEgIh4UtI4suSxGjgjIt5rhxjNzCypeWKIiJlAtT+cPqKZ+qOB0bWMyczMmudfPpuZWRknBjMzK+PEYGZmZZwYzMysjBODmZmVcWIwM7MyTgxmZlbGicHMzMo4MZiZWRknBjMzK+PEYGZmZZwYzMysjBODmZmVcWIwM7MyTgxmZlbGicHMzMo4MZiZWRknBjMzK+PEYGZmZZwYzMysjBODmZmVcWIwM7MyTgxmZlam5olB0jxJsyTNlDQtle0gaaKkZ9P99rn650qaI+kZSUNqHZ+ZmZVrrzOGwyNiYEQ0pelzgEkR0ReYlKaR1B8YBuwFDAWulrRZO8VoZmZA55ZmShrU0vyImLGe2z0GOCw9HgtMBr6Xym+JiJXAXElzgP2Bqeu5HTMza6UWEwNwabrvAjQBjwEC9gEeAg4psI0A7pYUwH9FxBhg54h4CSAiXpK0U6rbA3gwt+yCVFZG0khgJEDv3r0LhGBmZkW12JQUEYdHxOHA88CgiGiKiP2AfYE5BbdxcEQMAo4CzpD0qRbqqloYVeIak2Jp6tq1a8EwzMysiKJ9DHtGxKzSREQ8AQwssmBELEz3i4A7yJqGXpHUDSDdL0rVFwC9cov3BBYWjNHMzNpA0cTwlKRrJR0m6VBJ1wBPrWshSR+RtHXpMfB3wBPABGB4qjYcGJ8eTwCGSdpC0m5AX+Dh4k/HzMw21Lr6GEpOA74OjErTU4CfF1huZ+AOSaVt3RQRf5T0CDBO0gjgBeBEgIh4UtI4YDawGjgjIt4r+mTMzGzDFUoMEfEOcHm6FRYRzwEDqpS/BhzRzDKjgdGt2Y6ZmbWdQolB0sHABcCu+WUiYvfahGVmZvVStCnpv4FvA9MBN+2YmW3CiiaGpRHxvzWNxMzMGkLRxPBnSf8O/BZYWSrcgF8+m5lZgyqaGAan+6ZcWQCfbttwzMys3oqOSjq81oGYmVljKPQDN0nbSrpM0rR0u1TStrUOzszM2l/RXz7/ElgOnJRuy4DrahWUmZnVT9E+ho9FxPG56R9KmlmDeMzMrM6KnjG8Len9S2ynH7y9XZuQzMysnoqeMXwdGJvrV3gDOLUmEZmZWV0VHZU0ExggaZs0vayWQZmZWf0UHZV0oaTtImJZRCyTtL2kn9Q6ODMza39F+xiOioglpYmIeAP4bE0iMjOzuiqaGDaTtEVpQtKWwBYt1Dczs41U0c7nG4BJkq4juxTG6cDYmkVlZmZ1U7Tz+WJJjwNHAgJ+HBF31TQyMzOri6JnDJD9x/PqiPiTpA9L2joiltcqMDMzq4+io5L+Ebgd+K9U1AP4XY1iMjOzOira+XwGcDDZNZKIiGeBnWoVlJmZ1U/RxLAyIlaVJiR1JuuENjOzTUzRxHCvpO8DW0r6DHAb8D+1C8vMzOqlaGI4B1gMzAK+CtwJnFeroMzMrH4KJYaIWBMR10TEicBI4KGIKNyUJGkzSY9K+n2a3kHSREnPpvvtc3XPlTRH0jOShrT2CZmZ2YYpOippsqRtJO0AzASuk3RZK7Yzimy4a8k5wKSI6AtMStNI6g8MA/YChgJXS9qsFdsxM7MNVLQpadt0RdXjgOsiYj+yH7utk6SewNHAtbniY/jgl9NjgWNz5bdExMqImAvMAfYvGKOZmbWBoomhs6RuZH/r+ftWbuMK4GxgTa5s54h4CSDdl4a+9gDm5+otSGVlJI0s/f/04sWLWxmOmZm1pGhi+BFwFzAnIh6RtDvw7LoWkvQ5YFFETC+4HVUpW6svIyLGRERTRDR17dq14KrNzKyIotdKuo1siGpp+jng+OaXeN/BwBckfRboAmwj6QbgFUndIuKldCayKNVfAPTKLd8TWFgkRjMzaxtFzxjWS0ScGxE9I6IPWafyPRHx98AEYHiqNhwYnx5PAIZJ2kLSbkBf4OFaxmhmZuVacxG9tnQRME7SCOAF4ESAiHhS0jhgNrAaOCMi3qtTjGZmHdI6E4OkTsAJETFuQzYUEZOByenxa8ARzdQbDYzekG2Zmdn6W2dTUkSsAb7ZDrGYmVkDKNrHMFHSWZJ6pV8t75B+7GZmZpuYon0Mp6f7M3JlAezetuGYmVm9FR2uulutAzEzs8ZQ9FpJH5Z0nqQxabpv+vGamZltYor2MVwHrAIOStMLgJ/UJCIzM6uroonhYxFxMfAuQES8TfXLV5iZ2UauaGJYJWlL0nWLJH0MWFmzqMzMrG6Kjko6H/gj0EvSjWTXQDq1VkGZmVn9FB2VNFHSDOAAsiakURHxak0jMzOzumjNtZIOBQ4ha076EHBHTSIyM7O6Kjpc9Wrga8As4Angq5J+VsvAzMysPoqeMRwK/G1ElDqfx5IlCTMz28QUHZX0DNA7N90LeLztwzEzs3oresbwUeApSaU/zfkEMFXSBICI+EItgjMzs/ZXNDH8oKZRmJlZwyg6XPXeWgdiZmaNoab/+WxmZhsfJwYzMytT9HcMo4qUmZnZxq/oGcPwKmWntmEcZmbWIFrsfJZ0MvAlYLfS0NRka+C1WgZmZmb1sa5RSQ8ALwE7ApfmypfjH7iZmW2SWkwMEfE88Dxw4PqsXFIXYAqwRdrW7RFxvqQdgFuBPsA84KSIeCMtcy4wAngP+FZE3LU+2zYzs/XTYh+DpOWSllW5LZe0rMD6VwKfjogBwEBgqKQDgHOASRHRF5iUppHUHxgG7AUMBa6WtNl6PzszM2u1FhNDRGwdEdtUuW0dEdusa+WRWZEmP5RuARwDjE3lY4Fj0+NjgFsiYmVEzAXmAPu3/mmZmdn6KvTLZ0m9q5VHxAsFlt0MmA58HPhZRDwkaeeIeCmt4yVJO6XqPYAHc4svSGWV6xwJjATo3btqaGZmtp6KXivpD7nHXYDdyK64ute6FoyI94CBkrYD7pD0ty1UV7VVVFnnGGAMQFNT01rzzcxs/RW9VtLe+WlJg4CvtmZDEbFE0mSyvoNXJHVLZwvdgEWp2gKyS3qX9AQWtmY7Zma2YdbrkhgRMYPs0tstktQ1nSkgaUvgSOBpYAIf/GhuODA+PZ4ADJO0haTdgL7Aw5iZWbsp2sdwZm6yEzAIWFxg0W7A2NTP0AkYFxG/lzQVGCdpBPACcCJARDwpaRwwG1gNnJGaoszMrJ0U7WPYOvd4NVmfw2/WtVBEPA7sW6X8NeCIZpYZDYwuGJeZmbWxon0MP6x1IGZm1hiKNiU1Af8C7JpfJiL2qVFcZmZWJ0Wbkm4EvgvMAtbULhwzM6u3oolhcURMWHc1MzPb2BVNDOdLupbsukYrS4UR8duaRGVmZnVTNDGcBuxJdq2jUlNSAE4MZmabmKKJYUDlr5/NzGzTVPSXzw+mS2KbmdkmrugZwyHAcElzyfoYRHZVbQ9XNTPbxBRNDENbmilp+9I/sJmZ2cat6C+fn19HlUlk108yM7ON3HpdXbWKav+jYGZmG6G2Sgz+sxwzs01EWyUGMzPbRLgpyczMyhRKDJIukdTS/ztX/W8FMzPb+BQ9Y3gaGCPpIUlfk7RtfmZEvN72oZmZWT0USgwRcW1EHAx8GegDPC7pJkmH1zI4MzNrf4X7GNL/Nu+Zbq8CjwFnSrqlRrGZmVkdFP0Ht8uAL5D9kO3CiHg4zfqppGdqFZyZmbW/opfEeAI4LyLeqjJv/zaMx8zM6qzFxCCpdJmLmcCeUvmo1IiYERFLaxOamZnVw7rOGC5tYV4An27DWMzMrAG0mBgiYoNGHUnqBfwa2IXsn9/GRMSVknYAbiUb4TQPOKl0dVZJ5wIjgPeAb0XEXRsSg5mZtU7RH7h9SNK3JN2ebt+U9KECi64GvhMR/YADgDPSH/6cA0yKiL5kHdrnpO30B4YBe5Fd6vvqNBrKzMzaSdHhqj8H9gOuTrf9UlmLIuKliJiRHi8HngJ6AMcAY1O1scCx6fExwC0RsTIi5gJzcOe2mVm7Kjoq6RMRMSA3fY+kx1qzIUl9gH2Bh4CdI+IlyJKHpJ1StR7Ag7nFFqSyynWNBEYC9O7duzVhmJnZOhQ9Y3hP0sdKE5J2J+sDKETSVsBvgH+OiGUtVa1SttYlvSNiTEQ0RURT165di4ZhZmYFFD1j+C7wZ0nPkX157wqcVmTB1BfxG+DGiPhtKn5FUrd0ttANWJTKFwC9cov3BBYWjNHMzNpA0b/2nCSpL7AHWWJ4OiJWrms5ZT98+G/gqYi4LDdrAjAcuCjdj8+V35R+ad0d6As8jJmZtZuil8ToAnwDOISsaec+Sb+IiHfWsejBwD8AsyTNTGXfJ0sI4ySNAF4ATgSIiCcljQNmk41oOiMiCjdZmZnZhivalPRrYDlwVZo+Gbie9IXenIj4C83/iU/V/3CIiNHA6IJxmZlZGyuaGPaoGJX059aOSjIzs41D0VFJj0o6oDQhaTBwf21CMjOzeip6xjAY+LKkF9J0b+ApSbOAiIh9ahKdmZm1u6KJYWhNozAzs4ZRdLjq87UOxMzMGkPhv/Y0M7OOwYnBzMzKODGYmVkZJwYzMyvjxGBmZmWcGMzMrIwTg5mZlXFiMDOzMk4MZmZWxonBzMzKODGYmVkZJwYzMyvjxGBmZmWcGMzMrIwTg5mZlXFiMDOzMk4MZmZWxonBzMzK1DQxSPqlpEWSnsiV7SBpoqRn0/32uXnnSpoj6RlJQ2oZm5mZVVfrM4ZfAUMrys4BJkVEX2BSmkZSf2AYsFda5mpJm9U4PjMzq1DTxBARU4DXK4qPAcamx2OBY3Plt0TEyoiYC8wB9q9lfGZmtrZ69DHsHBEvAaT7nVJ5D2B+rt6CVLYWSSMlTZM0bfHixTUN1syso2mkzmdVKYtqFSNiTEQ0RURT165daxyWmVnHUo/E8IqkbgDpflEqXwD0ytXrCSxs59jMzDq8eiSGCcDw9Hg4MD5XPkzSFpJ2A/oCD9chPjOzDq1zLVcu6WbgMGBHSQuA84GLgHGSRgAvACcCRMSTksYBs4HVwBkR8V4t4zMzs7XVNDFExMnNzDqimfqjgdG1i8jMzNalkTqfzcysATgxmJlZGScGMzMr48RgZmZlnBjMzKyME4OZmZVxYjAzszJODGZmVsaJwcwa0vz58zn88MPp168fe+21F1deeSUAM2fO5IADDmDgwIE0NTXx8MO+ck5bq+kvn83M1lfnzp259NJLGTRoEMuXL2e//fbjM5/5DGeffTbnn38+Rx11FHfeeSdnn302kydPrne4mxQnBjNrSN26daNbt24AbL311vTr148XX3wRSSxbtgyApUuX0r1793qGuUlyYjCzhjdv3jweffRRBg8ezBVXXMGQIUM466yzWLNmDQ888EC9w9vkuI/BzBraihUrOP7447niiivYZptt+PnPf87ll1/O/PnzufzyyxkxYkS9Q9zkODGYWcN69913Of744znllFM47rjjABg7duz7j0888UR3PteAE4OZNaSIYMSIEfTr148zzzzz/fLu3btz7733AnDPPffQt2/feoW4yXIfg5k1pPvvv5/rr7+evffem4EDBwJw4YUXcs011zBq1ChWr15Nly5dGDNmTH0D3QQ5MZhZQzrkkEOIiKrzpk+f3s7RdCxODGYNrs85f6h3CNag5l10dE3W6z4GMzMr48RgZmZlnBjMzKyME4OZmZVxYjAzszINlxgkDZX0jKQ5ks6pdzxmZh1NQyUGSZsBPwOOAvoDJ0vqX9+ozMw6loZKDMD+wJyIeC4iVgG3AMfUOSYzsw6l0X7g1gOYn5teAAyurCRpJDAyTa6Q9Ew7xNYR7Ai8Wu8gGoV+Wu8IrArvozkbuI/u2tyMRksMqlK21m/iI2IM4AuktDFJ0yKiqd5xmDXH+2j7aLSmpAVAr9x0T2BhnWIxM+uQGi0xPAL0lbSbpM2BYcCEOsdkZtahNFRTUkSslvRN4C5gM+CXEfFkncPqSNw8Z43O+2g7UHOXtTUzs46p0ZqSzMyszpwYzMysjBNDg5C0omL6VEn/KWk7Sa9JUio/UFJI6pmmt5X0uqROFctfIOktSTvltyHpo5JmptvLkl5Mj9+TNDs9fl3S3PT4T+3x/HMxTpbk4YibGEn/IulJSY+n/WqwpGMk/S5X51xJc3LTn5e01uCTtI9My003pbIhuX17Rbq0zkxJ83PlqyTNSo8vqvkT30g1VOezrS0ilkh6GegHzAYOAh5N9+OAA4CHImJNlcVfBb4DfC+3vteAgZAlD2BFRFySX0jSr4DfR8Ttbfx0rAOSdCDwOWBQRKyUtCOwOfAc5Z3JBwLLJO0UEYvI9vH7m1ntTpKOioj/LRVExF1kA1eQNBk4KyKm5ReSNA84PCL8I7kW+Ixh43A/2YeEdH95xfQDzSz3S+CLknZoy2Ak/U7S9HQEODJXPlTSDEmPSZqUyraSdF06Sntc0vGp/O8kTU31b5O0VVvGaA2lG/BqRKwEiIhXI2JhRCwGlkr6eKrXA/gNxfbtfwfOq2HMHZoTQ+PYMne6OxP4UW7eA3zwYdkduA0oNbe0dFS1giw5jGrjWE+PiP1SDN9KzVNdgWuA4yNiAHBiqvuvwNKI2Dsi9gHuSUeM5wFHRsQgYBpwZhvHaI3jbqCXpL9KulrSobl5DwAHSdoDeBZ4ME13BvYh+21TNVOBlZIOr2XgHZUTQ+N4OyIGlm7AD3Lz7if7sOwGzIuIdwClo+z9gIdbWO9/AMMlbdOGsX5L0mNkH+JeQF+yJq0pETEXICJeT3WPJLtiLqn8jVS3P3B/SoLDaeG6LbZxi4gVZPvpSGAxcKukU9Ps0tnwQWRf9g+TXR9tX+CZtK835yf4rKEm3MewEYiIZyVtD3ye7MMDMB04DZibPnjNLbtE0k3AN9oiFkmHkX3ZHxgRb6W23C5k17mq9qOYauUCJkbEyW0RkzW+iHgPmAxMljSL7GDgV2RnDP9E9oPWayJiuaQuwGE0fyZcWuc9kn5MdqBhbchnDBuPqWRNQlNz0/9M822weZcBX6VtDgS2Bd5ISWFPPvhQTgUOTWc15Po17ga+WVo4JbgHgYNLbcuSPizpb9ogNmtAkvaQ1DdXNBB4Pj2eDXQHPkk2qAJgJvA1iu3bo4Gz2yRQe58Tw8bjfrJmm9Ioi6lk/Q3r/PCkERh3AFu0QRx/BDpLehz4MdmXPKkjcSTw29TMdGuq/xNge0lPpPLDU91TgZvTeh4E9myD2KwxbQWMTcOhHydrRrwAILJLLzxE1jn9bqrfmn37TrLmKWtDviSGmZmV8RmDmZmVcWIwM7MyTgxmZlbGicHMzMo4MZiZWRknBjMzK+PEYGZmZf4/7SqEr/kO8fgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAjTklEQVR4nO3de7wVdb3/8dcb1DQBEQUP4oU0DRVxq1sNLUWT8oaaHku6eMvISotfdszTr/NTU0+WlXmyPEeLxEzxlkV4OeIFTUQIElQ08waKIm5UBLwifH5/zHfrsF1772GzZm026/18PNZjz3zn+535rL1mfea7vjNrliICMzOrL906OwAzM6s9J38zszrk5G9mVoec/M3M6pCTv5lZHXLyNzOrQ07+NSRpjqQDa7i9syVdVVZ9qy1JV0g6r7PjsLWDk38HpCT+pqSlkhZI+p2kHlVc/wmSQtLPW5Qfmcqv6MA6B6a261QpxjU6EUkaJmlehfJJkk5O049L+lxu2T7pf9SybGm1/m9rqrTP3dfZcVQi6SuS/iFpSXq/3Sypp6ShkhZL6p6re3krZf/dyrrnpHVumCs7Oe0nW6XXvvkRkl7PzS/PTa/I5YSlkr5Y7n9l9Tn5d9yIiOgB7AbsAfygyut/Cvh8i6RzHPDPKm+nnt0L7Jeb3xf4R4Wy+yPi3VoGVk21OHCVtQ1J+wH/CYyMiJ7ADsB1afF0oDvZe7DZJ4EXWpTtS/Zat2Yd4NstCyPi2Yjo0fxIxbvkyrrnlj1Lygnp8YcOPN2acvJfTRHxPHArMBhA0uGSZktalHoPO7RsI+lfJL0haZNc2e6SmiStm4peBB4GPpOW9wH2Bsbn2nygd9vG0FLzzr8o9UyGtvfcJF0v6UVJr0m6V9JOqXwU8EXgjLSuv6TyzSXdmJ7HM5K+lVvX2ZKuk3Rl6sHNltSYW76lpD+mti9LukTShyS9ImnnXL1+qYfVt734C7iXLDE0+yTw4wplFROHpD0lTUmv9fwU83q55TtJmpiewwJJ30/l3SV9X9JT6X8xQ9KWadmgXJuVPplU2P5hkmam7d8vaUhu2RxJ35P0EPC6pHUknZnb5qOSPpvq7gD8NzA0vZ6LUvlG6fVqkjRX0g8kdUvLTpA0WdJFkl4Bzm4R2+bpdeqTK9tV0kJJ60r6qKR70r61UNK1rTzNPYApEfEgQES8EhFjI2JJRCwDHmh+vST1A9YDrm1Rtn1rr2FyIfBdSb3bqLPWcfJfTelNewjwoKTtgWuA0UBf4BbgL/mEABARLwKTgPwb+0vAuLRDN7uSrLcPcCzwZ+DtDobanNB6p57JlAJtbgW2A/oBfwf+kOK/LE3/JK1rREoKfwFmAQOATwGjJX0mt77DgXFAb7KD2CWQJUNgAjAXGJjaj4uIt1P9L+XWMRK4IyKaJD0k6Qur9F9Y2T3ATpL6pPgbyRJH71zZ3rSeOJYD/wfYFBianvM30nPqCdwB3AZsDnwUuDO1+056HocAvYCTgDeUDT1MBK4m+5+PBH7dfNDNk7QbMAb4GrAJ8D/AeEkfylUbCRxK9pq/S/Zp8pPARsA5wFWS+kfEY8ApZEm2R0T0Tu1/mepuQ/Zp6DjgxNz69wKeTrGen48vIl4ApgBH54q/ANyQ9vFzgduBjYEt0rYqmQp8RtI5yobgPtRief4Avi9wX3rky56JiA8MAeZMJ3s/freNOmufiPBjFR/AHGApsIgsYf0a2AD4D+C6XL1uwPPAsFy7A9P054HJabo7WU9/zzR/AtkOvAGwgOwN+ACwD3AecEWqNwyYVyG25m2cDVyVpgcCAazTxvN6r36FZb1T+43S/BXAebnlewHPtmjz78Dvcuu+I7dsR+DNND0UaKoUW1rvc0C3ND8d+FyB12gYsCK9RvnHu8DJLf5fRwC75l6Pcbmyt4APFdwvRgM3pemRwIOt1HscOKJC+eeBv7Yo+x/grJb/c+BS4NwK690v97xOaifemc1xNO9zuWXdyToaO+bKvgZMytV/tp31nwzclaaVXsd90/yVwGXAFgX+rweTdSwWkb3vfg50z73OL6f1Xwx8FehB9r5pLvtdO+/lA8k+ub9G1mk7ufl5tqgbwEfbWk+R/WRNebjn33FHRkTviNg6Ir4REW+S9fDmNleIiBVkO/yACu3/DOwoaRtgOPBaREzLV0jrvJnsfMKmETG5pOfyAWlo4oI0TLCYbOeGrJdbydbA5mkIYlEaOvg+sFmuzou56TeA9ZWNFW8JzI0K4+oRMRV4HdhP0iCyHvT4lvVa8UJ6jd57kB1U85p7jvsCf01l9+XKpkb2CeQDJG0vaYKyobHFZGPTzf+fLcl62pW0tmxrYK8W/8MvAv/SSt3TW9TdkmwfbPZci3iPyw0TLSJLeK29npuSDaHMzZXNZeV9eaX1V3AD2VDS5mT/y+D9//EZZMl5WhoCPKm1lUTErRExAuhDdlA+gSxBQ9Yp6pGey75kB8+lKbbmsraGfJq38QjZp88z26u7tlirr2DoBC8A+fFpkb0hn29ZMSLeknQd2Zt7EPD7VtZ5JXAX2cf0ll4HPpzbXneynkslq3r71i+QvdEOJEv8GwGvkr1hK63vObKP19ut4naa224laZ1KBwBgLNnQz4tkwwZvdWAbrbmXrEc7F/hdKvsrcHwqaytxXAo8SHYycomk0cC/pmXPkfX+K3kO2BZ4pEL5PRExvEDczwHnR8T5bdR57zWStDVwOdnQ1JSIWC5pJq2/nguBZWQHmUdT2VasvC+3uU9FxCJJt5MNb+4AXBPNXehs6POrKbZPAHdIujcinmxjfSuAOyXdRTrHlt5HfwMOA/pHxD9S9b+msiEUSP7JWWTDmz8rWL9Lc8+/uq4DDpX0KWUnbk8n++h8fyv1ryTrxRwOtHZ9/T1knwwqjYn+k6z3fGja3g+AlmOizZrIhkG2KfA8AHqSxf4y2QHmP1ssX9BiXdOAxekk4wbpk8NgSXsU2NY0YD5wgaQNJa0vaZ/c8t8DnyU7AFxZMP6i7iUb3tkPaP5k9TDwEWB/2k4cPYHFwNL0qeTruWUTgH+RNFrZieuekvZKy34DnCtpO2WGKDv5PwHYXtKX00nRdSXtoQoXDZAl8lMk7ZXWsWHaD3q2EuuGZMm6CUDSiaQEmiwAtmg+PxURy8n25/NT7FuTnatY1e+BXE12ruDoNE3a/jGStkizr6bYlrdsLOkIScdK2jg9zz3JXqsHctXuJRtyy7/P7ktlL0ZEa5/AVpIOPNcC32qv7trAyb+KIuJxsgT1S7Ke0wiyy7/eaaX+ZLKE/PeImNNKnYiIOyPilQrLXiM7wfgbsh7Z60DFE1sR8QbZSbnJ6WP/x9t5OleS9XyfJ+v5PdBi+W/Jhq0WSfpTShYjgAbgGbLn/xuyTwxtyrX9KNklc/PIxr+bl88j65Hlhw1IwwWrdT11RPwTeAmYHxGLUtkKsgNSL1o/cEN2gvALwBKyZPzeFSsRsYTsoD2C7BPLE2QHE8jGrK8jO+G5mOx/uUFq82myk/svpHY/psIBPSKmk/WcLyFLnk+SdSRae56PkvVop5Al+p15/2AH2afL2cCLkhamstPI9qmnyZLp1WQnmVfFeLKLBhZExKxc+R7AVElLU51vR8QzFdq/SvY8nyD7X10FXBgrX0p5D9lJ5/yQ3n2prGivv9kPyQ6Uaz2lT2HWSdJH2Ksj4jedHcuaTNIYsjH8an+fwqwuOfl3ojQkMhHYMvX6rAJJA8muTNm1ld6hma0iD/t0Ekljya4DH+3E3zpJ55KdGL3Qid+setzzNzOrQ+75m5nVoS5znf+mm24aAwcO7OwwzMy6lBkzZiyMiA98/6fLJP+BAwcyffr0zg7DzKxLkTS3UrmHfcysNG+99RZ77rknu+yyCzvttBNnnXUWAGeffTYDBgygoaGBhoYGbrnllg+0ffzxx99b3tDQQK9evfjFL34BwKxZsxg6dCg777wzI0aMYPHixbV8WmuFLnPCt7GxMdzzN+taIoLXX3+dHj16sGzZMj7xiU9w8cUXc9ttt9GjRw+++91iN9Jcvnw5AwYMYOrUqWy99dbsscce/PSnP2W//fZjzJgxPPPMM5x77rklP5uuSdKMiGhsWe6ev5mVRhI9emS/g7Js2TKWLVtGdsurVXPnnXey7bbbsvXWWwPZp4J9983u2jx8+HBuvPHG6gVdJ5z8zaxUy5cvp6GhgX79+jF8+HD22iu7xdEll1zCkCFDOOmkk3j11VfbXMe4ceMYOfL9++QNHjyY8eOzm7tef/31PPdcezcYtZac/M2sVN27d2fmzJnMmzePadOm8cgjj/D1r3+dp556ipkzZ9K/f39OP/30Vtu/8847jB8/nmOOOea9sjFjxvCrX/2K3XffnSVLlrDeeuu12t4qc/I3s5ro3bs3w4YN47bbbmOzzTaje/fudOvWja9+9atMmzat1Xa33noru+22G5tt9v5PQwwaNIjbb7+dGTNmMHLkSLbddttaPIW1ipO/mZWmqamJRYsWAfDmm29yxx13MGjQIObPn/9enZtuuonBgwe3sga45pprVhryAXjppZcAWLFiBeeddx6nnHJK9YNfyzn5m1lp5s+fz/7778+QIUPYY489GD58OIcddhhnnHEGO++8M0OGDOHuu+/moosuAuCFF17gkEMOea/9G2+8wcSJEznqqKNWWu8111zD9ttvz6BBg9h888058cQTsVXjSz3NzNZirV3q2WW+4Wu2Nht45s2dHYKtoeZccGgp6y112Cf9HN80SbPSry6dk8rPlvR8+jHpmZIOaW9dZmZWPWX3/N8GDoiIpek3Zu+TdGtadlFE/LTk7ZuZWQWlJv/ITigsTbPrpkfXOMlgZrYWK/1qH0ndJc0k+5HsiRExNS06VdJDksZI2riVtqMkTZc0vampqexQzczqRunJPyKWR0QDsAWwp6TBwKXAtkADMB/4WSttL4uIxoho7Nv3A7ejNjOzDqrZdf4RsQiYBBwUEQvSQWEFcDmwZ63iMDOz8q/26Supd5reADgQ+Iek/rlqnyX7gW4zM6uRsq/26Q+MldSd7EBzXURMkPR7SQ1kJ3/nAF8rOQ4zM8sp+2qfh4BdK5R/ucztmplZ23xvHzOzOuTkb2ZWh5z8zczqkJO/mVkdcvI3M6tDTv5mZnXIyd/MrA45+ZuZ1SEnfzOzOuTkb2ZWh5z8zczqkJO/mVkdcvI3M6tDTv5mZnXIyd/MrA45+ZuZ1SEnfzOzOuTkb2ZWh5z8zczqkJO/mVkdKjX5S1pf0jRJsyTNlnROKu8jaaKkJ9LfjcuMw8zMVlZ2z/9t4ICI2AVoAA6S9HHgTODOiNgOuDPNm5lZjazT1kJJi9tpL2B+RGxfaWFEBLA0za6bHgEcAQxL5WOBScD3CkVsZmarrb2e/1MR0auNR0/g9bZWIKm7pJnAS8DEiJgKbBYR8wHS336ttB0labqk6U1NTav85MzMrLL2kv/RBdbRZp2IWB4RDcAWwJ6SBheMjYi4LCIaI6Kxb9++RZuZmVk72kz+EfE0gKQNJXVL09tLOlzSuvk67YmIRWTDOwcBCyT1T+vrT/apwMzMaqToCd97gfUlDSA7QXsicEV7jST1ldQ7TW8AHAj8AxgPHJ+qHQ/8eZWiNjOz1dLmCd8cRcQbkr4C/DIifiLpwQLt+gNjJXUnO9BcFxETJE0BrkvrexY4pkPRm5lZhxRO/pKGAl8EvlK0bUQ8BOxaofxl4FNFgzQzs+oqOuwzGvh34KaImC1pG+Du0qIyM7NSFer5R8Q9wD25+aeBb5UVlJmZlatQ8pfUCHwfGJhvExFDygnLzMzKVHTM/w/AvwEPAyvKC8fMzGqhaPJviojxpUZiZmY1UzT5nyXpN2TX+L/dXBgRfywlKjMzK1XR5H8iMIjsxmzNwz4BOPmbmXVBRZP/LhGxc6mRmJlZzRS9zv8BSTuWGomZmdVM0Z7/J4DjJT1DNuYvstv1+1JPM7MuqGjyP6jUKMzMrKaKfsN3btmBmJlZ7bQ55i/p7+2toEgdMzNbs7TX899B0kNtLBewURXjMTOzGmgv+Q8qsI7l1QjEzMxqp83k77F+M7O1U9Hr/M3MbC3i5G9mVocKJX9JPy5SZmZmXUPRnv/wCmUHVzMQMzOrnfau8/+6pIeBj0l6KPd4BmjrEtDm9ltKulvSY5JmS/p2Kj9b0vOSZqbHIdV5OmZmVkR7l3peDdwK/Ag4M1e+JCJeKbD+d4HTI+LvknoCMyRNTMsuioifrnLEZma22tpL/t2BxcA3Wy6Q1Ke9A0BEzAfmp+klkh4DBnQwVjMzq5L2kv8Msh9tgezbvHkBbFN0Q5IGArsCU4F9gFMlHQdMJ/t08GqFNqOAUQBbbbVV0U2ZmVk72hzzj4iPRMQ26fGRFo9VSfw9gBuB0RGxGLgU2BZoIPtk8LNWtn9ZRDRGRGPfvn2Lbs7MzNpR6K6ekvatVB4R9xZouy5Z4v9D82/+RsSC3PLLgQmFojUzs6ooej//f8tNrw/sSTYkdEBbjSQJ+C3wWET8PFfeP50PAPgs8EjhiM3MbLUVvZ//iPy8pC2BnxRoug/wZeBhSTNT2feBkZIayM4bzAG+VixcMzOrhqI9/5bmAYPbqxQR9/HBE8UAt3Rwu2ZmVgVFx/x/yftX/XQjO1E7q6SYzMysZEV7/tNz0+8C10TE5BLiMTOzGig65j+27EDMzKx2it7V8zBJD0p6RdJiSUskLS47ODMzK0fRYZ9fAEcBD0dEtFPXzMzWcEVv6fwc8IgTv5nZ2qFoz/8M4BZJ9wBvNxfmv7hlZmZdR9Hkfz6wlOzbveuVF46ZmdVC0eTfJyI+XWokZmZWM0XH/O+Q5ORvZraWKJr8vwncJulNX+ppZtb1Ff2SV8+2lkvaKSJmVyckMzMrW9Gef3t+X6X1mJlZDVQr+Ve6c6eZma2hqpX8/eUvM7MupFrJ38zMupBqJf93qrQeMzOrgaJ39bxR0qGSKtaPiI9XNywzMytT0Z7/pcAXgCckXSBpUIkxmZlZyQol/4i4IyK+COxG9oPrEyXdL+lESeuWGaCZmVVf4TF/SZsAJwAnAw8CF5MdDCa20WZLSXdLekzSbEnfTuV9JE2U9ET6u/FqPQszM1slRcf8/wj8FfgwMCIiDo+IayPiNKBHG03fBU6PiB2AjwPflLQjcCZwZ0RsB9yZ5s3MrEaK3tXzkoi4q9KCiGhsrVFEzAfmp+klkh4DBgBHAMNStbHAJOB7BWMxM7PV1Gbyl3RUpelmEfHHohuSNBDYFZgKbJYODETEfEn9WmkzChgFsNVWWxXdlJmZtaO9nv+INpYFUCj5S+oB3AiMjojFUrG7QUTEZcBlAI2Njf4WsZlZlbSZ/CPixNXdQLoa6EbgD7lPCgsk9U+9/v7AS6u7HTMzK67oCd+NJP1c0vT0+JmkjQq0E/Bb4LEWv/c7Hjg+TR8P/HlVAzczs44reqnnGGAJ8Ln0WAz8rkC7fYAvAwdImpkehwAXAMMlPQEMT/NmZlYjRa/22TYijs7NnyNpZnuNIuI+Wr/d86cKbtvMzKqsaM//TUmfaJ6RtA/wZjkhmZlZ2Yr2/L8OjE3j/AJe4f0xezMz62KK/obvTGAXSb3SvH+83cysCyt6tc8mkv6L7Ju4d0u6ON3rx8zMuqCiY/7jgCbgaOBf0/S1ZQVlZmblKjrm3ycizs3NnyfpyBLiMTOzGija879b0rGSuqXH54CbywzMzMzKUzT5fw24Gng7PcYB35G0RJJP/pqZdTFFr/bpWXYgZmZWO4V/ycvMzNYeTv5mZnXIyd/MrA61m/zT1T2P1CIYMzOrjXaTf0SsAGZJ8u8ompmtJYp+yas/MFvSNOD15sKIOLyUqMzMrFRFk/85pUZhZmY1VfQ6/3skbQ1sFxF3SPow0L3c0MzMrCxF7+r5VeAG4H9S0QDgTyXFZGZmJSt6qec3yX6PdzFARDwB9CsrKDMzK1fR5P92RLzTPCNpHSDKCcnMzMpWNPnfI+n7wAaShgPXA39pr5GkMZJeyn9PQNLZkp6XNDM9DulY6GZm1lFFk/+ZZD/g8jDZHT5vAX5QoN0VwEEVyi+KiIb0uKVgDGZmViVFr/ZZIWksMJVsuOfxiGh32Cci7pU0cPVCNDOzait6tc+hwFPAfwGXAE9KOng1tnuqpIfSsNDGbWx3lKTpkqY3NTWtxubMzCyv6LDPz4D9I2JYROwH7A9c1MFtXgpsCzQA89O6K4qIyyKiMSIa+/bt28HNmZlZS0WT/0sR8WRu/mngpY5sMCIWRMTydM+gy4E9O7IeMzPruKK3d5gt6RbgOrIx/2OAv0k6CiAi/lh0g5L6R8T8NPtZwHcMNTOrsaLJf31gAbBfmm8C+gAjyA4GFZO/pGuAYcCmkuYBZwHDJDWkdnPIrh4yM7MaKnq1z4kdWXlEjKxQ/NuOrMvMzKrHv+RlZlaHnPzNzOpQ0ev8fftmM7O1SNGe/5OSLpS0Y6nRmJlZTRRN/kOAfwK/kfRA+uZtrxLjMjOzEhVK/hGxJCIuj4i9gTPILtmcL2mspI+WGqGZmVVd4TF/SYdLugm4mOyWDNuQ3dbZd+U0M+tiin7J6wngbuDCiLg/V36DpH2rH5aZmZWpaPIfEhFLKy2IiG9VMR4zM6uBoid8fyWpd/OMpI0ljSknJDMzK1vhq30iYlHzTES8CuxaSkRmZla6osm/W/5HVyT1ofiQkZmZrWGKJvCfAfdLuiHNHwOcX05IZmZWtqJ39bxS0gyyX/AScFREPFpqZGZmVppVGbr5B/BqcxtJW0XEs6VEZWZmpSqU/CWdRvat3gXAcrLef5Dd9sHMzLqYoj3/bwMfi4iXywzGzMxqo+jVPs8Br5UZiJmZ1U7Rnv/TwCRJNwNvNxdGxM9LicrMzEpVNPk/mx7rpYeZmXVhRS/1PAdA0oYR8XrRladbQBwGvBQRg1NZH+BaYCAwB/hc+sawmZnVSNFbOg+V9CjwWJrfRdKvCzS9AjioRdmZwJ0RsR1wZ5o3M7MaKnrC9xfAZ4CXASJiFtDurZwj4l7glRbFRwBj0/RY4MiCMZiZWZUUTf5ExHMtipZ3cJubRcT8tM75QL/WKqafi5wuaXpTU1MHN2dmZi0VvtRT0t5ASFpP0ndJQ0BliojLIqIxIhr79u1b9ubMzOpG0eR/CvBNYAAwD2gAvtHBbS6Q1B8g/X2pg+sxM7MOKpr8PxYRX4yIzSKiX0R8Cdihg9scDxyfpo8H/tzB9ZiZWQcVTf6/LFi2EknXAFOAj0maJ+krwAXAcElPAMPTvJmZ1VCb1/lLGgrsDfSV9J3col5A9/ZWHhEjW1n0qcIRmplZ1bX3Ja/1gB6pXs9c+WLgX8sKyszMytVm8o+Ie4B7JF0REXNrFJOZmZWs6L193pB0IbATsH5zYUQcUEpUZmZWqqInfP9A9kteHwHOIbsnz99KisnMzEpWNPlvEhG/BZZFxD0RcRLw8RLjMjOzEhUd9lmW/s6XdCjwArBFOSGZmVnZiib/8yRtBJxOdn1/L2B0WUGZmVm5it7Pf0KafA3YH0DS6JJiMjOzkhW+q2cF32m/ipmZrYlWJ/mralGYmVlNrU7yj6pFYWZmNdXevX2WUDnJC9iglIjMzKx07d3eoWdby83MrGtanWEfMzPropz8zczqkJO/mVkdcvI3M6tDTv5mZnXIyd/MrA45+ZuZ1aGid/WsOklzgCXAcuDdiGjsrFjMzOpNpyX/ZP+IWNjJMZiZ1R0P+5iZ1aHOTP4B3C5phqRRlSpIGiVpuqTpTU1NNQ7PzGzt1ZnJf5+I2A04GPimpH1bVoiIyyKiMSIa+/btW/sIzczWUp2W/CPihfT3JeAmYM/OisXMrN50SvKXtKGkns3TwKeBRzojFjOzetRZV/tsBtwkqTmGqyPitk6Kxcys7nRK8o+Ip4FdOmPbZmbmSz3NzOqSk7+ZWR1y8jczq0NO/mZmdcjJ38ysDjn5r0VOOukk+vXrx+DBgysunzRpEhtttBENDQ00NDTwwx/+cKXly5cvZ9ddd+Wwww6rRbhm1ok6+66eVkUnnHACp556Kscdd1yrdT75yU8yYcKEissuvvhidthhBxYvXlxWiGa2hnDPfy2y77770qdPnw61nTdvHjfffDMnn3xylaMyszWRk3+dmTJlCrvssgsHH3wws2fPfq989OjR/OQnP6FbN+8SZvXA7/Q6sttuuzF37lxmzZrFaaedxpFHHgnAhAkT6NevH7vvvnvnBmhmNePkX0d69epFjx49ADjkkENYtmwZCxcuZPLkyYwfP56BAwdy7LHHctddd/GlL32pk6M1szI5+deRF198kYgAYNq0aaxYsYJNNtmEH/3oR8ybN485c+Ywbtw4DjjgAK666qpOjtbMyuSrfdYiI0eOZNKkSSxcuJAtttiCc845h2XLlgFwyimncMMNN3DppZeyzjrrsMEGGzBu3DjSnVXNrM6ouSe4pmtsbIzp06d3dhhmpRh45s2dHYKtoeZccOhqtZc0IyIaW5bXRc/fbyxrzeq+scy6Ko/5m5nVISd/M7M65ORvZlaHnPzNzOqQk7+ZWR3qtOQv6SBJj0t6UtKZnRWHmVk96pTkL6k78CvgYGBHYKSkHTsjFjOzetRZPf89gScj4umIeAcYBxzRSbGYmdWdzvqS1wDgudz8PGCvlpUkjQJGpdmlkh6vQWz1YFNgYWcHsSbQjzs7AmuF99GkCvvo1pUKOyv5V7qhzAfuMxERlwGXlR9OfZE0vdLXvc3WFN5Hy9dZwz7zgC1z81sAL3RSLGZmdaezkv/fgO0kfUTSesCxwPhOisXMrO50yrBPRLwr6VTgf4HuwJiImN1OM6seD6XZms77aMm6zC2dzcysevwNXzOzOuTkb2ZWh5z8a0jS0hbzJ0i6RFJvSS8r/aaipKGSQtIWaX4jSa9I6tai/dmS3pDUL78NSZtImpkeL0p6Pk0vl/Romn5F0jNp+o5aPP9cjJMk+TK+tYyk/ytptqSH0n61l6QjJP0pV+ffJT2Zmx8h6QMXe6R9ZHpuvjGVfSa3by9Nt4iZKem5XPk7kh5O0xeU/sS7qLr4Ja81XUQskvQisAPwKLA38GD6ex3wcWBqRKyo0HwhcDrwvdz6XgYaIDtAAEsj4qf5RpKuACZExA1VfjpWhyQNBQ4DdouItyVtCqwHPM3KJ2+HAosl9YuIl8j28cmtrLafpIMj4tbmgoj4X7ILRZA0CfhuRKz0+66S5gD7R4S/JNYG9/zXHJPJ3gikvxe1mL+/lXZjgM9L6lPNYCT9SdKM1JMblSs/SNLfJc2SdGcq6yHpd6m39ZCko1P5pyVNSfWvl9SjmjHaGqU/sDAi3gaIiIUR8UJENAGvSfpoqjcAuJFi+/aFwA9KjLmuOfnX1ga5j6YzgR/mlt3P+2+IbYDrgeahkbZ6R0vJDgDfrnKsJ0XE7imGb6WhpL7A5cDREbELcEyq+x/AaxGxc0QMAe5KPb8fAAdGxG7AdOA7VY7R1hy3A1tK+qekX0vaL7fsfmBvSR8DngAeSPPrAEPIvvdTyRTgbUn7lxl4vXLyr603I6Kh+QH8v9yyyWRviI8AcyLiLUCpt7w7MK2N9f4XcLykXlWM9VuSZpG9UbcEtiMbfro3Ip4BiIhXUt0Dye7SSip/NdXdEZicDnTH08o9Rqzri4ilZPvpKKAJuFbSCWlx86favckS+jSye3ntCjye9vXWnId7/6XwmP8aIiKekLQxMILsDQIwAzgReCa9uVpru0jS1cA3qhGLpGFkCX1oRLyRxlbXJ7snU6UvhlQqFzAxIkZWIyZb80XEcmASMEnSw2QH/CvIev6nkX2h8/KIWCJpfWAYrX+ibV7nXZLOJetMWBW5579mmUI2fDMlNz+a1sdE834OfI3qHNA3Al5NiX8Q77/xpgD7pU8n5M4z3A6c2tw4HcQeAPZpHuuV9GFJ21chNlsDSfqYpO1yRQ3A3DT9KLA58EmyCxkAZgKnUGzfPh84oyqB2nuc/Ncsk8mGWJqvXphCNv7f7hskXdlwE/ChKsRxG7COpIeAc8kSOenk3Sjgj2lI6NpU/zxgY0mPpPL9U90TgGvSeh4ABlUhNlsz9QDGpkuJHyIb8jsbILLbCEwlOyG8LNVflX37FrKhJKsi397BzKwOuedvZlaHnPzNzOqQk7+ZWR1y8jczq0NO/mZmdcjJ38ysDjn5m5nVof8PDwGUt6YdpScAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# oho Last HW & SW + Dashboard\n", + "# HW vs SW NTT poly_mult load/throughput comparison\n", + "\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# -------------------------------------------------------------------\n", + "# Assumptions:\n", + "# - q, n2 already defined (q = 3329, n2 = 256)\n", + "# - poly_mul_hw(a, b) is your HW-backed polymult (DMA + IP), len n2\n", + "# - poly_mul_sw(a, b) is the NTT-based SW polymult (NOT the naive one)\n", + "# - psis, inv_psis, pwmf have been initialised already (for poly_mul_sw)\n", + "# -------------------------------------------------------------------\n", + "\n", + "# Number of operations to benchmark for each implementation\n", + "N_ITERS = 2000 # adjust if this is too slow/fast on your board\n", + "\n", + "def random_poly():\n", + " \"\"\"Sample a random polynomial in Z_q[X]/(X^n2 + 1).\"\"\"\n", + " return np.random.randint(0, q, size=(n2,), dtype=np.int16)\n", + "\n", + "# -------------------------------------------------------------------\n", + "# Warm-up: run each once to avoid one-time setup effects\n", + "# -------------------------------------------------------------------\n", + "a0 = random_poly()\n", + "b0 = random_poly()\n", + "_ = poly_mul_hw(a0, b0)\n", + "_ = poly_mul_sw(a0, b0)\n", + "\n", + "# -------------------------------------------------------------------\n", + "# Benchmark HW (poly_mul_hw)\n", + "# -------------------------------------------------------------------\n", + "start_hw = time.time()\n", + "for _ in range(N_ITERS):\n", + " a = random_poly()\n", + " b = random_poly()\n", + " _ = poly_mul_hw(a, b)\n", + "end_hw = time.time()\n", + "\n", + "hw_time_total = end_hw - start_hw\n", + "hw_throughput = N_ITERS / hw_time_total # ops per second\n", + "hw_latency = hw_time_total / N_ITERS # seconds per op\n", + "\n", + "# -------------------------------------------------------------------\n", + "# Benchmark SW NTT (poly_mul_sw)\n", + "# -------------------------------------------------------------------\n", + "start_sw = time.time()\n", + "for _ in range(N_ITERS):\n", + " a = random_poly()\n", + " b = random_poly()\n", + " _ = poly_mul_sw(a, b)\n", + "end_sw = time.time()\n", + "\n", + "sw_time_total = end_sw - start_sw\n", + "sw_throughput = N_ITERS / sw_time_total\n", + "sw_latency = sw_time_total / N_ITERS\n", + "\n", + "# -------------------------------------------------------------------\n", + "# Text summary\n", + "# -------------------------------------------------------------------\n", + "speedup = hw_throughput / sw_throughput\n", + "\n", + "print(\"=== PolyMult HW vs SW NTT benchmark ===\")\n", + "print(f\"N_ITERS : {N_ITERS}\")\n", + "print()\n", + "print(\"Hardware (NTT accelerator via DMA):\")\n", + "print(f\" Total time : {hw_time_total:8.3f} s\")\n", + "print(f\" Throughput : {hw_throughput:8.2f} poly_mult/s\")\n", + "print(f\" Latency per op : {hw_latency*1e3:8.3f} ms\")\n", + "print()\n", + "print(\"Software (NTT in Python/NumPy):\")\n", + "print(f\" Total time : {sw_time_total:8.3f} s\")\n", + "print(f\" Throughput : {sw_throughput:8.2f} poly_mult/s\")\n", + "print(f\" Latency per op : {sw_latency*1e3:8.3f} ms\")\n", + "print()\n", + "print(f\"Speedup (HW / SW NTT): {speedup:8.1f}×\")\n", + "\n", + "# -------------------------------------------------------------------\n", + "# Visual comparison: throughput and latency\n", + "# -------------------------------------------------------------------\n", + "labels = [\"HW NTT accel\", \"SW NTT\"]\n", + "throughput = [hw_throughput, sw_throughput]\n", + "latency_ms = [hw_latency * 1e3, sw_latency * 1e3]\n", + "\n", + "# Throughput bar chart\n", + "fig1, ax1 = plt.subplots()\n", + "ax1.bar(labels, throughput)\n", + "ax1.set_ylabel(\"poly_mult per second\")\n", + "ax1.set_title(\"PolyMult throughput: HW accelerator vs SW NTT\")\n", + "for i, v in enumerate(throughput):\n", + " ax1.text(i, v, f\"{v:.0f}\", ha=\"center\", va=\"bottom\")\n", + "plt.show()\n", + "\n", + "# Latency bar chart\n", + "fig2, ax2 = plt.subplots()\n", + "ax2.bar(labels, latency_ms)\n", + "ax2.set_ylabel(\"Latency per poly_mult [ms]\")\n", + "ax2.set_title(\"PolyMult latency: HW accelerator vs SW NTT\")\n", + "for i, v in enumerate(latency_ms):\n", + " ax2.text(i, v, f\"{v:.2f}\", ha=\"center\", va=\"bottom\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ced1330", + "metadata": {}, + "outputs": [], + "source": [ + "##############################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aad9d98c", + "metadata": {}, + "outputs": [], + "source": [ + "##############################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93864394", + "metadata": {}, + "outputs": [], + "source": [ + "##############################################" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "d3df6736", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single roundtrip with current settings:\n", + "USE_COMPRESSION=True, NOISE_DIV_KEY=3, NOISE_DIV_ENC=3\n", + "Decryption successful? True\n", + "\n", + "Monte-Carlo over 20 trials:\n", + "USE_COMPRESSION=True, NOISE_DIV_KEY=3, NOISE_DIV_ENC=3\n", + "Trials: 20, successes: 20, failures: 0, failure rate ≈ 0.0000\n" + ] + } + ], + "source": [ + "# oho K\n", + "# Experiment K - selfcontained tunable solution\n", + "\n", + "# (success only with both noise params set to 3 !)\n", + "\n", + "import numpy as np\n", + "import math\n", + "\n", + "# =========================================================\n", + "# GLOBAL TUNING KNOBS (EDIT THESE LINES)\n", + "# =========================================================\n", + "USE_COMPRESSION = True # True = use compress/decompress(u,v); False = bypass\n", + "NOISE_DIV_KEY = 3 # noise scaling for s,e in keygen (1 = original)\n", + "NOISE_DIV_ENC = 3 # noise scaling for r,e1,e2 in encrypt (1 = original)\n", + "# =========================================================\n", + "# Assumes: q, n2, du, dv, eta1, eta2, k, cbd_vector, cbd,\n", + "# compress, decompress, poly_mul_hw are already defined.\n", + "# =========================================================\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Utility: integer noise scaling\n", + "# ---------------------------------------------------------\n", + "def scale_noise(x, div: int):\n", + " \"\"\"\n", + " Scale integer noise by integer divisor 'div' with truncation toward 0.\n", + " div = 1 -> original noise\n", + " div = 2 -> roughly half magnitude, etc.\n", + " \"\"\"\n", + " if div == 1:\n", + " return x.astype(np.int16)\n", + " x = np.asarray(x, dtype=np.int32)\n", + " return (np.sign(x) * (np.abs(x) // div)).astype(np.int16)\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# HW-based Kyber-style routines using global knobs\n", + "# ---------------------------------------------------------\n", + "def key_gen2_hw_cellYY1():\n", + " \"\"\"\n", + " Key generation using hardware poly_mult and global NOISE_DIV_KEY.\n", + " \"\"\"\n", + " # a: public matrix, shape (k, k, n2)\n", + " a = np.random.randint(q, size=(k, k, n2), dtype=np.int16)\n", + "\n", + " # raw noise\n", + " s_raw = cbd_vector(n2, eta1, k) # (k, n2)\n", + " e_raw = cbd_vector(n2, eta1, k) # (k, n2)\n", + "\n", + " # scaled noise\n", + " s = scale_noise(s_raw, NOISE_DIV_KEY)\n", + " e = scale_noise(e_raw, NOISE_DIV_KEY)\n", + "\n", + " b0 = (poly_mul_hw(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_hw(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_hw(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_hw(a[1,1], s[1]) + e[1]) % q\n", + "\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23], dtype=np.int16)\n", + " return s, a, b\n", + "\n", + "\n", + "def encrypt2_hw_cellYY1(a, b, m):\n", + " \"\"\"\n", + " Encryption using hardware poly_mult and global NOISE_DIV_ENC.\n", + " Compression controlled by global USE_COMPRESSION.\n", + " \"\"\"\n", + " # raw noise\n", + " r_raw = cbd_vector(n2, eta1, k) # (k, n2)\n", + " e1_raw = cbd_vector(n2, eta2, k) # (k, n2)\n", + " e2_raw = cbd(n2, eta2) # (n2,)\n", + "\n", + " # scaled noise\n", + " r = scale_noise(r_raw, NOISE_DIV_ENC)\n", + " e1 = scale_noise(e1_raw, NOISE_DIV_ENC)\n", + " e2 = scale_noise(e2_raw, NOISE_DIV_ENC)\n", + "\n", + " # u part\n", + " u0 = (poly_mul_hw(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_hw(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_hw(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_hw(a[1,1], r[1]) + e1[1]) % q\n", + "\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23], dtype=np.int16)\n", + "\n", + " # v part\n", + " v0 = poly_mul_hw(b[0], r[0]) % q\n", + " v1 = poly_mul_hw(b[1], r[1]) % q\n", + " v = (v0 + v1 + e2 + m) % q\n", + "\n", + " if USE_COMPRESSION:\n", + " u_c = compress(u, q, du)\n", + " v_c = compress(v, q, dv)\n", + " else:\n", + " u_c = u.astype(np.int16)\n", + " v_c = v.astype(np.int16)\n", + "\n", + " return u_c, v_c\n", + "\n", + "\n", + "def decrypt2_hw_cellYY1(s, u, v):\n", + " \"\"\"\n", + " Decryption using hardware poly_mult.\n", + " Compression controlled by global USE_COMPRESSION.\n", + " \"\"\"\n", + " if USE_COMPRESSION:\n", + " u_dec = decompress(u, q, du)\n", + " v_dec = decompress(v, q, dv)\n", + " else:\n", + " u_dec = u.astype(np.int16)\n", + " v_dec = v.astype(np.int16)\n", + "\n", + " p0 = poly_mul_hw(s[0], u_dec[0]) % q\n", + " p1 = poly_mul_hw(s[1], u_dec[1]) % q\n", + " p = (p0 + p1) % q\n", + " d = (v_dec - p) % q\n", + " return d\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Roundtrip and Monte-Carlo\n", + "# ---------------------------------------------------------\n", + "def roundtrip_once(verbose: bool = True):\n", + " \"\"\"\n", + " One Decrypt(Encrypt(m)) round using global knobs.\n", + " Returns True/False for bitwise equality.\n", + " \"\"\"\n", + " # random binary message\n", + " m = np.random.randint(2, size=(n2,))\n", + " ms = decompress(m, q, 1) # same embedding as before\n", + "\n", + " s, a, b = key_gen2_hw_cellYY1()\n", + " u, v = encrypt2_hw_cellYY1(a, b, ms)\n", + " d = decrypt2_hw_cellYY1(s, u, v)\n", + "\n", + " th1 = math.floor(q/4)\n", + " th3 = math.floor(3*q/4)\n", + " md = np.array([1 if (th1 < x < th3) else 0 for x in d], dtype=np.int8)\n", + "\n", + " ok = np.array_equal(m, md)\n", + "\n", + " if verbose:\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(\"Decryption successful? \", ok)\n", + " if not ok:\n", + " idx = np.where(m != md)[0][0]\n", + " print(\"First mismatch at index\", idx)\n", + " print(\"m[idx] =\", int(m[idx]), \" md[idx] =\", int(md[idx]))\n", + " return ok\n", + "\n", + "\n", + "def monte_carlo_roundtrip(trials: int = 50):\n", + " \"\"\"\n", + " Run multiple roundtrips with current global knobs;\n", + " print empirical failure rate.\n", + " \"\"\"\n", + " successes = 0\n", + " for t in range(trials):\n", + " if roundtrip_once(verbose=False):\n", + " successes += 1\n", + " failures = trials - successes\n", + " failure_rate = failures / trials\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(f\"Trials: {trials}, successes: {successes}, \"\n", + " f\"failures: {failures}, failure rate ≈ {failure_rate:.4f}\")\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Example usage (you can comment these out)\n", + "# ---------------------------------------------------------\n", + "print(\"Single roundtrip with current settings:\")\n", + "roundtrip_once()\n", + "\n", + "print(\"\\nMonte-Carlo over 20 trials:\")\n", + "monte_carlo_roundtrip(trials=20)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "7fb23af6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single roundtrip with current global settings:\n", + "USE_COMPRESSION=True, NOISE_DIV_KEY=3, NOISE_DIV_ENC=3\n", + "Decryption successful? True\n", + "\n", + "Monte-Carlo over 20 trials:\n", + "USE_COMPRESSION=True, NOISE_DIV_KEY=3, NOISE_DIV_ENC=3\n", + "Trials: 20, successes: 20, failures: 0, failure rate ≈ 0.0000\n" + ] + } + ], + "source": [ + "# oho O-1\n", + "# Variante 1 von Experiment O\n", + "\n", + "# tunable experiment closer to ML-KEM 512 instead of Kyber Variant of mine\n", + "\n", + "# (success only with both noise params set to 4)\n", + "\n", + "import numpy as np\n", + "import math\n", + "\n", + "# =========================================================\n", + "# ASSUMPTIONS: these come from your existing notebook\n", + "# =========================================================\n", + "# q : modulus (should be 3329 for Kyber512)\n", + "# n2 : polynomial length (should be 256)\n", + "# k : module rank (should be 2)\n", + "# du,dv: compression parameters\n", + "# eta1,eta2: CBD parameters\n", + "# cbd_vector, cbd, compress, decompress: existing functions\n", + "# poly_mul_hw: your hardware polymult wrapper (DMA + IP)\n", + "#\n", + "# We assert some basics to avoid silent mismatches.\n", + "# =========================================================\n", + "\n", + "try:\n", + " assert q == 3329\n", + " assert n2 == 256\n", + " assert k == 2\n", + "except NameError:\n", + " print(\"WARNING: q, n2, k not defined yet in this notebook.\")\n", + "except AssertionError:\n", + " print(\"WARNING: q/n2/k do not match Kyber-512-style values (q=3329, n2=256, k=2).\")\n", + "\n", + "\n", + "# =========================================================\n", + "# GLOBAL TUNING KNOBS (THIS IS WHAT YOU EDIT)\n", + "# =========================================================\n", + "USE_COMPRESSION = True # True: use compress/decompress(u,v); False: bypass\n", + "NOISE_DIV_KEY = 3 # integer divisor for s,e in keygen (1 = spec-ish, 3 = your working setting)\n", + "NOISE_DIV_ENC = 3 # integer divisor for r,e1,e2 in encrypt (1 = spec-ish, 3 = your working setting)\n", + "# =========================================================\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Utility: integer noise scaling\n", + "# ---------------------------------------------------------\n", + "def scale_noise(x, div: int):\n", + " \"\"\"\n", + " Scale integer noise by integer divisor 'div' with truncation toward 0.\n", + "\n", + " - div = 1 -> original noise distribution.\n", + " - div = 2 -> approximately halves the magnitude of samples.\n", + " - div = 3 -> ~one-third, etc.\n", + "\n", + " This keeps sign, and works on scalars or arrays.\n", + " \"\"\"\n", + " x = np.asarray(x, dtype=np.int32)\n", + " if div == 1:\n", + " return x.astype(np.int16)\n", + " return (np.sign(x) * (np.abs(x) // div)).astype(np.int16)\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# HW-based Kyber-style routines using the global knobs\n", + "# ---------------------------------------------------------\n", + "def key_gen2_hw_cellZZA():\n", + " \"\"\"\n", + " Key generation using hardware poly_mult and global NOISE_DIV_KEY.\n", + "\n", + " Structure is Kyber-512-like:\n", + " - a ~ U(Z_q)^{k×k×n2}\n", + " - s,e from CBD(eta1) scaled by NOISE_DIV_KEY\n", + " - b = As + e (with the same algebraic formula as your original code).\n", + " \"\"\"\n", + " # public matrix a: shape (k, k, n2)\n", + " a = np.random.randint(q, size=(k, k, n2), dtype=np.int16)\n", + "\n", + " # raw noise\n", + " s_raw = cbd_vector(n2, eta1, k) # shape (k, n2)\n", + " e_raw = cbd_vector(n2, eta1, k) # shape (k, n2)\n", + "\n", + " # scaled noise according to global divisor\n", + " s = scale_noise(s_raw, NOISE_DIV_KEY)\n", + " e = scale_noise(e_raw, NOISE_DIV_KEY)\n", + "\n", + " # same structure as original key_gen2 (but with HW multiply)\n", + " b0 = (poly_mul_hw(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_hw(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_hw(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_hw(a[1,1], s[1]) + e[1]) % q\n", + "\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23], dtype=np.int16)\n", + " return s, a, b\n", + "\n", + "\n", + "def encrypt2_hw_cellZZA(a, b, m):\n", + " \"\"\"\n", + " Encryption using hardware poly_mult and global NOISE_DIV_ENC.\n", + "\n", + " - a, b: as from key_gen2_hw\n", + " - m : message polynomial (length n2), already embedded in Z_q\n", + " - noise (r, e1, e2) scaled by NOISE_DIV_ENC\n", + " - compression controlled by USE_COMPRESSION\n", + " \"\"\"\n", + " # raw noise\n", + " r_raw = cbd_vector(n2, eta1, k) # (k, n2)\n", + " e1_raw = cbd_vector(n2, eta2, k) # (k, n2)\n", + " e2_raw = cbd(n2, eta2) # (n2,)\n", + "\n", + " # scaled noise\n", + " r = scale_noise(r_raw, NOISE_DIV_ENC)\n", + " e1 = scale_noise(e1_raw, NOISE_DIV_ENC)\n", + " e2 = scale_noise(e2_raw, NOISE_DIV_ENC)\n", + "\n", + " # u part (matrix * r + e1)\n", + " u0 = (poly_mul_hw(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_hw(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_hw(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_hw(a[1,1], r[1]) + e1[1]) % q\n", + "\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23], dtype=np.int16)\n", + "\n", + " # v part (b*r + e2 + m)\n", + " v0 = poly_mul_hw(b[0], r[0]) % q\n", + " v1 = poly_mul_hw(b[1], r[1]) % q\n", + " v = (v0 + v1 + e2 + m) % q\n", + "\n", + " if USE_COMPRESSION:\n", + " u_c = compress(u, q, du)\n", + " v_c = compress(v, q, dv)\n", + " else:\n", + " u_c = u.astype(np.int16)\n", + " v_c = v.astype(np.int16)\n", + "\n", + " return u_c, v_c\n", + "\n", + "\n", + "def decrypt2_hw_cellZZA(s, u, v):\n", + " \"\"\"\n", + " Decryption using hardware poly_mult.\n", + "\n", + " - s : secret key vector (k × n2)\n", + " - u,v: ciphertext components (compressed or not)\n", + " - compression controlled by USE_COMPRESSION\n", + " \"\"\"\n", + " if USE_COMPRESSION:\n", + " u_dec = decompress(u, q, du)\n", + " v_dec = decompress(v, q, dv)\n", + " else:\n", + " u_dec = u.astype(np.int16)\n", + " v_dec = v.astype(np.int16)\n", + "\n", + " p0 = poly_mul_hw(s[0], u_dec[0]) % q\n", + " p1 = poly_mul_hw(s[1], u_dec[1]) % q\n", + " p = (p0 + p1) % q\n", + " d = (v_dec - p) % q\n", + " return d\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Roundtrip and Monte-Carlo with these parameters\n", + "# ---------------------------------------------------------\n", + "def roundtrip_once(verbose: bool = True):\n", + " \"\"\"\n", + " One Decrypt(Encrypt(m)) round using the current global knobs.\n", + "\n", + " - draws m uniformly in {0,1}^n2\n", + " - embeds it via decompress(m, q, 1) (as in your original code)\n", + " - runs key_gen2_hw / encrypt2_hw / decrypt2_hw\n", + " - decodes d with the usual Kyber-ish threshold rule (q/4, 3q/4)\n", + "\n", + " Returns True if m == md bitwise.\n", + " \"\"\"\n", + " # random binary message\n", + " m = np.random.randint(2, size=(n2,))\n", + " ms = decompress(m, q, 1) # your existing embedding\n", + "\n", + " s, a, b = key_gen2_hw_cellZZA()\n", + " u, v = encrypt2_hw_cellZZA(a, b, ms)\n", + " d = decrypt2_hw_cellZZA(s, u, v)\n", + "\n", + " th1 = math.floor(q / 4)\n", + " th3 = math.floor(3 * q / 4)\n", + " md = np.array([1 if (th1 < x < th3) else 0 for x in d], dtype=np.int8)\n", + "\n", + " ok = np.array_equal(m, md)\n", + "\n", + " if verbose:\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(\"Decryption successful? \", ok)\n", + " if not ok:\n", + " idx = np.where(m != md)[0][0]\n", + " print(\"First mismatch at index\", idx)\n", + " print(\"m[idx] =\", int(m[idx]))\n", + " print(\"md[idx] =\", int(md[idx]))\n", + " return ok\n", + "\n", + "\n", + "def monte_carlo_roundtrip(trials: int = 50):\n", + " \"\"\"\n", + " Run multiple roundtrips with current global knobs;\n", + " print empirical failure rate.\n", + "\n", + " This is your basic diagnostic for whether a given\n", + " (USE_COMPRESSION, NOISE_DIV_KEY, NOISE_DIV_ENC) triple\n", + " is \"safe enough\" with your hardware multiplier.\n", + " \"\"\"\n", + " successes = 0\n", + " for t in range(trials):\n", + " if roundtrip_once(verbose=False):\n", + " successes += 1\n", + " failures = trials - successes\n", + " failure_rate = failures / trials\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(f\"Trials: {trials}, successes: {successes}, \"\n", + " f\"failures: {failures}, failure rate ≈ {failure_rate:.4f}\")\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Example usage with current presets\n", + "# ---------------------------------------------------------\n", + "print(\"Single roundtrip with current global settings:\")\n", + "roundtrip_once()\n", + "\n", + "print(\"\\nMonte-Carlo over 20 trials:\")\n", + "monte_carlo_roundtrip(trials=20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "0c35d97b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single roundtrip with current global settings:\n", + "USE_COMPRESSION=True, ETA1=3, ETA2=2, DU=10, DV=4, NOISE_DIV_KEY=4, NOISE_DIV_ENC=4\n", + "Decryption successful? True\n", + "\n", + "Monte-Carlo over 20 trials:\n", + "USE_COMPRESSION=True, ETA1=3, ETA2=2, DU=10, DV=4, NOISE_DIV_KEY=4, NOISE_DIV_ENC=4\n", + "Trials: 20, successes: 20, failures: 0, failure rate ≈ 0.0000\n" + ] + } + ], + "source": [ + "# oho O-2\n", + "# Variante 2 des O Experiments\n", + "\n", + "# closest to ML-KEM\n", + "\n", + "# (success only with both noise params set to 4)\n", + "\n", + "import numpy as np\n", + "import math\n", + "\n", + "# =========================================================\n", + "# ASSUMPTIONS: these already exist in your notebook\n", + "# =========================================================\n", + "# - q : modulus (should be 3329 for Kyber512)\n", + "# - n2 : polynomial length (should be 256)\n", + "# - k : module rank (should be 2)\n", + "# - cbd_vector : cbd_vector(n, eta, k)\n", + "# - cbd : cbd(n, eta)\n", + "# - compress : compress(poly, q, d)\n", + "# - decompress : decompress(poly, q, d)\n", + "# - poly_mul_hw: hardware polymult wrapper (DMA + IP)\n", + "#\n", + "# If they don't, define/import them before running this cell.\n", + "# =========================================================\n", + "\n", + "# Basic sanity checks (won't stop execution, just warn)\n", + "try:\n", + " if q != 3329:\n", + " print(f\"WARNING: q={q}, expected 3329 for Kyber512/ML-KEM-512.\")\n", + " if n2 != 256:\n", + " print(f\"WARNING: n2={n2}, expected 256.\")\n", + " if k != 2:\n", + " print(f\"WARNING: k={k}, expected 2 (Kyber512/ML-KEM-512 style).\")\n", + "except NameError:\n", + " print(\"WARNING: q, n2, k are not all defined before this cell.\")\n", + "\n", + "\n", + "# =========================================================\n", + "# GLOBAL TUNING KNOBS (THIS IS WHAT YOU EDIT)\n", + "# =========================================================\n", + "# 1) Core ML-KEM / Kyber-style parameters\n", + "ETA1 = 3 # eta1 (Kyber512/ML-KEM-512: 3)\n", + "ETA2 = 2 # eta2 (Kyber512/ML-KEM-512: 2)\n", + "DU = 10 # du (Kyber512/ML-KEM-512: 10)\n", + "DV = 4 # dv (Kyber512/ML-KEM-512: 4)\n", + "\n", + "# 2) Extra “safety” scaling for noise (beyond eta1/eta2)\n", + "USE_COMPRESSION = True # True: compress/decompress u,v with DU,DV; False: bypass\n", + "NOISE_DIV_KEY = 4 # divisor for s,e in keygen (1 = spec-like, 3 = your current stable setting)\n", + "NOISE_DIV_ENC = 4 # divisor for r,e1,e2 in encrypt (1 = spec-like, 3 = your current stable setting)\n", + "# =========================================================\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Utility: integer noise scaling\n", + "# ---------------------------------------------------------\n", + "def scale_noise(x, div: int):\n", + " \"\"\"\n", + " Scale integer noise by integer divisor 'div' with truncation toward 0.\n", + "\n", + " - div = 1 -> original CBD(eta) noise distribution.\n", + " - div = 2 -> approximately halves magnitude.\n", + " - div = 3 -> ~one-third, etc.\n", + " \"\"\"\n", + " x = np.asarray(x, dtype=np.int32)\n", + " if div == 1:\n", + " return x.astype(np.int16)\n", + " return (np.sign(x) * (np.abs(x) // div)).astype(np.int16)\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# HW-based Kyber-style routines using the global knobs\n", + "# ---------------------------------------------------------\n", + "def key_gen2_hw_cellZZB():\n", + " \"\"\"\n", + " Key generation using hardware poly_mult and global parameters:\n", + "\n", + " - Ring: Z_q[X]/(X^256 + 1), q=3329 (assumed).\n", + " - k = 2 module dimension (Kyber512/ML-KEM-512 style).\n", + " - a ~ U(Z_q)^{k×k×n2}.\n", + " - s,e ~ CBD(ETA1), then scaled by NOISE_DIV_KEY.\n", + "\n", + " Returns (s, a, b) with b = A*s + e.\n", + " \"\"\"\n", + " # public matrix a: shape (k, k, n2)\n", + " a = np.random.randint(q, size=(k, k, n2), dtype=np.int16)\n", + "\n", + " # raw noise\n", + " s_raw = cbd_vector(n2, ETA1, k) # shape (k, n2)\n", + " e_raw = cbd_vector(n2, ETA1, k) # shape (k, n2)\n", + "\n", + " # scaled noise according to global divisor\n", + " s = scale_noise(s_raw, NOISE_DIV_KEY)\n", + " e = scale_noise(e_raw, NOISE_DIV_KEY)\n", + "\n", + " # same structure as your original key_gen2, but with HW multiply\n", + " b0 = (poly_mul_hw(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_hw(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_hw(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_hw(a[1,1], s[1]) + e[1]) % q\n", + "\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23], dtype=np.int16)\n", + " return s, a, b\n", + "\n", + "\n", + "def encrypt2_hw_cellZZB(a, b, m):\n", + " \"\"\"\n", + " Encryption using hardware poly_mult and global parameters:\n", + "\n", + " - a, b as from key_gen2_hw.\n", + " - message m: length-n2 polynomial already embedded in Z_q.\n", + " - r ~ CBD(ETA1), e1 ~ CBD(ETA2), e2 ~ CBD(ETA2), then all\n", + " scaled by NOISE_DIV_ENC.\n", + " - DU, DV control compression of u and v, if USE_COMPRESSION is True.\n", + " \"\"\"\n", + " # raw noise\n", + " r_raw = cbd_vector(n2, ETA1, k) # (k, n2)\n", + " e1_raw = cbd_vector(n2, ETA2, k) # (k, n2)\n", + " e2_raw = cbd(n2, ETA2) # (n2,)\n", + "\n", + " # scaled noise\n", + " r = scale_noise(r_raw, NOISE_DIV_ENC)\n", + " e1 = scale_noise(e1_raw, NOISE_DIV_ENC)\n", + " e2 = scale_noise(e2_raw, NOISE_DIV_ENC)\n", + "\n", + " # u part: A * r + e1\n", + " u0 = (poly_mul_hw(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_hw(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_hw(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_hw(a[1,1], r[1]) + e1[1]) % q\n", + "\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23], dtype=np.int16)\n", + "\n", + " # v part: b * r + e2 + m\n", + " v0 = poly_mul_hw(b[0], r[0]) % q\n", + " v1 = poly_mul_hw(b[1], r[1]) % q\n", + " v = (v0 + v1 + e2 + m) % q\n", + "\n", + " if USE_COMPRESSION:\n", + " u_c = compress(u, q, DU)\n", + " v_c = compress(v, q, DV)\n", + " else:\n", + " u_c = u.astype(np.int16)\n", + " v_c = v.astype(np.int16)\n", + "\n", + " return u_c, v_c\n", + "\n", + "\n", + "def decrypt2_hw_cellZZB(s, u, v):\n", + " \"\"\"\n", + " Decryption using hardware poly_mult and global parameters:\n", + "\n", + " - s: secret key (k × n2)\n", + " - (u, v): ciphertext components, compressed with (DU, DV)\n", + " if USE_COMPRESSION is True.\n", + "\n", + " Decryption computes p = s·u and d = v - p (mod q).\n", + " \"\"\"\n", + " if USE_COMPRESSION:\n", + " u_dec = decompress(u, q, DU)\n", + " v_dec = decompress(v, q, DV)\n", + " else:\n", + " u_dec = u.astype(np.int16)\n", + " v_dec = v.astype(np.int16)\n", + "\n", + " p0 = poly_mul_hw(s[0], u_dec[0]) % q\n", + " p1 = poly_mul_hw(s[1], u_dec[1]) % q\n", + " p = (p0 + p1) % q\n", + " d = (v_dec - p) % q\n", + " return d\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Roundtrip and Monte-Carlo\n", + "# ---------------------------------------------------------\n", + "def roundtrip_once(verbose: bool = True):\n", + " \"\"\"\n", + " One Decrypt(Encrypt(m)) round using the current global knobs.\n", + "\n", + " - m ∈ {0,1}^n2 uniformly random.\n", + " - Embedding via decompress(m, q, 1) (your original convention).\n", + " - Uses key_gen2_hw / encrypt2_hw / decrypt2_hw.\n", + " - Decoding via threshold rule: 0 if in [0, q/4] ∪ [3q/4, q),\n", + " 1 if in (q/4, 3q/4).\n", + "\n", + " Returns True if m == md bitwise.\n", + " \"\"\"\n", + " # random binary message\n", + " m = np.random.randint(2, size=(n2,))\n", + " # message embedding (this is separate from DU/DV and ETA*)\n", + " ms = decompress(m, q, 1)\n", + "\n", + " s, a, b = key_gen2_hw_cellZZB()\n", + " u, v = encrypt2_hw_cellZZB(a, b, ms)\n", + " d = decrypt2_hw_cellZZB(s, u, v)\n", + "\n", + " th1 = math.floor(q / 4)\n", + " th3 = math.floor(3 * q / 4)\n", + " md = np.array([1 if (th1 < x < th3) else 0 for x in d], dtype=np.int8)\n", + "\n", + " ok = np.array_equal(m, md)\n", + "\n", + " if verbose:\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"ETA1={ETA1}, ETA2={ETA2}, DU={DU}, DV={DV}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(\"Decryption successful? \", ok)\n", + " if not ok:\n", + " idx = np.where(m != md)[0][0]\n", + " print(\"First mismatch at index\", idx)\n", + " print(\"m[idx] =\", int(m[idx]))\n", + " print(\"md[idx] =\", int(md[idx]))\n", + " return ok\n", + "\n", + "\n", + "def monte_carlo_roundtrip(trials: int = 50):\n", + " \"\"\"\n", + " Run multiple roundtrips with current global knobs;\n", + " print empirical failure rate.\n", + "\n", + " This tells you how stable a given parameter set is with your\n", + " hardware multiplier.\n", + " \"\"\"\n", + " successes = 0\n", + " for t in range(trials):\n", + " if roundtrip_once(verbose=False):\n", + " successes += 1\n", + " failures = trials - successes\n", + " failure_rate = failures / trials\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"ETA1={ETA1}, ETA2={ETA2}, DU={DU}, DV={DV}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(f\"Trials: {trials}, successes: {successes}, \"\n", + " f\"failures: {failures}, failure rate ≈ {failure_rate:.4f}\")\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Example usage with current presets\n", + "# ---------------------------------------------------------\n", + "print(\"Single roundtrip with current global settings:\")\n", + "roundtrip_once()\n", + "\n", + "print(\"\\nMonte-Carlo over 20 trials:\")\n", + "monte_carlo_roundtrip(trials=20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "2c2a0c6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single roundtrip with current global settings:\n", + "USE_COMPRESSION=True, ETA1=3, ETA2=2, DU=10, DV=4, NOISE_DIV_KEY=4, NOISE_DIV_ENC=4\n", + "Decryption successful? True\n", + "\n", + "Monte-Carlo over 20 trials:\n", + "USE_COMPRESSION=True, ETA1=3, ETA2=2, DU=10, DV=4, NOISE_DIV_KEY=4, NOISE_DIV_ENC=4\n", + "Trials: 20, successes: 20, failures: 0, failure rate ≈ 0.0000\n" + ] + } + ], + "source": [ + "# oho O-3\n", + "# Variante 3 des O Experiments\n", + "\n", + "# (again, Noise Params must be set to 4 each)\n", + "\n", + "import numpy as np\n", + "import math\n", + "\n", + "# =========================================================\n", + "# ASSUMPTIONS: these exist in your notebook already\n", + "# =========================================================\n", + "# q : modulus (int) -> should be 3329\n", + "# n2 : polynomial length -> should be 256\n", + "# k : module rank -> should be 2\n", + "# cbd_vector : cbd_vector(n, eta, k)\n", + "# cbd : cbd(n, eta)\n", + "# compress : compress(poly, q, d)\n", + "# decompress : decompress(poly, q, d)\n", + "# poly_mul_hw: hardware polymult wrapper (DMA + IP), signature:\n", + "# poly_mul_hw(poly_a: np.ndarray, poly_b: np.ndarray) -> np.ndarray (len n2, mod q)\n", + "# =========================================================\n", + "\n", + "# Sanity checks (warn only)\n", + "try:\n", + " if q != 3329:\n", + " print(f\"WARNING: q={q}, expected 3329 for Kyber512/ML-KEM-512.\")\n", + " if n2 != 256:\n", + " print(f\"WARNING: n2={n2}, expected 256.\")\n", + " if k != 2:\n", + " print(f\"WARNING: k={k}, expected 2 (Kyber512-style).\")\n", + "except NameError:\n", + " print(\"WARNING: q, n2, k not all defined before this cell.\")\n", + "\n", + "\n", + "# =========================================================\n", + "# GLOBAL TUNING KNOBS (EDIT ONLY THIS BLOCK)\n", + "# =========================================================\n", + "# 1) Kyber / ML-KEM-style parameters\n", + "ETA1 = 3 # Kyber512/ML-KEM-512: eta1 = 3\n", + "ETA2 = 2 # Kyber512/ML-KEM-512: eta2 = 2\n", + "DU = 10 # Kyber512/ML-KEM-512: du = 10\n", + "DV = 4 # Kyber512/ML-KEM-512: dv = 4\n", + "\n", + "# 2) Extra “safety” scaling for noise amplitudes\n", + "# (these are *not* spec knobs, just HW-tuning knobs)\n", + "USE_COMPRESSION = True # True -> compress/decompress(u,v) with (DU,DV); False -> bypass\n", + "NOISE_DIV_KEY = 4 # scale s,e in keygen by 1/NOISE_DIV_KEY (1 = spec-like, 3 = conservative)\n", + "NOISE_DIV_ENC = 4 # scale r,e1,e2 in encrypt by 1/NOISE_DIV_ENC (same interpretation)\n", + "# Good profiles to try:\n", + "# - \"Safe baseline\" (what you saw working): ETA1=3, ETA2=2, DU=10, DV=4, USE_COMPRESSION=True,\n", + "# NOISE_DIV_KEY=3, NOISE_DIV_ENC=3\n", + "# - \"Move toward spec\": keep ETA1/ETA2/DU/DV, gradually try:\n", + "# (KEY,ENC) = (3,3) -> (2,3) -> (2,2) -> (1,2) -> (1,1)\n", + "# =========================================================\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Utility: integer noise scaling\n", + "# ---------------------------------------------------------\n", + "def scale_noise(x, div: int):\n", + " \"\"\"\n", + " Scale integer noise by integer divisor 'div' with truncation toward 0.\n", + "\n", + " - div = 1 -> original CBD(eta) noise distribution.\n", + " - div = 2 -> approximately halves magnitude.\n", + " - div = 3 -> ~one-third, etc.\n", + " \"\"\"\n", + " x = np.asarray(x, dtype=np.int32)\n", + " if div == 1:\n", + " return x.astype(np.int16)\n", + " return (np.sign(x) * (np.abs(x) // div)).astype(np.int16)\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# HW-based Kyber-style routines using the global knobs\n", + "# ---------------------------------------------------------\n", + "def key_gen2_hw_cellZZC():\n", + " \"\"\"\n", + " Key generation using hardware poly_mult and global parameters:\n", + "\n", + " - Ring: Z_q[X]/(X^256 + 1), q=3329 (assumed by the rest of your code).\n", + " - k = 2 module dimension (Kyber512/ML-KEM-512 style).\n", + " - a ~ U(Z_q)^{k×k×n2}.\n", + " - s,e ~ CBD(ETA1), then scaled by NOISE_DIV_KEY.\n", + "\n", + " Returns (s, a, b) with b = A*s + e.\n", + " \"\"\"\n", + " # public matrix a: shape (k, k, n2)\n", + " a = np.random.randint(q, size=(k, k, n2), dtype=np.int16)\n", + "\n", + " # raw noise\n", + " s_raw = cbd_vector(n2, ETA1, k) # shape (k, n2)\n", + " e_raw = cbd_vector(n2, ETA1, k) # shape (k, n2)\n", + "\n", + " # scaled noise according to global divisor\n", + " s = scale_noise(s_raw, NOISE_DIV_KEY)\n", + " e = scale_noise(e_raw, NOISE_DIV_KEY)\n", + "\n", + " # same structure as your original key_gen2, but with HW multiply\n", + " b0 = (poly_mul_hw(a[0,0], s[0]) + e[0]) % q\n", + " b1 = (poly_mul_hw(a[0,1], s[1]) + e[1]) % q\n", + " b2 = (poly_mul_hw(a[1,0], s[0]) + e[0]) % q\n", + " b3 = (poly_mul_hw(a[1,1], s[1]) + e[1]) % q\n", + "\n", + " b01 = (b0 + b1) % q\n", + " b23 = (b2 + b3) % q\n", + " b = np.array([b01, b23], dtype=np.int16)\n", + " return s, a, b\n", + "\n", + "\n", + "def encrypt2_hw_cellZZC(a, b, m):\n", + " \"\"\"\n", + " Encryption using hardware poly_mult and global parameters:\n", + "\n", + " - a, b as from key_gen2_hw.\n", + " - message m: length-n2 polynomial already embedded in Z_q.\n", + " - r ~ CBD(ETA1), e1 ~ CBD(ETA2), e2 ~ CBD(ETA2), then all\n", + " scaled by NOISE_DIV_ENC.\n", + " - DU, DV control compression of u and v, if USE_COMPRESSION is True.\n", + " \"\"\"\n", + " # raw noise\n", + " r_raw = cbd_vector(n2, ETA1, k) # (k, n2)\n", + " e1_raw = cbd_vector(n2, ETA2, k) # (k, n2)\n", + " e2_raw = cbd(n2, ETA2) # (n2,)\n", + "\n", + " # scaled noise\n", + " r = scale_noise(r_raw, NOISE_DIV_ENC)\n", + " e1 = scale_noise(e1_raw, NOISE_DIV_ENC)\n", + " e2 = scale_noise(e2_raw, NOISE_DIV_ENC)\n", + "\n", + " # u part: A * r + e1\n", + " u0 = (poly_mul_hw(a[0,0], r[0]) + e1[0]) % q\n", + " u1 = (poly_mul_hw(a[1,0], r[1]) + e1[1]) % q\n", + " u2 = (poly_mul_hw(a[0,1], r[0]) + e1[0]) % q\n", + " u3 = (poly_mul_hw(a[1,1], r[1]) + e1[1]) % q\n", + "\n", + " u01 = (u0 + u1) % q\n", + " u23 = (u2 + u3) % q\n", + " u = np.array([u01, u23], dtype=np.int16)\n", + "\n", + " # v part: b * r + e2 + m\n", + " v0 = poly_mul_hw(b[0], r[0]) % q\n", + " v1 = poly_mul_hw(b[1], r[1]) % q\n", + " v = (v0 + v1 + e2 + m) % q\n", + "\n", + " if USE_COMPRESSION:\n", + " u_c = compress(u, q, DU)\n", + " v_c = compress(v, q, DV)\n", + " else:\n", + " u_c = u.astype(np.int16)\n", + " v_c = v.astype(np.int16)\n", + "\n", + " return u_c, v_c\n", + "\n", + "\n", + "def decrypt2_hw_cellZZC(s, u, v):\n", + " \"\"\"\n", + " Decryption using hardware poly_mult and global parameters:\n", + "\n", + " - s: secret key (k × n2)\n", + " - (u, v): ciphertext components, compressed with (DU, DV)\n", + " if USE_COMPRESSION is True.\n", + "\n", + " Decryption computes p = s·u and d = v - p (mod q).\n", + " \"\"\"\n", + " if USE_COMPRESSION:\n", + " u_dec = decompress(u, q, DU)\n", + " v_dec = decompress(v, q, DV)\n", + " else:\n", + " u_dec = u.astype(np.int16)\n", + " v_dec = v.astype(np.int16)\n", + "\n", + " p0 = poly_mul_hw(s[0], u_dec[0]) % q\n", + " p1 = poly_mul_hw(s[1], u_dec[1]) % q\n", + " p = (p0 + p1) % q\n", + " d = (v_dec - p) % q\n", + " return d\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Roundtrip and Monte-Carlo\n", + "# ---------------------------------------------------------\n", + "def roundtrip_once(verbose: bool = True):\n", + " \"\"\"\n", + " One Decrypt(Encrypt(m)) round using the current global knobs.\n", + "\n", + " - m ∈ {0,1}^n2 uniformly random.\n", + " - Embedding via decompress(m, q, 1) (your original convention).\n", + " - Uses key_gen2_hw / encrypt2_hw / decrypt2_hw.\n", + " - Decoding via threshold rule: 0 if in [0, q/4] ∪ [3q/4, q),\n", + " 1 if in (q/4, 3q/4).\n", + "\n", + " Returns True if m == md bitwise.\n", + " \"\"\"\n", + " # random binary message\n", + " m = np.random.randint(2, size=(n2,))\n", + " # message embedding (this is independent of DU/DV and ETA*)\n", + " ms = decompress(m, q, 1)\n", + "\n", + " s, a, b = key_gen2_hw_cellZZC()\n", + " u, v = encrypt2_hw_cellZZC(a, b, ms)\n", + " d = decrypt2_hw_cellZZC(s, u, v)\n", + "\n", + " th1 = math.floor(q / 4)\n", + " th3 = math.floor(3 * q / 4)\n", + " md = np.array([1 if (th1 < x < th3) else 0 for x in d], dtype=np.int8)\n", + "\n", + " ok = np.array_equal(m, md)\n", + "\n", + " if verbose:\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"ETA1={ETA1}, ETA2={ETA2}, DU={DU}, DV={DV}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(\"Decryption successful? \", ok)\n", + " if not ok:\n", + " idx = np.where(m != md)[0][0]\n", + " print(\"First mismatch at index\", idx)\n", + " print(\"m[idx] =\", int(m[idx]))\n", + " print(\"md[idx] =\", int(md[idx]))\n", + " return ok\n", + "\n", + "\n", + "def monte_carlo_roundtrip(trials: int = 50):\n", + " \"\"\"\n", + " Run multiple roundtrips with current global knobs;\n", + " print empirical failure rate.\n", + "\n", + " Use this to see how a given parameter set behaves with your HW.\n", + " \"\"\"\n", + " successes = 0\n", + " for t in range(trials):\n", + " if roundtrip_once(verbose=False):\n", + " successes += 1\n", + " failures = trials - successes\n", + " failure_rate = failures / trials\n", + " print(f\"USE_COMPRESSION={USE_COMPRESSION}, \"\n", + " f\"ETA1={ETA1}, ETA2={ETA2}, DU={DU}, DV={DV}, \"\n", + " f\"NOISE_DIV_KEY={NOISE_DIV_KEY}, NOISE_DIV_ENC={NOISE_DIV_ENC}\")\n", + " print(f\"Trials: {trials}, successes: {successes}, \"\n", + " f\"failures: {failures}, failure rate ≈ {failure_rate:.4f}\")\n", + "\n", + "\n", + "# ---------------------------------------------------------\n", + "# Example usage with current presets\n", + "# ---------------------------------------------------------\n", + "print(\"Single roundtrip with current global settings:\")\n", + "roundtrip_once()\n", + "\n", + "print(\"\\nMonte-Carlo over 20 trials:\")\n", + "monte_carlo_roundtrip(trials=20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a6ff41f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97df6d6f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}