mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
### Description

```
Avx2:
Int8        NS(Prompt)  MLAS(Prompt)  MLAS(Prompt)Gain/Loss  NS(TokenGen)  MLAS(TokenGen)  MLAS(TokenGen)Gain/Loss
Blklen16:   90.96       25.15         -72%                   7.65          11.71           53%
Blklen32:   90.73       48.55         -46%                   7.86          14.28           81%
Blklen64:   89.49       68.84         -23%                   8.30          15.78           90%
Blklen128:  87.38       78.37         -10%                   7.90          16.05           103%
Blklen256:  89.45       82.36         -7%                    8.30          16.56           99%

Fp32        NS(Prompt)  MLAS(Prompt)  MLAS(Prompt)Gain/Loss  NS(TokenGen)  MLAS(TokenGen)  MLAS(TokenGen)Gain/Loss
Blklen16:   91.36       105.18        15%                    7.57          9.52            25%
Blklen32:   89.30       105.99        18%                    7.65          9.68            26%
Blklen64:   89.53       101.41        13%                    7.97          9.84            23%
Blklen128:  85.23       99.71         16%                    7.86          10.39           32%
Blklen256:  88.46       97.94         10%                    8.32          10.23           22%

Avx512vnni:
Int8        NS(Prompt)  MLAS(Prompt)  MLAS(Prompt)Gain/Loss  NS(TokenGen)  MLAS(TokenGen)  MLAS(TokenGen)Gain/Loss
Blklen16:   132.18      21.56         -83%                   10.34         11.48           11%
Blklen32:   168.28      43.69         -74%                   11.85         14.73           24%
Blklen64:   201.81      60.29         -70%                   12.36         15.47           25%
Blklen128:  194.92      57.04         -71%                   13.03         14.67           12%
Blklen256:  218.76      70.20         -68%                   13.33         16.31           22%

Fp32        NS(Prompt)  MLAS(Prompt)  MLAS(Prompt)Gain/Loss  NS(TokenGen)  MLAS(TokenGen)  MLAS(TokenGen)Gain/Loss
Blklen16:   102.81      92.74         -9%                    8.41          9.18            9%
Blklen32:   109.49      97.08         -11%                   8.83          11.51           30%
Blklen64:   104.13      101.57        -2%                    9.32          12.00           28%
Blklen128:  108.45      103.69        -4%                    9.58          12.45           29%
Blklen256:  109.43      106.43        -2%                    9.19          12.2            32%
```

---------

Signed-off-by: Liqun Fu <liqfu@microsoft.com>
Signed-off-by: liqunfu <liqun.fu@microsoft.com>
Co-authored-by: edgchen1 <18449977+edgchen1@users.noreply.github.com> |
||
|---|---|---|
| .. | ||
| external | ||
| patches | ||
| tensorboard | ||
| adjust_global_compile_flags.cmake | ||
| arm64x.cmake | ||
| CMakeLists.txt | ||
| CMakeSettings.json | ||
| codeconv.runsettings | ||
| deps.txt | ||
| deps_update_and_upload.py | ||
| EnableVisualStudioCodeAnalysis.props | ||
| gdk_toolchain.cmake | ||
| Info.plist.in | ||
| libonnxruntime.pc.cmake.in | ||
| linux_arm32_crosscompile_toolchain.cmake | ||
| linux_arm64_crosscompile_toolchain.cmake | ||
| maccatalyst_prepare_objects_for_prelink.py | ||
| nuget_helpers.cmake | ||
| onnxruntime.cmake | ||
| onnxruntime_codegen_tvm.cmake | ||
| onnxruntime_common.cmake | ||
| onnxruntime_compile_triton_kernel.cmake | ||
| onnxruntime_config.h.in | ||
| onnxruntime_csharp.cmake | ||
| onnxruntime_flatbuffers.cmake | ||
| onnxruntime_framework.cmake | ||
| onnxruntime_framework.natvis | ||
| onnxruntime_fuzz_test.cmake | ||
| onnxruntime_graph.cmake | ||
| onnxruntime_ios.toolchain.cmake | ||
| onnxruntime_java.cmake | ||
| onnxruntime_java_unittests.cmake | ||
| onnxruntime_kernel_explorer.cmake | ||
| onnxruntime_language_interop_ops.cmake | ||
| onnxruntime_mlas.cmake | ||
| onnxruntime_nodejs.cmake | ||
| onnxruntime_objectivec.cmake | ||
| onnxruntime_opschema_lib.cmake | ||
| onnxruntime_optimizer.cmake | ||
| onnxruntime_providers.cmake | ||
| onnxruntime_providers_acl.cmake | ||
| onnxruntime_providers_armnn.cmake | ||
| onnxruntime_providers_azure.cmake | ||
| onnxruntime_providers_cann.cmake | ||
| onnxruntime_providers_coreml.cmake | ||
| onnxruntime_providers_cpu.cmake | ||
| onnxruntime_providers_cuda.cmake | ||
| onnxruntime_providers_dml.cmake | ||
| onnxruntime_providers_dnnl.cmake | ||
| onnxruntime_providers_js.cmake | ||
| onnxruntime_providers_migraphx.cmake | ||
| onnxruntime_providers_nnapi.cmake | ||
| onnxruntime_providers_openvino.cmake | ||
| onnxruntime_providers_qnn.cmake | ||
| onnxruntime_providers_rknpu.cmake | ||
| onnxruntime_providers_rocm.cmake | ||
| onnxruntime_providers_tensorrt.cmake | ||
| onnxruntime_providers_tvm.cmake | ||
| onnxruntime_providers_vitisai.cmake | ||
| onnxruntime_providers_webnn.cmake | ||
| onnxruntime_providers_xnnpack.cmake | ||
| onnxruntime_pyop.cmake | ||
| onnxruntime_python.cmake | ||
| onnxruntime_rocm_hipify.cmake | ||
| onnxruntime_session.cmake | ||
| onnxruntime_snpe_provider.cmake | ||
| onnxruntime_training.cmake | ||
| onnxruntime_unittests.cmake | ||
| onnxruntime_util.cmake | ||
| onnxruntime_visionos.toolchain.cmake | ||
| onnxruntime_webassembly.cmake | ||
| precompiled_header.cmake | ||
| riscv64.toolchain.cmake | ||
| Sdl.ruleset | ||
| set_winapi_family_desktop.h | ||
| target_delayload.cmake | ||
| uwp_stubs.h | ||
| wcos_rules_override.cmake | ||
| winml.cmake | ||
| winml_cppwinrt.cmake | ||
| winml_sdk_helpers.cmake | ||
| winml_unittests.cmake | ||