onnxruntime

mirror of https://github.com/saymrwulf/onnxruntime.git synced 2026-07-03 03:58:54 +00:00

History

Tianlei Wu 72186bbb71 [CUDA] Build nhwc ops by default (#22648 ) ### Description * Build cuda nhwc ops by default. * Deprecate `--enable_cuda_nhwc_ops` in build.py and add `--disable_cuda_nhwc_ops` option Note that it requires cuDNN 9.x. If you build with cuDNN 8, NHWC ops will be disabled automatically. ### Motivation and Context In general, NHWC is faster than NCHW for convolution on Nvidia GPUs with Tensor Cores, and this could improve performance for vision models. This is the first step to prefer NHWC for CUDA in the 1.21 release. The next step is to do some tests on popular vision models. If it helps for most models and devices, set `prefer_nhwc=1` as the default CUDA provider option.		2024-11-06 09:54:55 -08:00
..
external	Add implementation of WebGPU EP (#22591 )	2024-10-29 18:29:40 -07:00
patches	Add implementation of WebGPU EP (#22591 )	2024-10-29 18:29:40 -07:00
tensorboard
adjust_global_compile_flags.cmake	[JS/WebGPU] Support WASM64 (#21836 )	2024-10-24 20:21:51 -07:00
arm64x.cmake	Dev/mookerem/arm64x update (#20536 )	2024-05-07 12:50:38 -07:00
CMakeLists.txt	[CUDA] Build nhwc ops by default (#22648 )	2024-11-06 09:54:55 -08:00
CMakePresets.json	Create CMake option `onnxruntime_USE_VCPKG` (#21348 )	2024-09-10 16:39:27 -07:00
CMakeSettings.json
codeconv.runsettings
deps.txt	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
deps_update_and_upload.py	Update google benchmark to 1.8.3. (#19734 )	2024-03-01 11:01:58 -08:00
EnableVisualStudioCodeAnalysis.props
gdk_toolchain.cmake
hip_fatbin_insert	[MIGraphX EP/ ROCm EP] add gfx1200, gfx1201 to CMAKE_HIP_ARCHITECTURES (#22348 )	2024-10-11 17:31:36 -07:00
Info.plist.in
libonnxruntime.pc.cmake.in
linux_arm32_crosscompile_toolchain.cmake	Add a build validation for Linux ARM64 cross-compile (#18200 )	2023-11-08 13:03:18 -08:00
linux_arm64_crosscompile_toolchain.cmake	Add a build validation for Linux ARM64 cross-compile (#18200 )	2023-11-08 13:03:18 -08:00
maccatalyst_prepare_objects_for_prelink.py	Support xcframework for mac catalyst builds. (#19534 )	2024-03-20 10:55:19 -07:00
nuget_helpers.cmake	Update nuget.exe used in WindowsAI nuget packaging so `readme` property is supported. (#22141 )	2024-09-19 19:06:47 +10:00
onnxruntime.cmake	Refactor the cmake code that is related to delay loading (#22646 )	2024-11-04 16:30:50 -08:00
onnxruntime_codegen_tvm.cmake
onnxruntime_common.cmake	Enable QNN HTP support for Node (#20576 )	2024-05-09 13:11:07 -07:00
onnxruntime_compile_triton_kernel.cmake	[CUDA] Add SparseAttention operator for Phi-3-small (#20216 )	2024-04-30 09:06:29 -07:00
onnxruntime_config.h.in	Get build working on Xcode 16 (#22168 )	2024-09-24 08:33:03 -07:00
onnxruntime_csharp.cmake
onnxruntime_flatbuffers.cmake
onnxruntime_framework.cmake	Adding CUDNN Frontend and use for CUDA NN Convolution (#19470 )	2024-08-02 15:16:42 -07:00
onnxruntime_framework.natvis
onnxruntime_fuzz_test.cmake	[Fuzzer] Add two new ORT libfuzzer (Linux clang support for now) (#22055 )	2024-09-12 11:50:34 -07:00
onnxruntime_graph.cmake	[Apple framework] Fix minimal build with training enabled. (#19858 )	2024-03-12 11:33:30 -07:00
onnxruntime_ios.toolchain.cmake	Support visionos build (#20365 )	2024-04-23 18:15:07 -07:00
onnxruntime_java.cmake	Remove deprecated "mobile" packages (#20941 )	2024-06-07 16:20:32 -05:00
onnxruntime_java_unittests.cmake	[Java] Add API for appending QNN EP (#22208 )	2024-10-01 10:18:04 -07:00
onnxruntime_kernel_explorer.cmake	[ROCm] prefer hip interfaces over roc during hipify (#22394 )	2024-10-14 20:34:03 -07:00
onnxruntime_lora.cmake	Multi-Lora support (#22046 )	2024-09-30 15:59:07 -07:00
onnxruntime_mlas.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_nodejs.cmake	Initial WebGPU EP checkin (#22318 )	2024-10-08 16:10:46 -07:00
onnxruntime_objectivec.cmake	Initial WebGPU EP checkin (#22318 )	2024-10-08 16:10:46 -07:00
onnxruntime_opschema_lib.cmake
onnxruntime_optimizer.cmake	Flash attention recompute (#20603 )	2024-05-21 13:38:19 +08:00
onnxruntime_providers.cmake	Initial WebGPU EP checkin (#22318 )	2024-10-08 16:10:46 -07:00
onnxruntime_providers_acl.cmake
onnxruntime_providers_armnn.cmake
onnxruntime_providers_azure.cmake
onnxruntime_providers_cann.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_coreml.cmake	Fix Objective-C static analysis warnings. (#20417 )	2024-04-24 11:48:29 -07:00
onnxruntime_providers_cpu.cmake	Initial WebGPU EP checkin (#22318 )	2024-10-08 16:10:46 -07:00
onnxruntime_providers_cuda.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_dml.cmake	Refactor the cmake code that is related to delay loading (#22646 )	2024-11-04 16:30:50 -08:00
onnxruntime_providers_dnnl.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_js.cmake
onnxruntime_providers_migraphx.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_nnapi.cmake	Make partitioning utils QDQ aware so it does not break up QDQ node units (#19723 )	2024-03-12 10:55:49 +10:00
onnxruntime_providers_openvino.cmake	Ovep develop lnl 1.2 (#22424 )	2024-10-14 12:10:01 -07:00
onnxruntime_providers_qnn.cmake	Make partitioning utils QDQ aware so it does not break up QDQ node units (#19723 )	2024-03-12 10:55:49 +10:00
onnxruntime_providers_rknpu.cmake
onnxruntime_providers_rocm.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_tensorrt.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_tvm.cmake
onnxruntime_providers_vitisai.cmake	[VitisAI] remove wrong error msg, required by Microsoft (#21715 )	2024-08-21 21:10:28 -07:00
onnxruntime_providers_vsinpu.cmake	Remove nsync (#20413 )	2024-10-21 15:32:14 -07:00
onnxruntime_providers_webgpu.cmake	Add implementation of WebGPU EP (#22591 )	2024-10-29 18:29:40 -07:00
onnxruntime_providers_webnn.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_xnnpack.cmake	Make partitioning utils QDQ aware so it does not break up QDQ node units (#19723 )	2024-03-12 10:55:49 +10:00
onnxruntime_python.cmake	Refactor the cmake code that is related to delay loading (#22646 )	2024-11-04 16:30:50 -08:00
onnxruntime_rocm_hipify.cmake	[ROCm] redo hipify of version controlled files (#22449 )	2024-10-18 12:40:54 -07:00
onnxruntime_session.cmake	Multi-Lora support (#22046 )	2024-09-30 15:59:07 -07:00
onnxruntime_snpe_provider.cmake
onnxruntime_training.cmake	Multi-Lora support (#22046 )	2024-09-30 15:59:07 -07:00
onnxruntime_unittests.cmake	Add implementation of WebGPU EP (#22591 )	2024-10-29 18:29:40 -07:00
onnxruntime_util.cmake
onnxruntime_visionos.toolchain.cmake	Support visionos build (#20365 )	2024-04-23 18:15:07 -07:00
onnxruntime_webassembly.cmake	[JS/WebGPU] Support WASM64 (#21836 )	2024-10-24 20:21:51 -07:00
precompiled_header.cmake
riscv64.toolchain.cmake	Enable RISC-V 64-bit Cross-Compiling Support for ONNX Runtime on Linux (#19238 )	2024-01-24 16:27:05 -08:00
Sdl.ruleset
set_winapi_family_desktop.h
target_delayload.cmake	Refactor the cmake code that is related to delay loading (#22646 )	2024-11-04 16:30:50 -08:00
uwp_stubs.h
vcpkg-configuration.json	Auto regenerate LORA's fbs files (#22313 )	2024-10-04 10:01:19 -07:00
vcpkg.json	Create CMake option `onnxruntime_USE_VCPKG` (#21348 )	2024-09-10 16:39:27 -07:00
wcos_rules_override.cmake	Stop using apiset in OneCore build: use onecoreuap.lib instead of onecoreuap_apiset.lib (#19632 )	2024-02-23 22:31:57 -08:00
winml.cmake	Change libonnxruntime.so's SONAME: remove the minor and patch version. (#21339 )	2024-07-15 14:21:34 -07:00
winml_cppwinrt.cmake
winml_sdk_helpers.cmake
winml_unittests.cmake	Multi-Lora support (#22046 )	2024-09-30 15:59:07 -07:00