onnxruntime

mirror of https://github.com/saymrwulf/onnxruntime.git synced 2026-07-02 03:55:34 +00:00

History

Vincent Wang b7408f7389 [ORTModule] ATen Efficient Attention and Triton Flash Attention (#17959 ) This PR is to support efficient attention and flash attention in ORTModule, including: - Use ATen to call efficient attention, which requires PyTorch 2.2.0 dev or newer. ORTMODULE_USE_EFFICIENT_ATTENTION=1 to enable. - Integrate Triton Flash attention, which requires triton==2.0.0.dev20221202. Need A100 or H100. ORTMODULE_USE_FLASH_ATTENTION=1 to enable. - A python transformer tool to match sub-graph by config and write transformer quickly. Current transformers support attention mask for both efficient attn and flash attn, and dropout for efficient attn only. To support more training scenarios (such as causal mask in GPT2), more transformers need to be added. The feature is guarded by system environment variables; it won't affect any current behavior if not enabled. Since it requires specific PyTorch/Triton versions, related tests are not added for now.		2023-10-27 10:29:27 +08:00
..
external	Update ONNX to 1.15.0rc1 (#17914 )	2023-10-20 15:08:25 -07:00
patches	ONNX 1.15 integration (#17125 )	2023-09-26 14:44:48 -07:00
tensorboard	Improve dependency management (#13523 )	2022-12-01 09:51:59 -08:00
adjust_global_compile_flags.cmake	Update cmake to 3.27 and upgrade Linux CUDA docker files from CentOS7 to UBI8 (#16856 )	2023-09-05 18:12:10 -07:00
CMakeLists.txt	Make CUDA a NHWC EP (#17200 )	2023-10-16 10:16:37 -07:00
CMakeSettings.json
codeconv.runsettings
deps.txt	[TensorRT EP] Use latest onnx-tensorrt parser (#18067 )	2023-10-26 13:55:12 -07:00
deps_update_and_upload.py	[Linter] Bump ruff and remove pylint (#17797 )	2023-10-05 21:07:33 -07:00
EnableVisualStudioCodeAnalysis.props
gdk_toolchain.cmake
Info.plist.in
libonnxruntime.pc.cmake.in
nuget_helpers.cmake
onnxruntime.cmake	Add noexcep_operators to onnxruntime internal libraries (#17850 )	2023-10-09 16:29:41 -07:00
onnxruntime_codegen_tvm.cmake	Use target name for flatbuffers (#13991 )	2022-12-20 11:44:02 -08:00
onnxruntime_common.cmake	Update C/C++ dependencies: abseil, date, nsync, googletest, wil, mp11, cpuinfo and safeint (#15470 )	2023-09-08 13:35:04 -07:00
onnxruntime_compile_triton_kernel.cmake	[ROCm] Add ROCm Triton TunableOp for GroupNorm (#16196 )	2023-07-11 13:55:30 +08:00
onnxruntime_config.h.in	Enabling c++ 20 in MacOS build (#16187 )	2023-09-26 11:27:02 -07:00
onnxruntime_csharp.cmake	Refactor training build options (#13964 )	2023-01-03 13:28:16 -08:00
onnxruntime_flatbuffers.cmake	Rework some external targets to ease building with `-DFETCHCONTENT_FULLY_DISCONNECTED=ON` (#15323 )	2023-04-03 17:45:12 -07:00
onnxruntime_framework.cmake	[C#, CPP] Introduce Float16/BFloat16 support and tests for C#, C++ (#16506 )	2023-07-14 10:46:52 -07:00
onnxruntime_framework.natvis	[C#, CPP] Introduce Float16/BFloat16 support and tests for C#, C++ (#16506 )	2023-07-14 10:46:52 -07:00
onnxruntime_fuzz_test.cmake	Fix fuzz test (#14385 )	2023-01-22 22:17:43 -08:00
onnxruntime_graph.cmake	Update C/C++ dependencies: abseil, date, nsync, googletest, wil, mp11, cpuinfo and safeint (#15470 )	2023-09-08 13:35:04 -07:00
onnxruntime_ios.toolchain.cmake
onnxruntime_java.cmake	Update build option for training in java to enable_training_api (#15638 )	2023-04-24 11:53:08 -07:00
onnxruntime_java_unittests.cmake	Update build option for training in java to enable_training_api (#15638 )	2023-04-24 11:53:08 -07:00
onnxruntime_kernel_explorer.cmake	[ROCm] TunableOp: Update rocBLAS get_solutions API (since ROCm5.6) (#16657 )	2023-07-13 11:20:26 +08:00
onnxruntime_language_interop_ops.cmake	Use target name for flatbuffers (#13991 )	2022-12-20 11:44:02 -08:00
onnxruntime_mlas.cmake	[aarch64] Remove mmla kernel support from apple (#18082 )	2023-10-25 11:34:57 -07:00
onnxruntime_nodejs.cmake	Added DML and CUDA provider support in onnxruntime-node (#16050 )	2023-08-25 16:57:06 -07:00
onnxruntime_objectivec.cmake	Objective C Training API: TrainingSession (#16374 )	2023-06-28 09:13:56 -07:00
onnxruntime_opschema_lib.cmake	Use target name for flatbuffers (#13991 )	2022-12-20 11:44:02 -08:00
onnxruntime_optimizer.cmake	Support inplace update for PythonOp/Grad (#17687 )	2023-10-10 21:36:45 -07:00
onnxruntime_providers.cmake	Add API for NPU Device Selection in the DML EP (#17612 )	2023-10-11 14:53:00 -07:00
onnxruntime_providers_acl.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_armnn.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_azure.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_cann.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_coreml.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_cpu.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_cuda.cmake	distributed slice (#17761 )	2023-10-12 14:28:00 -07:00
onnxruntime_providers_dml.cmake	Add API for NPU Device Selection in the DML EP (#17612 )	2023-10-11 14:53:00 -07:00
onnxruntime_providers_dnnl.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_js.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_migraphx.cmake	CUDA EP vs ROCM EP hipify audit (#17776 )	2023-10-13 10:13:53 +08:00
onnxruntime_providers_nnapi.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_openvino.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_qnn.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_rknpu.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_rocm.cmake	CUDA EP vs ROCM EP hipify audit (#17776 )	2023-10-13 10:13:53 +08:00
onnxruntime_providers_tensorrt.cmake	[TensorRT EP] Fix cmake install (#17923 )	2023-10-16 09:16:24 -07:00
onnxruntime_providers_tvm.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_vitisai.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_webnn.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_providers_xnnpack.cmake	Split onnxruntime_providers.cmake to multiple (#17853 )	2023-10-09 20:33:44 -07:00
onnxruntime_pyop.cmake	Use target name for flatbuffers (#13991 )	2022-12-20 11:44:02 -08:00
onnxruntime_python.cmake	[ORTModule] ATen Efficient Attention and Triton Flash Attention (#17959 )	2023-10-27 10:29:27 +08:00
onnxruntime_rocm_hipify.cmake	Add MatMul FP4 and NF4 Support (#18066 )	2023-10-25 15:34:58 -07:00
onnxruntime_session.cmake	added support for cmake "find_package" (#8919 )	2023-06-19 22:20:31 -07:00
onnxruntime_snpe_provider.cmake	Use target name for flatbuffers (#13991 )	2022-12-20 11:44:02 -08:00
onnxruntime_training.cmake	Triton Codegen for ORTModule (#15831 )	2023-07-13 18:17:58 +08:00
onnxruntime_unittests.cmake	Make CUDA a NHWC EP (#17200 )	2023-10-16 10:16:37 -07:00
onnxruntime_util.cmake	Improve dependency management (#13523 )	2022-12-01 09:51:59 -08:00
onnxruntime_webassembly.cmake	Add training WASM generation to Web CI pipeline (#17319 )	2023-09-08 15:49:47 -07:00
precompiled_header.cmake
Sdl.ruleset	Add a Github workflow for Prefast (#15763 )	2023-05-03 11:42:51 -07:00
set_winapi_family_desktop.h
target_delayload.cmake
uwp_stubs.h	Run clang-format in CI (#15524 )	2023-04-18 09:26:58 -07:00
wcos_rules_override.cmake
winml.cmake	Rework WIL dependency retrieval/usage (#17130 )	2023-08-15 09:11:46 -07:00
winml_cppwinrt.cmake
winml_sdk_helpers.cmake
winml_unittests.cmake	Update C/C++ dependencies: abseil, date, nsync, googletest, wil, mp11, cpuinfo and safeint (#15470 )	2023-09-08 13:35:04 -07:00