2018-11-20 00:48:22 +00:00
|
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
|
# Licensed under the MIT License.
|
|
|
|
|
|
2019-04-29 19:58:20 +00:00
|
|
|
file(GLOB_RECURSE onnxruntime_framework_srcs CONFIGURE_DEPENDS
|
2018-11-20 00:48:22 +00:00
|
|
|
"${ONNXRUNTIME_INCLUDE_DIR}/core/framework/*.h"
|
|
|
|
|
"${ONNXRUNTIME_ROOT}/core/framework/*.h"
|
|
|
|
|
"${ONNXRUNTIME_ROOT}/core/framework/*.cc"
|
|
|
|
|
)
|
|
|
|
|
|
2021-06-19 05:41:07 +00:00
|
|
|
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
|
2023-07-13 10:17:58 +00:00
|
|
|
file(GLOB_RECURSE onnxruntime_training_framework_torch_srcs CONFIGURE_DEPENDS
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
|
|
|
|
|
)
|
2021-07-22 15:50:19 +00:00
|
|
|
list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_torch_srcs})
|
2023-07-13 10:17:58 +00:00
|
|
|
if (onnxruntime_ENABLE_TRITON)
|
|
|
|
|
file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
|
|
|
|
|
)
|
|
|
|
|
list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_triton_srcs})
|
|
|
|
|
endif()
|
|
|
|
|
elseif(onnxruntime_ENABLE_TRITON)
|
|
|
|
|
# Triton executor shares some code from torch_interop, such as python and dlpack related code files.
|
|
|
|
|
# When torch_interop is enabled, all these dependencies are already included.
|
|
|
|
|
# But if not, we need to include them explicitly.
|
|
|
|
|
file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.h"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.cc"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/gil.h"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/python_common.h"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
|
|
|
|
|
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
|
|
|
|
|
)
|
|
|
|
|
list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_triton_srcs})
|
2021-06-19 05:41:07 +00:00
|
|
|
endif()
|
|
|
|
|
|
2020-08-21 21:14:53 +00:00
|
|
|
if (onnxruntime_MINIMAL_BUILD)
|
2020-12-14 23:27:40 +00:00
|
|
|
set(onnxruntime_framework_src_exclude
|
|
|
|
|
"${ONNXRUNTIME_ROOT}/core/framework/fallback_cpu_capability.h"
|
|
|
|
|
"${ONNXRUNTIME_ROOT}/core/framework/fallback_cpu_capability.cc"
|
2020-08-21 21:14:53 +00:00
|
|
|
)
|
|
|
|
|
|
2021-02-13 02:42:33 +00:00
|
|
|
# custom ops support must be explicitly enabled in a minimal build. exclude if not.
|
|
|
|
|
if (NOT onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)
|
|
|
|
|
list(APPEND onnxruntime_framework_src_exclude
|
|
|
|
|
"${ONNXRUNTIME_INCLUDE_DIR}/core/framework/customregistry.h"
|
|
|
|
|
"${ONNXRUNTIME_ROOT}/core/framework/customregistry.cc"
|
|
|
|
|
)
|
|
|
|
|
endif()
|
|
|
|
|
|
2020-08-21 21:14:53 +00:00
|
|
|
list(REMOVE_ITEM onnxruntime_framework_srcs ${onnxruntime_framework_src_exclude})
|
|
|
|
|
endif()
|
|
|
|
|
|
2018-11-20 00:48:22 +00:00
|
|
|
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_framework_srcs})
|
|
|
|
|
|
2021-04-29 18:54:57 +00:00
|
|
|
onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_srcs})
|
CloudEP (#13855)
Implement CloudEP for hybrid inferencing.
The PR introduces zero new API, customers could configure session and
run options to do inferencing with Azure [triton
endpoint.](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-with-triton?tabs=azure-cli%2Cendpoint)
Sample configuration in python be like:
```
sess_opt.add_session_config_entry('cloud.endpoint_type', 'triton');
sess_opt.add_session_config_entry('cloud.uri', 'https://cloud.com');
sess_opt.add_session_config_entry('cloud.model_name', 'detection2');
sess_opt.add_session_config_entry('cloud.model_version', '7'); // optional, default 1
sess_opt.add_session_config_entry('cloud.verbose', '1'); // optional, default '0', meaning no verbose
...
run_opt.add_run_config_entry('use_cloud', '1') # 0 for local inferencing, 1 for cloud endpoint.
run_opt.add_run_config_entry('cloud.auth_key', '...')
...
sess.run(None, {'input':input_}, run_opt)
```
Co-authored-by: Randy Shuai <rashuai@microsoft.com>
2023-01-03 18:03:15 +00:00
|
|
|
|
2023-07-14 17:46:52 +00:00
|
|
|
if (MSVC)
|
|
|
|
|
set(ORT_FRAMEWORK_NATVIS_FILE "onnxruntime_framework.natvis")
|
|
|
|
|
target_sources(
|
|
|
|
|
onnxruntime_framework
|
|
|
|
|
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/${ORT_FRAMEWORK_NATVIS_FILE}>)
|
|
|
|
|
endif()
|
|
|
|
|
|
2019-11-12 05:34:10 +00:00
|
|
|
if(onnxruntime_ENABLE_INSTRUMENT)
|
|
|
|
|
target_compile_definitions(onnxruntime_framework PRIVATE ONNXRUNTIME_ENABLE_INSTRUMENT)
|
|
|
|
|
endif()
|
2021-06-25 21:08:01 +00:00
|
|
|
if(onnxruntime_USE_TENSORRT OR onnxruntime_USE_NCCL)
|
2020-09-15 05:48:00 +00:00
|
|
|
# TODO: for now, core framework depends on CUDA. It should be moved to TensorRT EP
|
2021-06-25 21:08:01 +00:00
|
|
|
# TODO: provider_bridge_ort.cc should not include nccl.h
|
2021-05-20 14:53:47 +00:00
|
|
|
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_CUDNN_HOME}/include PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
2020-09-15 05:48:00 +00:00
|
|
|
else()
|
2021-05-20 14:53:47 +00:00
|
|
|
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
|
|
|
|
endif()
|
|
|
|
|
# Needed for the provider interface, as it includes training headers when training is enabled
|
2022-12-14 16:32:46 +00:00
|
|
|
if (onnxruntime_ENABLE_TRAINING_OPS)
|
2021-05-20 14:53:47 +00:00
|
|
|
target_include_directories(onnxruntime_framework PRIVATE ${ORTTRAINING_ROOT})
|
2023-07-13 10:17:58 +00:00
|
|
|
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
|
training with custom autograd Functions (#7513)
* Register Torch Custom autograd.Function
* Add flag to supress pybind11 warning
* Avoid unnecessary include in cmake
* Add missing reference
* Add getter for registerred functions
* Format for making subsquent changes cleaner
* Fix interop feature build failure
* Forward pass, run PyOP on CPU EP
* clean up the code
* Fix build
* Define new ops
* refactor pyop - extract PyOpLibProxy class
* Hacks to run example
* implement the kernel compute func
* add back PyOP for comparision experiments
* debug info - thread id
* refine the kernels
* Polish code
(cherry picked from commit 4ed606f9a0833592b325a4b40cf917e219845f6f)
* Fix a the Tensor address mismatch in C++ side
* PythonOpGrad compute
* add distributed test case
* refine test cases
* get dist.get_rank() in Autograd forward pass
* Add CUDA kernels
* Store float, int, and tuple of them as PythonOp's attributes
* Populate local changes
* Fix bugs
* PythonOp/PythonOpGrad CUDA kernels
* Support non-tensor inputs
* Single GPU FP16 Run Pass
(cherry picked from commit e539989e91e18ee997900292d3493b97d3eafa8a)
* Fix segement
* add basic test cases
* Save progress
* fix gradient builder for a Add op who have same inputs
* add test cases for auto grad fallback feature
* fix ref cnt issue. add thread id for debugging
* POC: remove interface class
* Remove interface classes
* Clean a bit
* Coarse-grained clean up after rebase master
* reset pyop and language_interop_ops to latest master
* Fix missing part during merge
* re-structure torch related language interop files
* Fix build
* Fix tests and build
* Fix build and basic unit tests
* Fix most of uts
* remove unnecessary import
* clean up and fix build when enabling language_interop_ops
* Fix single-GPU UTs
* Move runner register into ORT package
* Update dist UTs to new style
* Also fix distributed UTs and leaf gradient problem
* Static generation for constant args
* Move arg_positions_ to static field
* Rename some functions
* Move arg ceration into a function
* Clean output logic in PythonOp
* Move PythonOp's ctor
* Revise PythonOpGrad
* Fix "ORT only supports contiguous tensor for now" for inputs
* Fix evaulation mode error, add test & clean up
* clean up codes
* Fix issues introduced by recent master change (enabled symbolic shape infer)
* automatically register forward/backward function pointers && clean up
* Fix multi-output case
* Add a test back
* fix build and clean up
* RAII for function params PyObject
* Use new exporter
* Clean full name in new exporter
* Fix UTs
* Format a file
* Add "inplace" back
Remove a legacy comment
* Refine TorchProxy
1. Make TorchProxy a formal singleton class.
2. Remove unused Scope class.
3. Simplify the call to Forward and Backward. The two functions now
automatically acquire and release GIL state, so user doesn't need
any GIL-related calls.
* Format
* Add lock to avoid racing condition when registering Python objs
* Fix Python call param ref issues && Add RefcountTracker for debug build && Clean up
* clean up print
* Resolve part of comments && clean up
* Fix a potential bug
* track pyobject consistently
* move kernels to cpu provider as base class
* Refactor - 1. Extract PythonOpBase/PythonOpGradBase 2. Implement CPU kernels 3. Test coverage for CPU kernels
* Refine register code
* Add a missing macro
* Release python call result objects with PythonObjectPtr && Add UnRegisterContext && Track PyObject for Debugging && Clena up
* Fix random segfault issue - relasing a wrong ctx pointer for inplace cases
* put ref count in debug macro
* Move GIL out
* Refine tests
* Fix memory leak issue && forward output lifecycle issue:
1. Unregister the OrtValue PythonObject. Currently, the OrtValue shared same buffer with PythonOp/PythonOpGrad's output. So after those kernels outputs are released, the "leaked" OrtValue caused the shared buffer cannot be released.
2. According PyTorch forward+backward execution. The forward outputs (e.g. torch tensors) maintains the context/saved variables/dirty inputs, etc, which are used for backward execution, so its life should be after the backward runs. This change added such a depencencies between PythonOpGrad on PythonOp.
* Move dlpack->ortvalue into C++ to avoid temp object registration
* Fix the over released Py_False/Py_True && refine tests
* Clean up unused functions
* Always assume the first forward output is context so we don't need to test unused cases.
* Fix a memory leak
* move-copy unique_ptr & avoid C-style casting
* Use inplace attribute to determine if input tensors are copied
* Move DlpackCapsuleDestructor's to a common place
* Thread-safe TorchProxy
* Use OrtValue instead of OrtValue*
* Only keep checks for Debug build
* Wrap some long line per comment
* onnx_export_type --> kwargs
* Use requires_grads to create PythonOpGrad's inputs
* add missing files during master merge
* Fix build issue after merge
* Address two comments.
1. Internalize DlpackCapsuleDestructor
2. Change "(" to "]" for describing closed interval.
* Address some comments.
1. "override" -> "overwrite" to avoid using reserved keyword.
2. Call DLPack's helper to create OrtValue for avoiding repeated code.
* Address comments.
1. Pass std::mutex to registeration helpers so their callers don't
have to lock the mutex expclicitly.
2. Rename "func_context_pool_mutex_" to "mutex_". This mutex is the global mutex for OrtTorchFunctionPool.
* Add bridging code to make cuda kernels work with merged master
* put debue macro check within RefCountTracker && use default logger for debug info && remove useless ortvalue_ptr interface && typos && revert unncessary blank line changes
* fix some comments
* Resolve more comments
* Capitalize a word
* use unique_ptr instead of ObjectPointer for PyObject management && add converntion
* Support symbolic shape
* Remove unused variable
* fix build
* Enable function registration for training only && rectify ToDlpack/FromDlpack merge with master.
* Don't add context for non-PythonOp opeartors (for example AtenOp)
* Fix build error
* Polish frontend part.
1. Avoid adding kwargs to ORTModule's ctor
2. Use onnx_export_type rather than kwargs for type safty
3. Fix some build bugs.
* Resolve simpler comments
* Resolve export related comments
* sync master && fix tests && fix non-training build error
* Fix build errors
* add target link lib
* windows build error
* Fix orttraining-linux-ci build
* disable autograd test && clean up
* fix linux orttraining ci build
* try fixing win build error
* Revise append calls in runner
* Enable custom function using a function
* Rename to avoid using reservied keyword
* Use list comprehension
* Set ORT random seed in tests
* Remove print code and fix ctx shape
* [] -> list()
* Move autograd.Function and nn.Module into corresponding functions
* Move test helpers
* Polish dist test a bit. Tried move helpers to helper file but it causes a deadlock.
* trying fix undefined reference
* Context is not managed by global pool
* Polish dist test
* Polish dist test
* Add enable_custom_autograd_function
* Remove enable_custom_autograd_function from ctors
* Add doc strings
* Shorter code
* Address comments
* Add one empty line
* revert a minor and not needed change
* Address comments
* Back to reference
* Fix windows builds
* Fix windows debug build fail to find "'python39_d.lib'"
* fix mac build error
* revert _to_contiguous change
* add debugging tag for orttraining-cpu-ci
* Fix the wrong PYTHON_LIBRARIES which is affected by PYTHON_LIBRARY given in build command
* add debugging info
* Fix the build in this case: PYTHON_LIBDIR: /opt/_internal/cpython-3.7.10/lib, PYTHON_EXECUTABLE: /opt/python/cp37-cp37m/bin/python3, PYTHON_MULTIARCH: x86_64-linux-gnu
PYTHON_LIBRARY_PATH python3.7m
* fix build error due to python lib not found
* Fixes
1. Release PyObject's
2. Not useing deepcopy because we assume autograd.Function's
non-tensor inputs are static (constants) so there should
be no side effect after calling any autograd.Function
multiple times.
* Revert dtoc for decreasing refcnt
* add debugging log
* add debugging tag
* Fix a small leak
* Remove ONNX_FALLTHROUGH flag
* debug tag
* debug tag
* fix builds
* remove debug tag
* fix build
* fix builds
* fix build
* install python3 in centos, in case there is no libpython3.xm.so
* build python so for redhat
* add training cpu specific docker, build python so inside
* revert build-cpython change
* try fixing numpy include issue
* install_deps after re-installing cpython
* fix build && remove debug tag
* install openssl before cpython
* let's say: builds pass!
* add build flag for torch iterop, only enable it when training+Python is enabled
* skip ComputeBroadcastBackwardAxesDynamic for the shared inputs
* fix build
* add debug info for padgrad test
* Fix builds
* Split dlpack_converter into C++ and Python interfaces respecitively. Then different build use them as needed.
* clean up the changes
* fix addsubgradient builder
* Fix builds
* clean up
* clean up
* Address some comments.
1. Use pointer wraper to avoid calling Py_DECREF
2. Remove unregister_* functions
3. Allow repeated registration by skipping those with existing keys
4. Unregister context in PythonOpGrad
* Fix over-released Py_Boolean
Co-authored-by: Wei-Sheng Chin <wschin@outlook.com>
2021-06-07 20:01:21 +00:00
|
|
|
onnxruntime_add_include_to_target(onnxruntime_framework Python::Module)
|
Improve dependency management (#13523)
## Description
1. Convert some git submodules to cmake external projects
2. Update nsync from
[1.23.0](https://github.com/google/nsync/releases/tag/1.23.0) to
[1.25.0](https://github.com/google/nsync/releases/tag/1.25.0)
3. Update re2 from 2021-06-01 to 2022-06-01
4. Update wil from an old commit to 1.0.220914.1 tag
5. Update gtest to a newer commit so that it can optionally leverage
absl/re2 for parsing command line flags.
The following git submodules are deleted:
1. FP16
2. safeint
3. XNNPACK
4. cxxopts
5. dlpack
7. flatbuffers
8. googlebenchmark
9. json
10. mimalloc
11. mp11
12. pthreadpool
More will come.
## Motivation and Context
There are 3 ways of integrating 3rd party C/C++ libraries into ONNX
Runtime:
1. Install them to a system location, then use cmake's find_package
module to locate them.
2. Use git submodules
6. Use cmake's external projects(externalproject_add).
At first when this project was just started, we considered both option 2
and option 3. We preferred option 2 because:
1. It's easier to handle authentication. At first this project was not
open source, and it had some other non-public dependencies. If we use
git submodule, ADO will handle authentication smoothly. Otherwise we
need to manually pass tokens around and be very careful on not exposing
them in build logs.
2. At that time, cmake fetched dependencies after "cmake" finished
generating vcprojects/makefiles. So it was very difficult to make cflags
consistent. Since cmake 3.11, it has a new command: FetchContent, which
fetches dependencies when it generates vcprojects/makefiles just before
add_subdirectories, so the parent project's variables/settings can be
easily passed to the child projects.
And when the project went on, we had some new concerns:
1. As we started to have more and more EPs and build configs, the number
of submodules grew quickly. For more developers, most ORT submodules are
not relevant to them. They shouldn't need to download all of them.
2. It is impossible to let two different build configs use two different
versions of the same dependency. For example, right now we have protobuf
3.18.3 in the submodules. Then every EP must use the same version.
Whenever we have a need to upgrade protobuf, we need to coordinate
across the whole team and many external developers. I can't manage it
anymore.
3. Some projects want to manage the dependencies in a different way,
either because of their preference or because of compliance
requirements. For example, some Microsoft teams want to use vcpkg, but
we don't want to force every user of onnxruntime using vcpkg.
7. Someone wants to dynamically link to protobuf, but our build script
only does static link.
8. Hard to handle security vulnerabilities. For example, whenever
protobuf has a security patch, we have a lot of things to do. But if we
allowed people to build ORT with a different version of protobuf without
changing ORT"s source code, the customer who build ORT from source will
be able to act on such things in a quicker way. They will not need to
wait ORT having a patch release.
9. Every time we do a release, github will also publish a source file
zip file and a source file tarball for us. But they are not usable,
because they miss submodules.
### New features
After this change, users will be able to:
1. Build the dependencies in the way they want, then install them to
somewhere(for example, /usr or a temp folder).
2. Or download the dependencies by using cmake commands from these
dependencies official website
3. Similar to the above, but use your private mirrors to migrate supply
chain risks.
4. Use different versions of the dependencies, as long as our source
code is compatible with them. For example, you may use you can't use
protobuf 3.20.x as they need code changes in ONNX Runtime.
6. Only download the things the current build needs.
10. Avoid building external dependencies again and again in every build.
### Breaking change
The onnxruntime_PREFER_SYSTEM_LIB build option is removed you could think from now
it is default ON. If you don't like the new behavior, you can set FETCHCONTENT_TRY_FIND_PACKAGE_MODE to NEVER.
Besides, for who relied on the onnxruntime_PREFER_SYSTEM_LIB build
option, please be aware that this PR will change find_package calls from
Module mode to Config mode. For example, in the past if you have
installed protobuf from apt-get from ubuntu 20.04's official repo,
find_package can find it and use it. But after this PR, it won't. This
is because that protobuf version provided by Ubuntu 20.04 is too old to
support the "config mode". It can be resolved by getting a newer version
of protobuf from somewhere.
2022-12-01 17:51:59 +00:00
|
|
|
target_include_directories(onnxruntime_framework PRIVATE ${dlpack_SOURCE_DIR}/include)
|
training with custom autograd Functions (#7513)
* Register Torch Custom autograd.Function
* Add flag to supress pybind11 warning
* Avoid unnecessary include in cmake
* Add missing reference
* Add getter for registerred functions
* Format for making subsquent changes cleaner
* Fix interop feature build failure
* Forward pass, run PyOP on CPU EP
* clean up the code
* Fix build
* Define new ops
* refactor pyop - extract PyOpLibProxy class
* Hacks to run example
* implement the kernel compute func
* add back PyOP for comparision experiments
* debug info - thread id
* refine the kernels
* Polish code
(cherry picked from commit 4ed606f9a0833592b325a4b40cf917e219845f6f)
* Fix a the Tensor address mismatch in C++ side
* PythonOpGrad compute
* add distributed test case
* refine test cases
* get dist.get_rank() in Autograd forward pass
* Add CUDA kernels
* Store float, int, and tuple of them as PythonOp's attributes
* Populate local changes
* Fix bugs
* PythonOp/PythonOpGrad CUDA kernels
* Support non-tensor inputs
* Single GPU FP16 Run Pass
(cherry picked from commit e539989e91e18ee997900292d3493b97d3eafa8a)
* Fix segement
* add basic test cases
* Save progress
* fix gradient builder for a Add op who have same inputs
* add test cases for auto grad fallback feature
* fix ref cnt issue. add thread id for debugging
* POC: remove interface class
* Remove interface classes
* Clean a bit
* Coarse-grained clean up after rebase master
* reset pyop and language_interop_ops to latest master
* Fix missing part during merge
* re-structure torch related language interop files
* Fix build
* Fix tests and build
* Fix build and basic unit tests
* Fix most of uts
* remove unnecessary import
* clean up and fix build when enabling language_interop_ops
* Fix single-GPU UTs
* Move runner register into ORT package
* Update dist UTs to new style
* Also fix distributed UTs and leaf gradient problem
* Static generation for constant args
* Move arg_positions_ to static field
* Rename some functions
* Move arg ceration into a function
* Clean output logic in PythonOp
* Move PythonOp's ctor
* Revise PythonOpGrad
* Fix "ORT only supports contiguous tensor for now" for inputs
* Fix evaulation mode error, add test & clean up
* clean up codes
* Fix issues introduced by recent master change (enabled symbolic shape infer)
* automatically register forward/backward function pointers && clean up
* Fix multi-output case
* Add a test back
* fix build and clean up
* RAII for function params PyObject
* Use new exporter
* Clean full name in new exporter
* Fix UTs
* Format a file
* Add "inplace" back
Remove a legacy comment
* Refine TorchProxy
1. Make TorchProxy a formal singleton class.
2. Remove unused Scope class.
3. Simplify the call to Forward and Backward. The two functions now
automatically acquire and release GIL state, so user doesn't need
any GIL-related calls.
* Format
* Add lock to avoid racing condition when registering Python objs
* Fix Python call param ref issues && Add RefcountTracker for debug build && Clean up
* clean up print
* Resolve part of comments && clean up
* Fix a potential bug
* track pyobject consistently
* move kernels to cpu provider as base class
* Refactor - 1. Extract PythonOpBase/PythonOpGradBase 2. Implement CPU kernels 3. Test coverage for CPU kernels
* Refine register code
* Add a missing macro
* Release python call result objects with PythonObjectPtr && Add UnRegisterContext && Track PyObject for Debugging && Clena up
* Fix random segfault issue - relasing a wrong ctx pointer for inplace cases
* put ref count in debug macro
* Move GIL out
* Refine tests
* Fix memory leak issue && forward output lifecycle issue:
1. Unregister the OrtValue PythonObject. Currently, the OrtValue shared same buffer with PythonOp/PythonOpGrad's output. So after those kernels outputs are released, the "leaked" OrtValue caused the shared buffer cannot be released.
2. According PyTorch forward+backward execution. The forward outputs (e.g. torch tensors) maintains the context/saved variables/dirty inputs, etc, which are used for backward execution, so its life should be after the backward runs. This change added such a depencencies between PythonOpGrad on PythonOp.
* Move dlpack->ortvalue into C++ to avoid temp object registration
* Fix the over released Py_False/Py_True && refine tests
* Clean up unused functions
* Always assume the first forward output is context so we don't need to test unused cases.
* Fix a memory leak
* move-copy unique_ptr & avoid C-style casting
* Use inplace attribute to determine if input tensors are copied
* Move DlpackCapsuleDestructor's to a common place
* Thread-safe TorchProxy
* Use OrtValue instead of OrtValue*
* Only keep checks for Debug build
* Wrap some long line per comment
* onnx_export_type --> kwargs
* Use requires_grads to create PythonOpGrad's inputs
* add missing files during master merge
* Fix build issue after merge
* Address two comments.
1. Internalize DlpackCapsuleDestructor
2. Change "(" to "]" for describing closed interval.
* Address some comments.
1. "override" -> "overwrite" to avoid using reserved keyword.
2. Call DLPack's helper to create OrtValue for avoiding repeated code.
* Address comments.
1. Pass std::mutex to registeration helpers so their callers don't
have to lock the mutex expclicitly.
2. Rename "func_context_pool_mutex_" to "mutex_". This mutex is the global mutex for OrtTorchFunctionPool.
* Add bridging code to make cuda kernels work with merged master
* put debue macro check within RefCountTracker && use default logger for debug info && remove useless ortvalue_ptr interface && typos && revert unncessary blank line changes
* fix some comments
* Resolve more comments
* Capitalize a word
* use unique_ptr instead of ObjectPointer for PyObject management && add converntion
* Support symbolic shape
* Remove unused variable
* fix build
* Enable function registration for training only && rectify ToDlpack/FromDlpack merge with master.
* Don't add context for non-PythonOp opeartors (for example AtenOp)
* Fix build error
* Polish frontend part.
1. Avoid adding kwargs to ORTModule's ctor
2. Use onnx_export_type rather than kwargs for type safty
3. Fix some build bugs.
* Resolve simpler comments
* Resolve export related comments
* sync master && fix tests && fix non-training build error
* Fix build errors
* add target link lib
* windows build error
* Fix orttraining-linux-ci build
* disable autograd test && clean up
* fix linux orttraining ci build
* try fixing win build error
* Revise append calls in runner
* Enable custom function using a function
* Rename to avoid using reservied keyword
* Use list comprehension
* Set ORT random seed in tests
* Remove print code and fix ctx shape
* [] -> list()
* Move autograd.Function and nn.Module into corresponding functions
* Move test helpers
* Polish dist test a bit. Tried move helpers to helper file but it causes a deadlock.
* trying fix undefined reference
* Context is not managed by global pool
* Polish dist test
* Polish dist test
* Add enable_custom_autograd_function
* Remove enable_custom_autograd_function from ctors
* Add doc strings
* Shorter code
* Address comments
* Add one empty line
* revert a minor and not needed change
* Address comments
* Back to reference
* Fix windows builds
* Fix windows debug build fail to find "'python39_d.lib'"
* fix mac build error
* revert _to_contiguous change
* add debugging tag for orttraining-cpu-ci
* Fix the wrong PYTHON_LIBRARIES which is affected by PYTHON_LIBRARY given in build command
* add debugging info
* Fix the build in this case: PYTHON_LIBDIR: /opt/_internal/cpython-3.7.10/lib, PYTHON_EXECUTABLE: /opt/python/cp37-cp37m/bin/python3, PYTHON_MULTIARCH: x86_64-linux-gnu
PYTHON_LIBRARY_PATH python3.7m
* fix build error due to python lib not found
* Fixes
1. Release PyObject's
2. Not useing deepcopy because we assume autograd.Function's
non-tensor inputs are static (constants) so there should
be no side effect after calling any autograd.Function
multiple times.
* Revert dtoc for decreasing refcnt
* add debugging log
* add debugging tag
* Fix a small leak
* Remove ONNX_FALLTHROUGH flag
* debug tag
* debug tag
* fix builds
* remove debug tag
* fix build
* fix builds
* fix build
* install python3 in centos, in case there is no libpython3.xm.so
* build python so for redhat
* add training cpu specific docker, build python so inside
* revert build-cpython change
* try fixing numpy include issue
* install_deps after re-installing cpython
* fix build && remove debug tag
* install openssl before cpython
* let's say: builds pass!
* add build flag for torch iterop, only enable it when training+Python is enabled
* skip ComputeBroadcastBackwardAxesDynamic for the shared inputs
* fix build
* add debug info for padgrad test
* Fix builds
* Split dlpack_converter into C++ and Python interfaces respecitively. Then different build use them as needed.
* clean up the changes
* fix addsubgradient builder
* Fix builds
* clean up
* clean up
* Address some comments.
1. Use pointer wraper to avoid calling Py_DECREF
2. Remove unregister_* functions
3. Allow repeated registration by skipping those with existing keys
4. Unregister context in PythonOpGrad
* Fix over-released Py_Boolean
Co-authored-by: Wei-Sheng Chin <wschin@outlook.com>
2021-06-07 20:01:21 +00:00
|
|
|
endif()
|
2023-02-07 21:47:48 +00:00
|
|
|
endif()
|
|
|
|
|
if (onnxruntime_USE_MPI)
|
|
|
|
|
target_include_directories(onnxruntime_framework PUBLIC ${MPI_CXX_INCLUDE_DIRS})
|
2020-09-15 05:48:00 +00:00
|
|
|
endif()
|
Improve dependency management (#13523)
## Description
1. Convert some git submodules to cmake external projects
2. Update nsync from
[1.23.0](https://github.com/google/nsync/releases/tag/1.23.0) to
[1.25.0](https://github.com/google/nsync/releases/tag/1.25.0)
3. Update re2 from 2021-06-01 to 2022-06-01
4. Update wil from an old commit to 1.0.220914.1 tag
5. Update gtest to a newer commit so that it can optionally leverage
absl/re2 for parsing command line flags.
The following git submodules are deleted:
1. FP16
2. safeint
3. XNNPACK
4. cxxopts
5. dlpack
7. flatbuffers
8. googlebenchmark
9. json
10. mimalloc
11. mp11
12. pthreadpool
More will come.
## Motivation and Context
There are 3 ways of integrating 3rd party C/C++ libraries into ONNX
Runtime:
1. Install them to a system location, then use cmake's find_package
module to locate them.
2. Use git submodules
6. Use cmake's external projects(externalproject_add).
At first when this project was just started, we considered both option 2
and option 3. We preferred option 2 because:
1. It's easier to handle authentication. At first this project was not
open source, and it had some other non-public dependencies. If we use
git submodule, ADO will handle authentication smoothly. Otherwise we
need to manually pass tokens around and be very careful on not exposing
them in build logs.
2. At that time, cmake fetched dependencies after "cmake" finished
generating vcprojects/makefiles. So it was very difficult to make cflags
consistent. Since cmake 3.11, it has a new command: FetchContent, which
fetches dependencies when it generates vcprojects/makefiles just before
add_subdirectories, so the parent project's variables/settings can be
easily passed to the child projects.
And when the project went on, we had some new concerns:
1. As we started to have more and more EPs and build configs, the number
of submodules grew quickly. For more developers, most ORT submodules are
not relevant to them. They shouldn't need to download all of them.
2. It is impossible to let two different build configs use two different
versions of the same dependency. For example, right now we have protobuf
3.18.3 in the submodules. Then every EP must use the same version.
Whenever we have a need to upgrade protobuf, we need to coordinate
across the whole team and many external developers. I can't manage it
anymore.
3. Some projects want to manage the dependencies in a different way,
either because of their preference or because of compliance
requirements. For example, some Microsoft teams want to use vcpkg, but
we don't want to force every user of onnxruntime using vcpkg.
7. Someone wants to dynamically link to protobuf, but our build script
only does static link.
8. Hard to handle security vulnerabilities. For example, whenever
protobuf has a security patch, we have a lot of things to do. But if we
allowed people to build ORT with a different version of protobuf without
changing ORT"s source code, the customer who build ORT from source will
be able to act on such things in a quicker way. They will not need to
wait ORT having a patch release.
9. Every time we do a release, github will also publish a source file
zip file and a source file tarball for us. But they are not usable,
because they miss submodules.
### New features
After this change, users will be able to:
1. Build the dependencies in the way they want, then install them to
somewhere(for example, /usr or a temp folder).
2. Or download the dependencies by using cmake commands from these
dependencies official website
3. Similar to the above, but use your private mirrors to migrate supply
chain risks.
4. Use different versions of the dependencies, as long as our source
code is compatible with them. For example, you may use you can't use
protobuf 3.20.x as they need code changes in ONNX Runtime.
6. Only download the things the current build needs.
10. Avoid building external dependencies again and again in every build.
### Breaking change
The onnxruntime_PREFER_SYSTEM_LIB build option is removed you could think from now
it is default ON. If you don't like the new behavior, you can set FETCHCONTENT_TRY_FIND_PACKAGE_MODE to NEVER.
Besides, for who relied on the onnxruntime_PREFER_SYSTEM_LIB build
option, please be aware that this PR will change find_package calls from
Module mode to Config mode. For example, in the past if you have
installed protobuf from apt-get from ubuntu 20.04's official repo,
find_package can find it and use it. But after this PR, it won't. This
is because that protobuf version provided by Ubuntu 20.04 is too old to
support the "config mode". It can be resolved by getting a newer version
of protobuf from somewhere.
2022-12-01 17:51:59 +00:00
|
|
|
|
2022-06-09 08:07:30 +00:00
|
|
|
if (onnxruntime_ENABLE_ATEN)
|
2021-07-23 08:53:26 +00:00
|
|
|
# DLPack is a header-only dependency
|
Improve dependency management (#13523)
## Description
1. Convert some git submodules to cmake external projects
2. Update nsync from
[1.23.0](https://github.com/google/nsync/releases/tag/1.23.0) to
[1.25.0](https://github.com/google/nsync/releases/tag/1.25.0)
3. Update re2 from 2021-06-01 to 2022-06-01
4. Update wil from an old commit to 1.0.220914.1 tag
5. Update gtest to a newer commit so that it can optionally leverage
absl/re2 for parsing command line flags.
The following git submodules are deleted:
1. FP16
2. safeint
3. XNNPACK
4. cxxopts
5. dlpack
7. flatbuffers
8. googlebenchmark
9. json
10. mimalloc
11. mp11
12. pthreadpool
More will come.
## Motivation and Context
There are 3 ways of integrating 3rd party C/C++ libraries into ONNX
Runtime:
1. Install them to a system location, then use cmake's find_package
module to locate them.
2. Use git submodules
6. Use cmake's external projects(externalproject_add).
At first when this project was just started, we considered both option 2
and option 3. We preferred option 2 because:
1. It's easier to handle authentication. At first this project was not
open source, and it had some other non-public dependencies. If we use
git submodule, ADO will handle authentication smoothly. Otherwise we
need to manually pass tokens around and be very careful on not exposing
them in build logs.
2. At that time, cmake fetched dependencies after "cmake" finished
generating vcprojects/makefiles. So it was very difficult to make cflags
consistent. Since cmake 3.11, it has a new command: FetchContent, which
fetches dependencies when it generates vcprojects/makefiles just before
add_subdirectories, so the parent project's variables/settings can be
easily passed to the child projects.
And when the project went on, we had some new concerns:
1. As we started to have more and more EPs and build configs, the number
of submodules grew quickly. For more developers, most ORT submodules are
not relevant to them. They shouldn't need to download all of them.
2. It is impossible to let two different build configs use two different
versions of the same dependency. For example, right now we have protobuf
3.18.3 in the submodules. Then every EP must use the same version.
Whenever we have a need to upgrade protobuf, we need to coordinate
across the whole team and many external developers. I can't manage it
anymore.
3. Some projects want to manage the dependencies in a different way,
either because of their preference or because of compliance
requirements. For example, some Microsoft teams want to use vcpkg, but
we don't want to force every user of onnxruntime using vcpkg.
7. Someone wants to dynamically link to protobuf, but our build script
only does static link.
8. Hard to handle security vulnerabilities. For example, whenever
protobuf has a security patch, we have a lot of things to do. But if we
allowed people to build ORT with a different version of protobuf without
changing ORT"s source code, the customer who build ORT from source will
be able to act on such things in a quicker way. They will not need to
wait ORT having a patch release.
9. Every time we do a release, github will also publish a source file
zip file and a source file tarball for us. But they are not usable,
because they miss submodules.
### New features
After this change, users will be able to:
1. Build the dependencies in the way they want, then install them to
somewhere(for example, /usr or a temp folder).
2. Or download the dependencies by using cmake commands from these
dependencies official website
3. Similar to the above, but use your private mirrors to migrate supply
chain risks.
4. Use different versions of the dependencies, as long as our source
code is compatible with them. For example, you may use you can't use
protobuf 3.20.x as they need code changes in ONNX Runtime.
6. Only download the things the current build needs.
10. Avoid building external dependencies again and again in every build.
### Breaking change
The onnxruntime_PREFER_SYSTEM_LIB build option is removed you could think from now
it is default ON. If you don't like the new behavior, you can set FETCHCONTENT_TRY_FIND_PACKAGE_MODE to NEVER.
Besides, for who relied on the onnxruntime_PREFER_SYSTEM_LIB build
option, please be aware that this PR will change find_package calls from
Module mode to Config mode. For example, in the past if you have
installed protobuf from apt-get from ubuntu 20.04's official repo,
find_package can find it and use it. But after this PR, it won't. This
is because that protobuf version provided by Ubuntu 20.04 is too old to
support the "config mode". It can be resolved by getting a newer version
of protobuf from somewhere.
2022-12-01 17:51:59 +00:00
|
|
|
set(DLPACK_INCLUDE_DIR ${dlpack_SOURCE_DIR}/include)
|
2021-07-23 08:53:26 +00:00
|
|
|
target_include_directories(onnxruntime_framework PRIVATE ${DLPACK_INCLUDE_DIR})
|
|
|
|
|
endif()
|
2023-01-11 18:03:14 +00:00
|
|
|
onnxruntime_add_include_to_target(onnxruntime_framework onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers safeint_interface Boost::mp11 nlohmann_json::nlohmann_json)
|
2021-12-08 01:56:58 +00:00
|
|
|
|
|
|
|
|
if (onnxruntime_USE_MIMALLOC)
|
|
|
|
|
target_link_libraries(onnxruntime_framework mimalloc-static)
|
|
|
|
|
endif()
|
|
|
|
|
|
2023-05-21 01:07:39 +00:00
|
|
|
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
2022-04-27 15:57:52 +00:00
|
|
|
target_link_libraries(onnxruntime_framework ${ABSEIL_LIBS})
|
2022-01-24 18:40:46 +00:00
|
|
|
endif()
|
|
|
|
|
|
2018-11-20 00:48:22 +00:00
|
|
|
set_target_properties(onnxruntime_framework PROPERTIES FOLDER "ONNXRuntime")
|
|
|
|
|
# need onnx to build to create headers that this project includes
|
|
|
|
|
add_dependencies(onnxruntime_framework ${onnxruntime_EXTERNAL_DEPENDENCIES})
|
|
|
|
|
|
2020-11-21 01:39:57 +00:00
|
|
|
# In order to find the shared provider libraries we need to add the origin to the rpath for all executables we build
|
|
|
|
|
# For the shared onnxruntime library, this is set in onnxruntime.cmake through CMAKE_SHARED_LINKER_FLAGS
|
|
|
|
|
# But our test files don't use the shared library so this must be set for them.
|
|
|
|
|
# For Win32 it generates an absolute path for shared providers based on the location of the executable/onnxruntime.dll
|
2023-05-21 01:07:39 +00:00
|
|
|
if (UNIX AND NOT APPLE AND NOT onnxruntime_MINIMAL_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
2020-11-21 01:39:57 +00:00
|
|
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath='$ORIGIN'")
|
|
|
|
|
endif()
|
|
|
|
|
|
2021-08-21 07:40:12 +00:00
|
|
|
if (onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB)
|
2023-05-21 01:07:39 +00:00
|
|
|
find_package (SQLite3 REQUIRED)
|
|
|
|
|
include_directories(${SQLite3_INCLUDE_DIR})
|
|
|
|
|
target_link_libraries (onnxruntime_framework ${SQLite3_LIBRARY})
|
2021-08-21 07:40:12 +00:00
|
|
|
target_compile_definitions(onnxruntime_framework PRIVATE DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB)
|
2019-06-12 20:47:50 +00:00
|
|
|
endif()
|
|
|
|
|
|
2022-02-11 03:17:08 +00:00
|
|
|
if (WIN32)
|
|
|
|
|
target_compile_definitions(onnxruntime_framework PRIVATE _SCL_SECURE_NO_WARNINGS)
|
|
|
|
|
endif()
|
|
|
|
|
|
2022-04-04 05:37:18 +00:00
|
|
|
if (NOT onnxruntime_BUILD_SHARED_LIB)
|
2023-06-20 05:20:31 +00:00
|
|
|
install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/framework DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
|
|
|
|
|
install(TARGETS onnxruntime_framework
|
2022-04-04 05:37:18 +00:00
|
|
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
|
|
|
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
|
|
|
|
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
|
|
|
|
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
|
|
|
|
|
endif()
|