onnxruntime/tools/ci_build/build.py

2119 lines
90 KiB
Python
Raw Normal View History

2018-11-20 00:48:22 +00:00
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import argparse
import glob
import os
import re
import shutil
import subprocess
import sys
2018-12-11 03:15:03 +00:00
import hashlib
import platform
from logger import get_logger
from amd_hipify import amd_hipify
2020-12-02 04:57:44 +00:00
from distutils.version import LooseVersion
2020-04-19 03:48:30 +00:00
# Absolute directory of this script, with symlinks resolved.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Two directory levels above SCRIPT_DIR, normalized.
REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))
# Extend the import path with <REPO_DIR>/tools/python before importing `util`
# (presumably where `util` lives -- confirm). The import must follow the
# sys.path change, hence the E402 suppression.
sys.path.append(os.path.join(REPO_DIR, "tools", "python"))
from util import run # noqa: E402
# Module-wide logger named "build".
log = get_logger("build")
class BaseError(Exception):
    """Base class for errors originating from build.py."""
2020-04-19 03:48:30 +00:00
class BuildError(BaseError):
    """Error from running build steps.

    Accepts any number of message strings; they are joined with newlines
    to form the exception text.
    """

    def __init__(self, *messages):
        combined = "\n".join(messages)
        super().__init__(combined)
2020-04-19 03:48:30 +00:00
class UsageError(BaseError):
    """Usage related error carrying a single message."""

    def __init__(self, message):
        super().__init__(message)
2020-04-19 03:48:30 +00:00
def _check_python_version():
    """Verify that the running interpreter is Python 3.5 or a later 3.x.

    Raises:
        BuildError: if the major version is not 3, or the minor version is
            below 5.
    """
    # According to the BUILD.md, python 3.5+ is required.
    # Python 2 is definitely not supported and it should be safer to consider
    # it won't run with python 4.
    major = sys.version_info[0]
    minor = sys.version_info[1]

    if major != 3:
        raise BuildError(
            "Bad python major version: expecting python 3, found version "
            "'{}'".format(sys.version))
    if minor < 5:
        raise BuildError(
            "Bad python minor version: expecting python 3.5+, found version "
            "'{}'".format(sys.version))
2020-04-19 03:48:30 +00:00
# Run the interpreter version check at module level, as soon as the file is loaded.
_check_python_version()
2018-11-20 00:48:22 +00:00
def _openvino_verify_device_type(device_read):
    """Validate an OpenVINO device string (used as an argparse ``type``).

    A value is accepted when it is one of the single-device build types in
    ``choices``, or when it starts with ``HETERO:`` or ``MULTI:`` followed by
    at least two comma-separated devices, each drawn from ``dev_options``.

    Args:
        device_read: The raw option value.

    Returns:
        ``device_read`` unchanged when it is valid.

    Raises:
        SystemExit: With the message "Wrong Build Type selected", after
            printing guidance, when the value is not valid.
    """
    choices = ["CPU_FP32", "GPU_FP32", "GPU_FP16", "VAD-M_FP16", "MYRIAD_FP16", "VAD-F_FP32"]
    status_hetero = True
    res = False

    if device_read in choices:
        res = True
    elif device_read.startswith("HETERO:") or device_read.startswith("MULTI:"):
        res = True
        # Everything between the first and second ':' is the device list.
        comma_separated_devices = device_read.split(":")[1].split(',')
        if len(comma_separated_devices) < 2:
            print("Atleast two devices required in Hetero Mode")
            status_hetero = False
        dev_options = ["CPU", "GPU", "MYRIAD", "FPGA", "HDDL"]
        if any(dev not in dev_options for dev in comma_separated_devices):
            status_hetero = False

    def invalid_hetero_build():
        # Print the HETERO/MULTI usage guidance, then terminate.
        # The original expression here was `"..." + + "\n"`, which applies a
        # unary plus to a str and raises TypeError before anything is printed.
        print("\n" + "If trying to build Hetero or Multi, specifiy the supported devices along with it." + "\n")
        print("specify the keyword HETERO or MULTI followed by the devices ")
        print("in the order of priority you want to build" + "\n")
        print("The different hardware devices that can be added in HETERO or MULTI")
        print("are ['CPU','GPU','MYRIAD','FPGA','HDDL']" + "\n")
        print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU" + "\n")
        print("An example of how to specify the MULTI build type. Ex: MULTI:MYRIAD,CPU" + "\n")
        sys.exit("Wrong Build Type selected")

    if res is False:
        print("\n" + "You have selcted wrong configuration for the build.")
        print("pick the build type for specific Hardware Device from following options: ", choices)
        print("\n")
        # res is False only when the value has neither prefix, so the
        # HETERO/MULTI guidance is always shown here before exiting.
        invalid_hetero_build()

    if status_hetero is False:
        invalid_hetero_build()

    return device_read
2020-04-19 03:48:30 +00:00
def parse_arguments():
    """Define the build driver's command-line interface and parse it.

    Builds an ``argparse.ArgumentParser`` with every option the build script
    understands and parses the process arguments with it.

    Returns:
        argparse.Namespace: The result of ``parser.parse_args()``.

    Notes:
        ``--build_dir`` is the only option declared ``required``.
        ``--use_openvino`` values are validated by
        ``_openvino_verify_device_type``; the ``--cmake_generator`` default
        depends on ``is_windows()``.
    """
    parser = argparse.ArgumentParser(
        description="ONNXRuntime CI build driver.",
        usage=""" # noqa
        Default behavior is --update --build --test for native architecture builds.
        Default behavior is --update --build for cross-compiled builds.
        The Update phase will update git submodules, and run cmake to generate makefiles.
        The Build phase will build all projects.
        The Test phase will run all unit tests, and optionally the ONNX tests.
        Use the individual flags to only run the specified stages.
        """)

    # Main arguments
    parser.add_argument(
        "--build_dir", required=True, help="Path to the build directory.")
    parser.add_argument(
        "--config", nargs="+", default=["Debug"],
        choices=["Debug", "MinSizeRel", "Release", "RelWithDebInfo"],
        help="Configuration(s) to build.")
    parser.add_argument(
        "--update", action='store_true', help="Update makefiles.")
    parser.add_argument("--build", action='store_true', help="Build.")
    parser.add_argument(
        "--clean", action='store_true',
        help="Run 'cmake --build --target clean' for the selected config/s.")
    parser.add_argument(
        "--parallel", nargs='?', const='0', default='1', type=int,
        help="Use parallel build. The optional value specifies the maximum number of parallel jobs. "
             "If the optional value is 0 or unspecified, it is interpreted as the number of CPUs.")
    parser.add_argument("--test", action='store_true', help="Run unit tests.")
    parser.add_argument(
        "--skip_tests", action='store_true', help="Skip all tests.")

    # Training options
    parser.add_argument(
        "--enable_nvtx_profile", action='store_true', help="Enable NVTX profile in ORT.")
    parser.add_argument(
        "--enable_training", action='store_true', help="Enable training in ORT.")
    parser.add_argument(
        "--enable_training_python_frontend_e2e_tests", action="store_true",
        help="Enable the pytorch frontend training tests.")
    parser.add_argument(
        "--enable_training_pipeline_e2e_tests", action="store_true",
        help="Enable the pipeline c++ e2e tests.")
    parser.add_argument(
        "--use_horovod", action='store_true', help="Enable Horovod.")
    parser.add_argument(
        "--disable_nccl", action='store_true', help="Disable Nccl.")
    parser.add_argument(
        "--mpi_home", help="Path to MPI installation dir")
    parser.add_argument(
        "--nccl_home", help="Path to NCCL installation dir")

    # enable ONNX tests
    parser.add_argument(
        "--enable_onnx_tests", action='store_true',
        help="""When running the Test phase, run onnx_test_running against
        available test data directories.""")
    parser.add_argument("--path_to_protoc_exe", help="Path to protoc exe.")
    parser.add_argument(
        "--fuzz_testing", action='store_true', help="Enable Fuzz testing of the onnxruntime.")
    parser.add_argument(
        "--enable_symbolic_shape_infer_tests", action='store_true',
        help="""When running the Test phase, run symbolic shape inference against
        available test data directories.""")

    # generate documentation
    parser.add_argument(
        "--gen_doc", action='store_true',
        help="Generate documentation on contrib ops")

    # CUDA related
    parser.add_argument("--use_cuda", action='store_true', help="Enable CUDA.")
    parser.add_argument(
        "--cuda_version", help="The version of CUDA toolkit to use. "
        "Auto-detect if not specified. e.g. 9.0")
    # The first fragment needs its trailing space: adjacent string literals
    # are concatenated with no separator.
    parser.add_argument(
        "--cuda_home", help="Path to CUDA home. "
        "Read from CUDA_HOME environment variable if --use_cuda is true and "
        "--cuda_home is not specified.")
    parser.add_argument(
        "--cudnn_home", help="Path to CUDNN home. "
        "Read from CUDNN_HOME environment variable if --use_cuda is true and "
        "--cudnn_home is not specified.")

    # Python bindings
    parser.add_argument(
        "--enable_pybind", action='store_true', help="Enable Python Bindings.")
    parser.add_argument(
        "--build_wheel", action='store_true', help="Build Python Wheel.")
    parser.add_argument(
        "--wheel_name_suffix", help="Suffix to append to created wheel names. "
        "This value is currently only used for nightly builds.")
    parser.add_argument(
        "--numpy_version", help="Installs a specific version of numpy "
        "before building the python binding.")
    parser.add_argument(
        "--skip-keras-test", action='store_true',
        help="Skip tests with Keras if keras is installed")

    # C-Sharp bindings
    parser.add_argument(
        "--build_csharp", action='store_true',
        help="Build C#.Net DLL and NuGet package. This should be only used in CI pipelines. "
        "For building C# bindings and packaging them into nuget package use --build_nuget arg.")
    parser.add_argument(
        "--build_nuget", action='store_true',
        help="Build C#.Net DLL and NuGet package on the local machine. "
        "Currently only Windows and Linux platforms are supported.")

    # Java bindings
    parser.add_argument(
        "--build_java", action='store_true', help="Build Java bindings.")

    # Node.js binding
    parser.add_argument(
        "--build_nodejs", action='store_true',
        help="Build Node.js binding and NPM package.")

    # Build a shared lib
    parser.add_argument(
        "--build_shared_lib", action='store_true',
        help="Build a shared library for the ONNXRuntime.")

    # Build options
    parser.add_argument(
        "--cmake_extra_defines", nargs="+",
        help="Extra definitions to pass to CMake during build system "
        "generation. These are just CMake -D options without the leading -D.")
    parser.add_argument(
        "--target",
        help="Build a specific target, e.g. winml_dll")
    parser.add_argument(
        "--x86", action='store_true',
        help="Create x86 makefiles. Requires --update and no existing cache "
        "CMake setup. Delete CMakeCache.txt if needed")
    parser.add_argument(
        "--arm", action='store_true',
        help="Create ARM makefiles. Requires --update and no existing cache "
        "CMake setup. Delete CMakeCache.txt if needed")
    parser.add_argument(
        "--arm64", action='store_true',
        help="Create ARM64 makefiles. Requires --update and no existing cache "
        "CMake setup. Delete CMakeCache.txt if needed")
    parser.add_argument(
        "--msvc_toolset", help="MSVC toolset to use. e.g. 14.11")
    parser.add_argument("--android", action='store_true', help='Build for Android')
    parser.add_argument(
        "--android_abi", default="arm64-v8a",
        choices=["armeabi-v7a", "arm64-v8a", "x86", "x86_64"],
        help="Specify the target Android Application Binary Interface (ABI)")
    parser.add_argument("--android_api", type=int, default=27, help='Android API Level, e.g. 21')
    parser.add_argument("--android_sdk_path", type=str, help='Path to the Android SDK')
    parser.add_argument("--android_ndk_path", default="", help="Path to the Android NDK")
    parser.add_argument("--android_cpp_shared", action="store_true",
                        help="Build with shared libc++ instead of the default static libc++.")
    parser.add_argument("--test_binary_size", action="store_true",
                        help="If enabled, build will fail when the built binary size is larger than the threshold. "
                        "This only applies to Android Minimal build for now.")

    parser.add_argument("--ios", action='store_true', help="build for ios")
    parser.add_argument(
        "--ios_sysroot", default="",
        help="Specify the location name of the macOS platform SDK to be used")
    parser.add_argument(
        "--ios_toolchain_dir", default="",
        help="Path to ios toolchain binaries")
    parser.add_argument(
        "--ios_toolchain_file", default="",
        help="Path to ios toolchain file, "
        "or cmake/onnxruntime_ios.toolchain.cmake will be used")
    parser.add_argument(
        "--xcode_code_signing_team_id", default="",
        help="The development team ID used for code signing in Xcode")
    parser.add_argument(
        "--use_xcode", action='store_true',
        help="Use Xcode as cmake generator, this is only supported on MacOS.")
    parser.add_argument(
        "--osx_arch",
        default="arm64" if platform.machine() == "arm64" else "x86_64",
        choices=["arm64", "x86_64"],
        help="Specify the Target specific architectures for macOS and iOS, This is only supported on MacOS")
    # The middle fragment previously ran straight into the next sentence
    # ("...iOS)This is only..."); a separator is added.
    parser.add_argument(
        "--apple_deploy_target", type=str,
        help="Specify the minimum version of the target platform "
        "(e.g. macOS or iOS). "
        "This is only supported on MacOS")

    # Arguments needed by CI
    parser.add_argument(
        "--cmake_path", default="cmake", help="Path to the CMake program.")
    parser.add_argument(
        "--ctest_path", default="ctest", help="Path to the CTest program.")
    parser.add_argument(
        "--skip_submodule_sync", action='store_true', help="Don't do a "
        "'git submodule update'. Makes the Update phase faster.")
    parser.add_argument(
        "--use_vstest", action='store_true',
        help="Use use_vstest for running unitests.")
    parser.add_argument(
        "--use_jemalloc", action='store_true', help="Use jemalloc.")
    # NOTE(review): the default is a one-element list while the choices are
    # plain strings; kept as-is because consumers of this value are not
    # visible here -- confirm whether 'none' was intended.
    parser.add_argument(
        "--use_mimalloc", default=['none'],
        choices=['none', 'stl', 'arena', 'all'], help="Use mimalloc.")
    parser.add_argument(
        "--use_openblas", action='store_true', help="Build with OpenBLAS.")
    parser.add_argument(
        "--use_dnnl", action='store_true', help="Build with DNNL.")
    parser.add_argument(
        "--dnnl_gpu_runtime", action='store', default='', type=str.lower,
        help="e.g. --dnnl_gpu_runtime ocl")
    parser.add_argument(
        "--dnnl_opencl_root", action='store', default='',
        help="Path to OpenCL SDK. "
        "e.g. --dnnl_opencl_root \"C:/Program Files (x86)/IntelSWTools/sw_dev_tools/OpenCL/sdk\"")
    parser.add_argument(
        "--use_mklml", action='store_true', help="Build with MKLML.")
    parser.add_argument(
        "--use_featurizers", action='store_true',
        help="Build with ML Featurizer support.")
    parser.add_argument(
        "--use_openvino", nargs="?", const="CPU_FP32",
        type=_openvino_verify_device_type,
        help="Build with OpenVINO for specific hardware.")
    parser.add_argument(
        "--use_nnapi", action='store_true', help="Build with NNAPI support.")
    parser.add_argument(
        "--nnapi_min_api", type=int,
        help="Minimum Android API level to enable NNAPI, should be no less than 27")
    parser.add_argument(
        "--use_rknpu", action='store_true', help="Build with RKNPU.")
    parser.add_argument(
        "--use_preinstalled_eigen", action='store_true',
        help="Use pre-installed Eigen.")
    parser.add_argument("--eigen_path", help="Path to pre-installed Eigen.")
    parser.add_argument(
        "--use_openmp", action='store_true', help="Build with OpenMP")
    parser.add_argument(
        "--enable_msinternal", action="store_true",
        help="Enable for Microsoft internal builds only.")
    parser.add_argument("--llvm_path", help="Path to llvm dir")
    parser.add_argument(
        "--use_vitisai", action='store_true', help="Build with Vitis-AI")
    parser.add_argument(
        "--use_nuphar", action='store_true', help="Build with nuphar")
    parser.add_argument(
        "--use_tensorrt", action='store_true', help="Build with TensorRT")
    parser.add_argument(
        "--tensorrt_home", help="Path to TensorRT installation dir")
    parser.add_argument(
        "--use_migraphx", action='store_true', help="Build with MIGraphX")
    parser.add_argument(
        "--migraphx_home", help="Path to MIGraphX installation dir")
    parser.add_argument(
        "--use_full_protobuf", action='store_true',
        help="Use the full protobuf library")
    parser.add_argument(
        "--skip_onnx_tests", action='store_true', help="Explicitly disable "
        "all onnx related tests. Note: Use --skip_tests to skip all tests.")
    parser.add_argument(
        "--skip_winml_tests", action='store_true',
        help="Explicitly disable all WinML related tests")
    parser.add_argument(
        "--skip_nodejs_tests", action='store_true',
        help="Explicitly disable all Node.js binding tests")
    parser.add_argument(
        "--enable_msvc_static_runtime", action='store_true',
        help="Enable static linking of MSVC runtimes.")
    parser.add_argument(
        "--enable_language_interop_ops", action='store_true',
        help="Enable operator implemented in language other than cpp")
    parser.add_argument(
        "--cmake_generator",
        choices=['Visual Studio 15 2017', 'Visual Studio 16 2019', 'Ninja'],
        default='Visual Studio 15 2017' if is_windows() else None,
        help="Specify the generator that CMake invokes. "
        "This is only supported on Windows")
    parser.add_argument(
        "--enable_multi_device_test", action='store_true',
        help="Test with multi-device. Mostly used for multi-device GPU")
    parser.add_argument(
        "--use_dml", action='store_true', help="Build with DirectML.")
    parser.add_argument(
        "--use_winml", action='store_true', help="Build with WinML.")
    parser.add_argument(
        "--winml_root_namespace_override", type=str,
        help="Specify the namespace that WinML builds into.")
    parser.add_argument(
        "--use_telemetry", action='store_true',
        help="Only official builds can set this flag to enable telemetry.")
    parser.add_argument(
        "--enable_wcos", action='store_true',
        help="Build for Windows Core OS.")
    parser.add_argument(
        "--enable_windows_store", action='store_true',
        help="Build for Windows Store")
    parser.add_argument(
        "--enable_lto", action='store_true',
        help="Enable Link Time Optimization")
    parser.add_argument(
        "--use_acl", nargs="?", const="ACL_1905",
        choices=["ACL_1902", "ACL_1905", "ACL_1908", "ACL_2002"],
        help="Build with ACL for ARM architectures.")
    parser.add_argument(
        "--acl_home", help="Path to ACL home dir")
    parser.add_argument(
        "--acl_libs", help="Path to ACL libraries")
    parser.add_argument(
        "--use_armnn", action='store_true',
        help="Enable ArmNN Execution Provider.")
    parser.add_argument(
        "--armnn_relu", action='store_true',
        help="Use the Relu operator implementation from the ArmNN EP.")
    parser.add_argument(
        "--armnn_bn", action='store_true',
        help="Use the Batch Normalization operator implementation from the ArmNN EP.")
    parser.add_argument(
        "--armnn_home", help="Path to ArmNN home dir")
    parser.add_argument(
        "--armnn_libs", help="Path to ArmNN libraries")
    parser.add_argument(
        "--build_micro_benchmarks", action='store_true',
        help="Build ONNXRuntime micro-benchmarks.")

    # options to reduce binary size
    parser.add_argument("--minimal_build", action='store',
                        const='on', default='off', nargs='?', type=str.lower,
                        help="Create a build that only supports ORT format models. "
                        "See /docs/ONNX_Runtime_Format_Model_Usage.md for more information. "
                        "RTTI is automatically disabled in a minimal build. "
                        "To enable execution providers that compile kernels at runtime (e.g. NNAPI) pass 'extended' "
                        "as a parameter. e.g. '--minimal_build extended'.")
    parser.add_argument("--include_ops_by_model", type=str, help="include ops from model(s) under designated path.")
    parser.add_argument("--include_ops_by_config", type=str,
                        help="include ops from config file. "
                        "See /docs/Reduced_Operator_Kernel_build.md for more information.")
    parser.add_argument("--disable_contrib_ops", action='store_true',
                        help="Disable contrib ops (reduces binary size)")
    parser.add_argument("--disable_ml_ops", action='store_true',
                        help="Disable traditional ML ops (reduces binary size)")
    parser.add_argument("--disable_rtti", action='store_true', help="Disable RTTI (reduces binary size)")
    parser.add_argument("--disable_exceptions", action='store_true',
                        help="Disable exceptions to reduce binary size. Requires --minimal_build.")
    parser.add_argument("--disable_ort_format_load", action='store_true',
                        help='Disable support for loading ORT format models in a non-minimal build.')

    parser.add_argument("--use_rocm", action='store_true', help="Build with ROCm")
    parser.add_argument("--rocm_home", help="Path to ROCm installation dir")

    # Code coverage
    parser.add_argument("--code_coverage", action='store_true',
                        help="Generate code coverage when targetting Android (only).")

    return parser.parse_args()
2020-04-19 03:48:30 +00:00
def resolve_executable_path(command_or_path):
    """Return the absolute, symlink-resolved path of an executable.

    Args:
        command_or_path: A command name to look up with ``shutil.which``, or
            a path to an executable.

    Returns:
        The ``os.path.realpath`` of the located executable.

    Raises:
        BuildError: If ``shutil.which`` cannot locate the executable.
    """
    located = shutil.which(command_or_path)
    if located is None:
        raise BuildError("Failed to resolve executable path for "
                         "'{}'.".format(command_or_path))
    return os.path.realpath(located)
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
def is_windows():
    """Return True when ``sys.platform`` starts with "win"."""
    return sys.platform.startswith("win")
2020-04-19 03:48:30 +00:00
def is_macOS():
    """Return True when ``sys.platform`` starts with "darwin"."""
    return sys.platform.startswith("darwin")
def is_linux():
    """Return True when ``sys.platform`` starts with "linux"."""
    return sys.platform.startswith("linux")
def get_linux_distro(os_release_path='/etc/os-release'):
    """Read the distribution name and version from an os-release file.

    Args:
        os_release_path: File to read; defaults to ``/etc/os-release``.

    Returns:
        A ``(name, version)`` tuple taken from the ``NAME`` and ``VERSION``
        entries with surrounding double quotes stripped. A missing entry
        yields ``''``; if the file cannot be read or decoded the result is
        ``('', '')``.
    """
    dist_info = {}
    try:
        with open(os_release_path, 'r') as f:
            for line in f:
                entry = line.strip()
                # Skip blank lines and comments. Previously a single line
                # without '=' made the whole lookup fall back to ('', '').
                if not entry or entry.startswith('#'):
                    continue
                key, separator, value = entry.partition('=')
                if not separator:
                    continue
                dist_info[key] = value
    except (IOError, ValueError):
        return '', ''
    return (dist_info.get('NAME', '').strip('"'),
            dist_info.get('VERSION', '').strip('"'))
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
def is_ubuntu_1604():
    """Return True when the distro name is 'Ubuntu' and its version starts with '16.04'."""
    name, version = get_linux_distro()
    return name == 'Ubuntu' and version.startswith('16.04')
2018-11-20 00:48:22 +00:00
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
def get_config_build_dir(build_dir, config):
    """Return the per-configuration build directory, ``<build_dir>/<config>``."""
    return os.path.join(build_dir, config)
2020-04-19 03:48:30 +00:00
def run_subprocess(args, cwd=None, capture_stdout=False, dll_path=None,
                   shell=False, env=None):
    """Run a command through ``run`` with an augmented copy of the environment.

    Args:
        args: Sequence of command-line strings; a bare ``str`` is rejected.
        cwd: Forwarded to ``run``.
        capture_stdout: Forwarded to ``run``.
        dll_path: Optional library directory. On Windows it is prepended to
            ``PATH``; elsewhere it is appended to (or becomes)
            ``LD_LIBRARY_PATH``.
        shell: Forwarded to ``run``.
        env: Optional mapping of extra environment variables, applied last so
            it overrides inherited values. ``None`` means no extras.

    Returns:
        Whatever ``run`` returns.

    Raises:
        ValueError: If ``args`` is a string rather than a sequence of strings.
    """
    if isinstance(args, str):
        raise ValueError("args should be a sequence of strings, not a string")

    # Work on a copy so the parent process environment is never modified.
    my_env = os.environ.copy()
    if dll_path:
        if is_windows():
            my_env["PATH"] = dll_path + os.pathsep + my_env["PATH"]
        elif "LD_LIBRARY_PATH" in my_env:
            my_env["LD_LIBRARY_PATH"] += os.pathsep + dll_path
        else:
            my_env["LD_LIBRARY_PATH"] = dll_path

    if env:
        my_env.update(env)

    return run(*args, cwd=cwd, capture_stdout=capture_stdout, shell=shell, env=my_env)
2018-11-20 00:48:22 +00:00
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
def update_submodules(source_dir):
    """Sync and then update (with init) all git submodules, recursively.

    Both git commands are run with ``source_dir`` as the working directory.
    """
    git_commands = (
        ["git", "submodule", "sync", "--recursive"],
        ["git", "submodule", "update", "--init", "--recursive"],
    )
    for command in git_commands:
        run_subprocess(command, cwd=source_dir)
2018-11-20 00:48:22 +00:00
def is_docker():
    """Report whether the process appears to be running inside Docker.

    Returns:
        True if ``/.dockerenv`` exists, or if ``/proc/self/cgroup`` is a
        regular file with at least one line containing 'docker'; otherwise
        False.
    """
    if os.path.exists('/.dockerenv'):
        return True

    path = '/proc/self/cgroup'
    if not os.path.isfile(path):
        return False

    # Use a context manager so the file handle is closed deterministically.
    with open(path) as cgroup_file:
        return any('docker' in line for line in cgroup_file)
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
def is_sudo():
    """Return True when the ``SUDO_UID`` environment variable is set."""
    return 'SUDO_UID' in os.environ
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
def install_apt_package(package):
    """Install an APT package unless ``apt list --installed`` already shows it.

    The package counts as present when its name occurs anywhere in the
    stringified stdout of ``apt list --installed <package>``.

    Args:
        package: Name of the APT package.

    Raises:
        BuildError: If the package is missing and ``is_sudo()`` is False.
    """
    listing = run_subprocess(
        ["apt", "list", "--installed", package], capture_stdout=True)
    if package in str(listing.stdout):
        return

    if not is_sudo():
        raise BuildError(package + " APT package missing. Please re-run "
                         "this script using sudo to install.")
    run_subprocess(['apt-get', 'install', '-y', package])
2018-11-20 00:48:22 +00:00
def install_ubuntu_deps(args):
    """Check if the necessary Ubuntu dependencies are installed.

    Not required on docker. Provide help output if missing.

    Args:
        args: Parsed options; only ``enable_pybind`` and ``use_openblas``
            are read.

    Raises:
        BuildError: If installing a required APT package fails; the original
            exception is attached as the cause.
    """
    # check we need the packages first
    if not (args.enable_pybind or args.use_openblas):
        return

    # not needed on docker as packages are pre-installed
    if is_docker():
        return

    try:
        if args.enable_pybind:
            install_apt_package("python3")
        if args.use_openblas:
            install_apt_package("libopenblas-dev")
    except Exception as e:
        # Chain the cause so the underlying failure stays in the traceback.
        raise BuildError("Error setting up required APT packages. "
                         "{}".format(str(e))) from e
2018-11-20 00:48:22 +00:00
def install_python_deps(numpy_version=""):
    """pip-install the Python packages the build and its tests rely on.

    Args:
        numpy_version: when non-empty, numpy is pinned to exactly this
            version; otherwise 'numpy>=1.16.6' is requested.

    The install runs with the interpreter executing this script
    (sys.executable) and trusts the host files.pythonhosted.org.
    """
    if numpy_version:
        numpy_requirement = 'numpy=={}'.format(numpy_version)
    else:
        numpy_requirement = 'numpy>=1.16.6'

    requirements = [
        'setuptools',
        'wheel',
        'pytest',
        numpy_requirement,
        'sympy>=1.1',
        'packaging',
        'cerberus',
    ]
    pip_install = [sys.executable, '-m', 'pip', 'install', '--trusted-host',
                   'files.pythonhosted.org']
    run_subprocess(pip_install + requirements)
2018-11-20 00:48:22 +00:00
2020-07-10 21:02:28 +00:00
# We need to install Torch to test certain functionalities of the ORT Python package
def install_torch():
    """pip-install the pinned '+cu101' builds of torch and torchvision.

    The wheels are looked up through the PyTorch 'torch_stable.html' index
    passed to pip with '-f'.
    """
    # NOTE(review): the original comment read "Command works for both
    # Windows" - presumably "Windows and Linux"; confirm.
    pip_install = [sys.executable, '-m', 'pip', 'install', '--trusted-host',
                   'files.pythonhosted.org']
    pinned_packages = ['torch===1.5.1+cu101', 'torchvision===0.6.1+cu101']
    wheel_index = ['-f', 'https://download.pytorch.org/whl/torch_stable.html']
    run_subprocess(pip_install + pinned_packages + wheel_index)
2018-12-11 03:15:03 +00:00
def check_md5(filename, expected_md5):
    """Check that the file `filename` has the MD5 hex digest `expected_md5`.

    Returns:
        True when the digests match. False when the file does not exist or
        the digest differs; on a mismatch the file is also deleted and the
        mismatch is logged.
    """
    if not os.path.exists(filename):
        return False

    chunk_size = 1024*64
    md5 = hashlib.md5()
    with open(filename, "rb") as stream:
        # Hash in fixed-size chunks; read() returns b"" at end of file.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            md5.update(chunk)

    actual_md5 = md5.hexdigest()
    if actual_md5 == expected_md5:
        return True

    log.info('md5 mismatch, expect %s, got %s' % (expected_md5, actual_md5))
    os.remove(filename)
    return False
Upgrade TensorRT to version 7.0.0.11 (#2973) * update onnx-tensorrt submodule to trt7 branch * add fp16 option for TRT7 * switch to master branch of onnx tensorrt * update submodule * update to TensorRT7.0.0.11 * update to onnx-tensorrt for TensorRT7.0 * switch to private branch due to issues in master branch * remove trt_onnxify * disable warnings c4804 for TensorRT parser * disable warnings c4702 for TensorRT parser * add back sanity check of shape tensort input in the parser * disable some warnings for TensorRT7 * change fp16 threshold for TensorRT * update onn-tensorrt parser * fix cycle issue in faster-rcnn and add cycle detection in GetCapability * Update TensorRT container to v20.01 * Update TensorRT image name * Update linux-multi-gpu-tensorrt-ci-pipeline.yml * Update linux-gpu-tensorrt-ci-pipeline.yml * disable rnn tests for TensorRT * disable rnn tests for TensorRT * disabled some unit test for TensorRT * update onnx-tensorrt submodule * update build scripts for TensorRT * formating the code * Update TensorRT-ExecutionProvider.md * Update BUILD.md * Update tensorrt_execution_provider.h * Update tensorrt_execution_provider.cc * Update win-gpu-tensorrt-ci-pipeline.yml * use GetEnvironmentVar function to get env virables and switch to Win-GPU-2019 agent pool for win CI build * change tensorrt path * change tensorrt path * fix win ci build issue * update code based on the reviews * fix build issue * roll back to cuda10.0 * add RemoveCycleTest for TensorRT * fix windows ci build issues * fix ci build issues * fix file permission * fix out of range issue for max_workspace_size_env
2020-02-12 15:03:58 +00:00
def setup_test_data(build_dir, configs):
    """On Windows, link the test models into the build directories.

    Does nothing unless is_windows() is true. Otherwise:
      * '<build_dir>/models' is linked to 'C:\\local\\models' when the latter
        exists and the former does not;
      * for every config, its build directory is created if needed and a
        'models' entry is linked there - to 'C:\\local\\models' when that
        exists, else to '<build_dir>/models' when that exists. An existing
        'models' entry is left alone.

    Links are created by running `mklink /D /J <link> <target>`.
    """
    if not is_windows():
        return

    local_models = 'C:\\local\\models'

    def make_link(link, target):
        log.debug("creating shortcut %s -> %s" % (
            target, link))
        run_subprocess(['mklink', '/D', '/J', link, target], shell=True)

    src_model_dir = os.path.join(build_dir, 'models')
    if os.path.exists(local_models) and not os.path.exists(src_model_dir):
        make_link(src_model_dir, local_models)

    for config in configs:
        config_build_dir = get_config_build_dir(build_dir, config)
        os.makedirs(config_build_dir, exist_ok=True)
        dest_model_dir = os.path.join(config_build_dir, 'models')

        if os.path.exists(local_models):
            if not os.path.exists(dest_model_dir):
                make_link(dest_model_dir, local_models)
        elif os.path.exists(src_model_dir):
            if not os.path.exists(dest_model_dir):
                make_link(dest_model_dir, src_model_dir)
def use_dev_mode(args):
    """Return 'ON' or 'OFF' for the onnxruntime_DEV_MODE CMake option.

    Dev mode is 'OFF' when ACL or ArmNN is enabled, or when building for iOS
    on macOS; in every other case it is 'ON'.
    """
    dev_mode_off = args.use_acl or args.use_armnn or (args.ios and is_macOS())
    return 'OFF' if dev_mode_off else 'ON'
def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home, rocm_home,
                        mpi_home, nccl_home, tensorrt_home, migraphx_home, acl_home, acl_libs, armnn_home, armnn_libs,
                        path_to_protoc_exe, configs, cmake_extra_defines, args, cmake_extra_args):
    """Run the CMake configure step once for every requested configuration.

    A single list of CMake arguments is assembled from `args` and the
    various *_home / *_libs locations, then CMake is invoked through
    run_subprocess in each configuration's build directory with
    CMAKE_BUILD_TYPE and the memory-leak-checker option appended.

    Args:
        cmake_path: CMake executable; first element of the command and also
            the binary whose version is queried for Xcode builds.
        source_dir: repository root. '<source_dir>/cmake' is the CMake source
            directory and '<source_dir>/VERSION_NUMBER' supplies the version.
        build_dir: root under which get_config_build_dir() places the
            per-configuration build directories (created if missing).
        cuda_home, cudnn_home, rocm_home, tensorrt_home, migraphx_home:
            forwarded only when the matching args.use_* option is set.
        mpi_home, nccl_home, acl_home, acl_libs, armnn_home, armnn_libs:
            forwarded only when non-empty and the path exists.
        path_to_protoc_exe: if set, passed as ONNX_CUSTOM_PROTOC_EXECUTABLE.
        configs: iterable of configuration names.
        cmake_extra_defines: iterable of 'NAME=VALUE' strings, each passed as
            '-DNAME=VALUE'.
        args: parsed command-line options of this script.
        cmake_extra_args: extra arguments appended verbatim.

    Raises:
        BuildError: required iOS arguments are missing, the iOS cross
            compilers cannot be identified, fuzz testing is requested in an
            unsupported setup, or VERSION_NUMBER cannot be parsed.

    Side effects:
        May prepend entries to PATH and LD_LIBRARY_PATH in os.environ (iOS
        cross compile on a non-macOS host, Nuphar builds).
    """
    log.info("Generating CMake build tree")
    cmake_dir = os.path.join(source_dir, "cmake")
    # TODO: fix jemalloc build so it does not conflict with onnxruntime
    # shared lib builds. (e.g. onnxruntime_pybind)
    # for now, disable jemalloc if pybind is also enabled.
    cmake_args = [
        cmake_path, cmake_dir,
        "-Donnxruntime_RUN_ONNX_TESTS=" + (
            "ON" if args.enable_onnx_tests else "OFF"),
        "-Donnxruntime_BUILD_WINML_TESTS=" + (
            "OFF" if args.skip_winml_tests else "ON"),
        "-Donnxruntime_GENERATE_TEST_REPORTS=ON",
        "-Donnxruntime_DEV_MODE=" + use_dev_mode(args),
        "-DPYTHON_EXECUTABLE=" + sys.executable,
        "-Donnxruntime_USE_CUDA=" + ("ON" if args.use_cuda else "OFF"),
        "-Donnxruntime_CUDNN_HOME=" + (cudnn_home if args.use_cuda else ""),
        "-Donnxruntime_USE_FEATURIZERS=" + (
            "ON" if args.use_featurizers else "OFF"),
        "-Donnxruntime_CUDA_HOME=" + (cuda_home if args.use_cuda else ""),
        "-Donnxruntime_USE_JEMALLOC=" + ("ON" if args.use_jemalloc else "OFF"),
        "-Donnxruntime_USE_MIMALLOC_STL_ALLOCATOR=" + (
            "ON" if args.use_mimalloc == "stl" or
            args.use_mimalloc == "all" else "OFF"),
        "-Donnxruntime_USE_MIMALLOC_ARENA_ALLOCATOR=" + (
            "ON" if args.use_mimalloc == "arena" or
            args.use_mimalloc == "all" else "OFF"),
        "-Donnxruntime_ENABLE_PYTHON=" + (
            "ON" if args.enable_pybind else "OFF"),
        "-Donnxruntime_BUILD_CSHARP=" + ("ON" if args.build_csharp else "OFF"),
        "-Donnxruntime_BUILD_JAVA=" + ("ON" if args.build_java else "OFF"),
        "-Donnxruntime_BUILD_NODEJS=" + ("ON" if args.build_nodejs else "OFF"),
        "-Donnxruntime_BUILD_SHARED_LIB=" + (
            "ON" if args.build_shared_lib else "OFF"),
        "-Donnxruntime_USE_EIGEN_FOR_BLAS=" + (
            "OFF" if args.use_openblas else "ON"),
        "-Donnxruntime_USE_OPENBLAS=" + ("ON" if args.use_openblas else "OFF"),
        "-Donnxruntime_USE_DNNL=" + ("ON" if args.use_dnnl else "OFF"),
        "-Donnxruntime_DNNL_GPU_RUNTIME=" + (args.dnnl_gpu_runtime if args.use_dnnl else ""),
        "-Donnxruntime_DNNL_OPENCL_ROOT=" + (args.dnnl_opencl_root if args.use_dnnl else ""),
        "-Donnxruntime_USE_MKLML=" + ("ON" if args.use_mklml else "OFF"),
        "-Donnxruntime_USE_NNAPI_BUILTIN=" + ("ON" if args.use_nnapi else "OFF"),
        "-Donnxruntime_USE_RKNPU=" + ("ON" if args.use_rknpu else "OFF"),
        "-Donnxruntime_USE_OPENMP=" + (
            "ON" if args.use_openmp and not (
                args.use_nnapi or (args.use_mklml and (is_macOS() or is_windows())) or
                args.android or (args.ios and is_macOS())
                or args.use_rknpu)
            else "OFF"),
        "-Donnxruntime_USE_TVM=" + ("ON" if args.use_nuphar else "OFF"),
        "-Donnxruntime_USE_LLVM=" + ("ON" if args.use_nuphar else "OFF"),
        "-Donnxruntime_ENABLE_MICROSOFT_INTERNAL=" + (
            "ON" if args.enable_msinternal else "OFF"),
        "-Donnxruntime_USE_VITISAI=" + ("ON" if args.use_vitisai else "OFF"),
        "-Donnxruntime_USE_NUPHAR=" + ("ON" if args.use_nuphar else "OFF"),
        "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
        "-Donnxruntime_TENSORRT_HOME=" + (
            tensorrt_home if args.use_tensorrt else ""),
        # set vars for migraphx
        "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),
        "-Donnxruntime_MIGRAPHX_HOME=" + (migraphx_home if args.use_migraphx else ""),
        # By default - we currently support only cross compiling for
        # ARM/ARM64 (no native compilation supported through this
        # script).
        "-Donnxruntime_CROSS_COMPILING=" + (
            "ON" if args.arm64 or args.arm else "OFF"),
        "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"),
        "-Donnxruntime_DISABLE_ML_OPS=" + ("ON" if args.disable_ml_ops else "OFF"),
        "-Donnxruntime_DISABLE_RTTI=" + ("ON" if args.disable_rtti else "OFF"),
        "-Donnxruntime_DISABLE_EXCEPTIONS=" + ("ON" if args.disable_exceptions else "OFF"),
        "-Donnxruntime_DISABLE_ORT_FORMAT_LOAD=" + ("ON" if args.disable_ort_format_load else "OFF"),
        "-Donnxruntime_MINIMAL_BUILD=" + ("ON" if args.minimal_build != 'off' else "OFF"),
        "-Donnxruntime_EXTENDED_MINIMAL_BUILD=" + ("ON" if args.minimal_build == 'extended' else "OFF"),
        "-Donnxruntime_REDUCED_OPS_BUILD=" + (
            "ON" if args.include_ops_by_config or args.include_ops_by_model else "OFF"),
        "-Donnxruntime_MSVC_STATIC_RUNTIME=" + (
            "ON" if args.enable_msvc_static_runtime else "OFF"),
        # enable pyop if it is nightly build
        "-Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=" + (
            "ON" if args.enable_language_interop_ops else "OFF"),
        "-Donnxruntime_USE_DML=" + ("ON" if args.use_dml else "OFF"),
        "-Donnxruntime_USE_WINML=" + ("ON" if args.use_winml else "OFF"),
        "-Donnxruntime_USE_TELEMETRY=" + (
            "ON" if args.use_telemetry else "OFF"),
        "-Donnxruntime_ENABLE_LTO=" + ("ON" if args.enable_lto else "OFF"),
        "-Donnxruntime_USE_ACL=" + ("ON" if args.use_acl else "OFF"),
        "-Donnxruntime_USE_ACL_1902=" + (
            "ON" if args.use_acl == "ACL_1902" else "OFF"),
        "-Donnxruntime_USE_ACL_1905=" + (
            "ON" if args.use_acl == "ACL_1905" else "OFF"),
        "-Donnxruntime_USE_ACL_1908=" + (
            "ON" if args.use_acl == "ACL_1908" else "OFF"),
        "-Donnxruntime_USE_ACL_2002=" + (
            "ON" if args.use_acl == "ACL_2002" else "OFF"),
        "-Donnxruntime_USE_ARMNN=" + (
            "ON" if args.use_armnn else "OFF"),
        "-Donnxruntime_ARMNN_RELU_USE_CPU=" + (
            "OFF" if args.armnn_relu else "ON"),
        "-Donnxruntime_ARMNN_BN_USE_CPU=" + (
            "OFF" if args.armnn_bn else "ON"),
        # Training related flags
        "-Donnxruntime_ENABLE_NVTX_PROFILE=" + (
            "ON" if args.enable_nvtx_profile else "OFF"),
        "-Donnxruntime_ENABLE_TRAINING=" + (
            "ON" if args.enable_training else "OFF"),
        "-Donnxruntime_USE_HOROVOD=" + (
            "ON" if args.use_horovod else "OFF"),
        "-Donnxruntime_USE_NCCL=" + (
            "OFF" if args.disable_nccl else "ON"),
        "-Donnxruntime_BUILD_BENCHMARKS=" + (
            "ON" if args.build_micro_benchmarks else "OFF"),
        "-Donnxruntime_USE_ROCM=" + ("ON" if args.use_rocm else "OFF"),
        "-Donnxruntime_ROCM_HOME=" + (rocm_home if args.use_rocm else ""),
        "-DOnnxruntime_GCOV_COVERAGE=" + ("ON" if args.code_coverage else "OFF"),
    ]

    # Optional locations are only forwarded when they are set and exist.
    if acl_home and os.path.exists(acl_home):
        cmake_args += ["-Donnxruntime_ACL_HOME=" + acl_home]

    if acl_libs and os.path.exists(acl_libs):
        cmake_args += ["-Donnxruntime_ACL_LIBS=" + acl_libs]

    if armnn_home and os.path.exists(armnn_home):
        cmake_args += ["-Donnxruntime_ARMNN_HOME=" + armnn_home]

    if armnn_libs and os.path.exists(armnn_libs):
        cmake_args += ["-Donnxruntime_ARMNN_LIBS=" + armnn_libs]

    if mpi_home and os.path.exists(mpi_home):
        cmake_args += ["-Donnxruntime_MPI_HOME=" + mpi_home]

    if nccl_home and os.path.exists(nccl_home):
        cmake_args += ["-Donnxruntime_NCCL_HOME=" + nccl_home]

    if args.winml_root_namespace_override:
        cmake_args += ["-Donnxruntime_WINML_NAMESPACE_OVERRIDE=" +
                       args.winml_root_namespace_override]

    if args.use_openvino:
        cmake_args += ["-Donnxruntime_USE_OPENVINO=ON",
                       "-Donnxruntime_USE_OPENVINO_MYRIAD=" + (
                           "ON" if args.use_openvino == "MYRIAD_FP16" else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_GPU_FP32=" + (
                           "ON" if args.use_openvino == "GPU_FP32" else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + (
                           "ON" if args.use_openvino == "GPU_FP16" else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + (
                           "ON" if args.use_openvino == "CPU_FP32" else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_VAD_M=" + (
                           "ON" if args.use_openvino == "VAD-M_FP16" else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_VAD_F=" + (
                           "ON" if args.use_openvino == "VAD-F_FP32" else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_HETERO=" + (
                           "ON" if args.use_openvino.startswith("HETERO") else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
                       "-Donnxruntime_USE_OPENVINO_MULTI=" + (
                           "ON" if args.use_openvino.startswith("MULTI") else "OFF"),
                       "-Donnxruntime_USE_OPENVINO_BINARY=" + (
                           "ON" if args.use_openvino else "OFF")]

    # temp turn on only for linux gpu build
    if not is_windows():
        if args.use_cuda:
            cmake_args += [
                "-Donnxruntime_USE_FULL_PROTOBUF=ON"]

    # TensorRT and OpenVINO providers currently only supports
    # full_protobuf option.
    if (args.use_full_protobuf or args.use_tensorrt or
            args.use_openvino or args.use_vitisai or args.gen_doc):
        cmake_args += [
            "-Donnxruntime_USE_FULL_PROTOBUF=ON",
            "-DProtobuf_USE_STATIC_LIBS=ON"
        ]

    if args.use_nuphar and args.llvm_path is not None:
        cmake_args += ["-DLLVM_DIR=%s" % args.llvm_path]

    if args.use_cuda and not is_windows():
        nvml_stub_path = cuda_home + "/lib64/stubs"
        cmake_args += ["-DCUDA_CUDA_LIBRARY=" + nvml_stub_path]

    if args.use_preinstalled_eigen:
        cmake_args += ["-Donnxruntime_USE_PREINSTALLED_EIGEN=ON",
                       "-Deigen_SOURCE_PATH=" + args.eigen_path]

    if args.nnapi_min_api:
        cmake_args += ["-Donnxruntime_NNAPI_MIN_API=" + str(args.nnapi_min_api)]

    if args.android:
        cmake_args += [
            "-DCMAKE_TOOLCHAIN_FILE=" + args.android_ndk_path + "/build/cmake/android.toolchain.cmake",
            "-DANDROID_PLATFORM=android-" + str(args.android_api),
            "-DANDROID_ABI=" + str(args.android_abi)
        ]

        if args.android_cpp_shared:
            cmake_args += ["-DANDROID_STL=c++_shared"]

    if is_macOS() and not args.android:
        cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=" + args.osx_arch]
        # since cmake 3.19, it uses the xcode latest buildsystem, which is not supported by this project.
        if args.use_xcode:
            # Ask the CMake binary that will generate the tree (cmake_path),
            # not whichever 'cmake' is first on PATH, and only when the
            # answer is needed. Output starts with "cmake version X.Y.Z".
            cmake_verstr = subprocess.check_output([cmake_path, '--version']).decode('utf-8').split()[2]
            if LooseVersion(cmake_verstr) >= LooseVersion('3.19.0'):
                cmake_args += ["-T", "buildsystem=1"]

    if args.ios:
        if is_macOS():
            needed_args = [
                args.use_xcode,
                args.ios_sysroot,
                args.apple_deploy_target,
            ]
            arg_names = [
                "--use_xcode            " +
                "<need use xcode to cross build iOS on MacOS>",
                "--ios_sysroot          " +
                "<the location or name of the macOS platform SDK>",
                "--apple_deploy_target  " +
                "<the minimum version of the target platform>",
            ]
            if not all(needed_args):
                raise BuildError(
                    "iOS build on MacOS canceled due to missing arguments: " +
                    ', '.join(
                        val for val, cond in zip(arg_names, needed_args)
                        if not cond))
            cmake_args += [
                "-DCMAKE_SYSTEM_NAME=iOS",
                "-Donnxruntime_BUILD_SHARED_LIB=ON",
                "-DCMAKE_OSX_SYSROOT=" + args.ios_sysroot,
                "-DCMAKE_OSX_DEPLOYMENT_TARGET=" + args.apple_deploy_target,
                # we do not need protoc binary for ios cross build
                "-Dprotobuf_BUILD_PROTOC_BINARIES=OFF",
                "-DCMAKE_TOOLCHAIN_FILE=" + (
                    args.ios_toolchain_file if args.ios_toolchain_file
                    else "../cmake/onnxruntime_ios.toolchain.cmake")
            ]
            # Code sign the binaries, if the code signing development team id is provided
            if args.xcode_code_signing_team_id:
                cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id]
        else:
            # TODO: the cross compiling on Linux is not officially supported by Apple
            #   and is already broken with the latest codebase, so it should be removed.
            # We are cross compiling on Linux
            needed_args = [
                args.ios_sysroot,
                args.arm64 or args.arm,
                args.ios_toolchain_dir
            ]
            arg_names = [
                "--ios_sysroot <path to sysroot>",
                "--arm or --arm64",
                "--ios_toolchain_dir <path to toolchain>"
            ]
            if not all(needed_args):
                raise BuildError(
                    "iOS build canceled due to missing arguments: " +
                    ', '.join(
                        val for val, cond in zip(arg_names, needed_args)
                        if not cond))
            compilers = sorted(
                glob.glob(args.ios_toolchain_dir + "/bin/*-clang*"))
            os.environ["PATH"] = os.path.join(
                args.ios_toolchain_dir, "bin") + os.pathsep + os.environ.get(
                    "PATH", "")
            # The component must be relative: os.path.join() discards every
            # earlier component when a later one is absolute, so "/lib" would
            # yield plain "/lib" instead of "<toolchain>/lib".
            os.environ["LD_LIBRARY_PATH"] = os.path.join(
                args.ios_toolchain_dir, "lib") + os.pathsep + os.environ.get(
                    "LD_LIBRARY_PATH", "")
            if len(compilers) != 2:
                raise BuildError(
                    "error identifying compilers in ios_toolchain_dir")
            cmake_args += [
                "-DCMAKE_OSX_ARCHITECTURES=" +
                ("arm64" if args.arm64 else "arm"),
                "-DCMAKE_SYSTEM_NAME=iOSCross",
                "-Donnxruntime_BUILD_UNIT_TESTS=OFF",
                "-DCMAKE_OSX_SYSROOT=" + args.ios_sysroot,
                "-DCMAKE_C_COMPILER=" + compilers[0],
                "-DCMAKE_CXX_COMPILER=" + compilers[1]
            ]

    if path_to_protoc_exe:
        cmake_args += [
            "-DONNX_CUSTOM_PROTOC_EXECUTABLE=%s" % path_to_protoc_exe]

    if args.fuzz_testing:
        if not (args.build_shared_lib and
                is_windows() and
                args.cmake_generator == 'Visual Studio 16 2019' and
                args.use_full_protobuf):
            raise BuildError(
                "Fuzz test has only be tested with build shared libs option using MSVC on windows")
        cmake_args += [
            "-Donnxruntime_BUILD_UNIT_TESTS=ON",
            "-Donnxruntime_FUZZ_TEST=ON",
            "-Donnxruntime_USE_FULL_PROTOBUF=ON"]

    if args.gen_doc:
        cmake_args += ["-Donnxruntime_PYBIND_EXPORT_OPSCHEMA=ON"]
    else:
        cmake_args += ["-Donnxruntime_PYBIND_EXPORT_OPSCHEMA=OFF"]

    cmake_args += ["-D{}".format(define) for define in cmake_extra_defines]

    cmake_args += cmake_extra_args

    # ADO pipelines will store the pipeline build number
    # (e.g. 191101-2300.1.master) and source version in environment
    # variables. If present, use these values to define the
    # WinML/ORT DLL versions.
    build_number = os.getenv('Build_BuildNumber')
    source_version = os.getenv('Build_SourceVersion')
    if build_number and source_version:
        build_matches = re.fullmatch(
            r"(\d\d)(\d\d)(\d\d)(\d\d)\.(\d+)", build_number)
        if build_matches:
            YY = build_matches.group(2)
            MM = build_matches.group(3)
            DD = build_matches.group(4)

            # Get ORT major and minor number
            with open(os.path.join(source_dir, 'VERSION_NUMBER')) as f:
                first_line = f.readline()
                # The separator is a literal dot; an unescaped '.' would
                # accept any character between the two numbers.
                ort_version_matches = re.match(r"(\d+)\.(\d+)", first_line)
                if not ort_version_matches:
                    raise BuildError("Couldn't read version from VERSION_FILE")
                ort_major = ort_version_matches.group(1)
                ort_minor = ort_version_matches.group(2)
                # Example (BuildNumber: 191101-2300.1.master,
                # SourceVersion: 0bce7ae6755c792eda558e5d27ded701707dc404)
                # MajorPart = 1
                # MinorPart = 0
                # BuildPart = 1911
                # PrivatePart = 123
                # String = 191101-2300.1.master.0bce7ae
                cmake_args += [
                    "-DVERSION_MAJOR_PART={}".format(ort_major),
                    "-DVERSION_MINOR_PART={}".format(ort_minor),
                    "-DVERSION_BUILD_PART={}".format(YY),
                    "-DVERSION_PRIVATE_PART={}{}".format(MM, DD),
                    "-DVERSION_STRING={}.{}.{}.{}".format(
                        ort_major, ort_minor, build_number,
                        source_version[0:7])
                ]

    for config in configs:
        config_build_dir = get_config_build_dir(build_dir, config)
        os.makedirs(config_build_dir, exist_ok=True)
        if args.use_nuphar:
            os.environ["PATH"] = os.path.join(
                config_build_dir, "external", "tvm",
                config) + os.pathsep + os.path.dirname(sys.executable) + os.pathsep + os.environ["PATH"]

        run_subprocess(
            cmake_args + [
                "-Donnxruntime_ENABLE_MEMLEAK_CHECKER=" +
                ("ON" if config.lower() == 'debug' and not args.use_nuphar and not
                 args.use_openvino and not
                 args.enable_msvc_static_runtime
                 else "OFF"), "-DCMAKE_BUILD_TYPE={}".format(config)],
            cwd=config_build_dir)
2018-11-20 00:48:22 +00:00
def clean_targets(cmake_path, build_dir, configs):
    """Invoke the CMake 'clean' target for each configuration in `configs`.

    For every config the command
    `<cmake_path> --build <config build dir> --config <config> --target clean`
    is run through run_subprocess.
    """
    for config in configs:
        log.info("Cleaning targets for %s configuration", config)
        config_build_dir = get_config_build_dir(build_dir, config)
        run_subprocess([
            cmake_path,
            "--build", config_build_dir,
            "--config", config,
            "--target", "clean",
        ])
2020-04-19 03:48:30 +00:00
def build_targets(args, cmake_path, build_dir, configs, num_parallel_jobs, target=None):
    """Run "cmake --build" for every configuration in configs.

    When target is truthy it is passed through as "--target". When
    num_parallel_jobs is not 1, a parallelism option is added whose form
    depends on the platform / generator checks below. Arguments meant for
    the underlying build tool are appended after a "--" separator.
    """
    for config in configs:
        log.info("Building targets for %s configuration", config)
        command = [cmake_path,
                   "--build", get_config_build_dir(build_dir, config),
                   "--config", config]
        if target:
            command += ['--target', target]

        # Options forwarded verbatim to the native build tool (after "--").
        native_tool_args = []
        if num_parallel_jobs != 1:
            if is_windows() and args.cmake_generator != 'Ninja':
                native_tool_args = [
                    "/maxcpucount:{}".format(num_parallel_jobs),
                    # if nodeReuse is true, msbuild processes will stay
                    # around for a bit after the build completes
                    "/nodeReuse:False",
                ]
            elif is_macOS() and args.use_xcode:
                # CMake will generate correct build tool args for Xcode
                command += ["--parallel", str(num_parallel_jobs)]
            else:
                native_tool_args = ["-j{}".format(num_parallel_jobs)]

        if native_tool_args:
            command += ["--"] + native_tool_args

        # Extra environment entries handed to run_subprocess; only Android
        # builds add one.
        extra_env = {}
        if args.android:
            extra_env['ANDROID_SDK_ROOT'] = args.android_sdk_path

        run_subprocess(command, env=extra_env)
2018-11-20 00:48:22 +00:00
2020-04-19 03:48:30 +00:00
def add_dir_if_exists(directory, dir_list):
    """Append directory to dir_list (in place) only if it is an existing directory."""
    if not os.path.isdir(directory):
        return
    dir_list.append(directory)
2018-11-20 00:48:22 +00:00
2020-04-19 03:48:30 +00:00
def setup_cuda_vars(args):
    """Resolve and validate the CUDA and cuDNN home directories.

    Returns a (cuda_home, cudnn_home) tuple. Both entries are empty strings
    when args.use_cuda is falsy. Otherwise each value comes from the
    corresponding args attribute, falling back to the CUDA_HOME /
    CUDNN_HOME environment variables, and BuildError is raised unless both
    resolved paths exist.
    """
    if not args.use_cuda:
        return "", ""

    cuda_home = args.cuda_home or os.getenv("CUDA_HOME")
    cudnn_home = args.cudnn_home or os.getenv("CUDNN_HOME")

    # os.getenv() yields None for an unset variable, so guard before
    # handing the value to os.path.exists().
    cuda_home_valid = cuda_home is not None and os.path.exists(cuda_home)
    cudnn_home_valid = cudnn_home is not None and os.path.exists(cudnn_home)

    if not (cuda_home_valid and cudnn_home_valid):
        details = "cuda_home='{}' valid={}. cudnn_home='{}' valid={}".format(
            cuda_home, cuda_home_valid, cudnn_home, cudnn_home_valid)
        raise BuildError(
            "cuda_home and cudnn_home paths must be specified and valid.",
            details)

    return cuda_home, cudnn_home
Trt execution provider (#382) * updated cmake files for trt * added trt execution provider * added trt basic test * removed trt_path action attribute * Add files via upload * Update build.py * Update trt_allocator.h * fixed issues found by reviewers * changed cast operator * added comment for custom kernel implementation * changed auto to auto& * changed to function compile APIs for TRT execution provider * changed to function compile APIs for TRT execution provider * added new DType DInt64 * adapted to the changes of onnxruntime_c_api * removed trt kernel (use function compile instead) * updated onnx-tensorrt submodule * set default memory type to TRT fused kernel * resolve merge conflict * fixed the issue that USE_CUDA conflicts with USE_TRT * construct graph by adding nodes in topological order * made changes for Windows * change buffers type * bypass HasImplementationOf check for TRT XP because TRT kernel is not registered * added domain to version info in rebuilt model proto * added trt to test option list * added DomainToVersionMap() to GraphViewer * removed Copy() * fixed broken code * format the code to clang format * used local reference to the frequently used values * fixed a couple of issues according to reviewers feedback * fixed a couple of issues according to reviewers feedback * added python binding for TRT and enable use_cuda when use_trt is on * fixed a redefinition issue * changed shared_ptr to unique_ptr on trt engines, and made a few changes required by reviewers * enabled trtexecution provider for unit tests * renamed trt to tensorrt * added tesorrt to python binding * update submodule onnx and onnx-tensorrt * made a couple of minor changes based on reviewer's feedback * added CUDA_CHECK * removed test code * fixed broken code after merge * updated onnx-tensorrt submodule * added post processing to align trt inputs/outputs with graph inputs/outputs * updated onnx submodule * added CUDA fallback for TensorRT and fixed TensorRT cmake issue * 
added ci pipeline for tensorrt and removed some redundent code from trt xp * fixed syntax issue * updated onnx-tensorrt submodule * fix trt build problem by: (#602) 1. Add additional /wd for debug build 2. Add io.h for additional targets 3. Bring back mb version of getopt * Update install_ubuntu.sh * Update linux-gpu-tensorrt-ci-pipeline.yml * Update linux-gpu-tensorrt-ci-pipeline.yml * Update run_build.sh * Update run_build.sh * Update run_build.sh * Update run_build.sh * fixed the issue that GetKernelRegistry returns nullptr * merged master to this branch * moved some data types to private * fixed tensorrt CI pipeline issue * customized test data for TensorRT pipeline * added onnx-tensorrt in json file and fixed an issue in ci script * added comments
2019-03-14 19:00:39 +00:00
2020-04-19 03:48:30 +00:00
def setup_tensorrt_vars(args):
    """Resolve the TensorRT home directory and export TensorRT settings.

    Returns an empty string when args.use_tensorrt is falsy. Otherwise the
    path is taken from args.tensorrt_home, falling back to the
    TENSORRT_HOME environment variable; BuildError is raised if the
    resolved path does not exist. On success the ORT_TENSORRT_*
    environment variables below are set in os.environ and the path is
    returned.
    """
    if not args.use_tensorrt:
        return ""

    tensorrt_home = args.tensorrt_home or os.getenv("TENSORRT_HOME")
    tensorrt_home_valid = (tensorrt_home is not None and
                           os.path.exists(tensorrt_home))
    if not tensorrt_home_valid:
        raise BuildError(
            "tensorrt_home paths must be specified and valid.",
            "tensorrt_home='{}' valid={}.".format(
                tensorrt_home, tensorrt_home_valid))

    tensorrt_settings = (
        # Maximum workspace size in bytes for TensorRT
        # (1GB = 1073741824 bytes).
        ("ORT_TENSORRT_MAX_WORKSPACE_SIZE", "1073741824"),
        # Maximum number of iterations to detect unsupported nodes
        # and partition the models for TensorRT.
        ("ORT_TENSORRT_MAX_PARTITION_ITERATIONS", "1000"),
        # Minimum subgraph node size in graph partitioning for TensorRT.
        ("ORT_TENSORRT_MIN_SUBGRAPH_SIZE", "1"),
        # FP16 flag.
        ("ORT_TENSORRT_FP16_ENABLE", "0"),
    )
    for variable, value in tensorrt_settings:
        os.environ[variable] = value

    return tensorrt_home
2020-04-19 03:48:30 +00:00
def setup_migraphx_vars(args):
    """Resolve and validate the MIGraphX home directory.

    Returns an empty string when args.use_migraphx is falsy or when no
    home is configured. Otherwise the path is taken from
    args.migraphx_home, falling back to the MIGRAPHX_HOME environment
    variable.

    Raises:
        BuildError: if a home was configured but the path does not exist.
    """
    if not args.use_migraphx:
        return ''

    print("migraphx_home = {}".format(args.migraphx_home))
    # Normalise "not configured" (None or empty string) to None.
    migraphx_home = args.migraphx_home or os.getenv("MIGRAPHX_HOME") or None

    # A missing home is tolerated (existing behaviour); only a configured
    # path that does not exist is rejected.
    migraphx_home_valid = (migraphx_home is None or
                           os.path.exists(migraphx_home))
    if not migraphx_home_valid:
        # Report the real validity. The previous code formatted the
        # "not valid" flag under the "valid=" label, so an invalid path
        # was reported as valid=True.
        raise BuildError("migraphx_home paths must be specified and valid.",
                         "migraphx_home='{}' valid={}."
                         .format(migraphx_home, migraphx_home_valid))

    return migraphx_home or ''
def setup_dml_build(args, cmake_path, build_dir, configs):
    """Build the RESTORE_PACKAGES target for each configuration of a DML build.

    Does nothing when args.use_dml is falsy.
    """
    if not args.use_dml:
        return

    for config in configs:
        # Run the RESTORE_PACKAGES target to perform the initial
        # NuGet setup.
        restore_cmd = [cmake_path, "--build", get_config_build_dir(build_dir, config)]
        restore_cmd += ["--config", config]
        restore_cmd += ["--target", "RESTORE_PACKAGES"]
        run_subprocess(restore_cmd)
def setup_rocm_build(args, configs):
    """Validate the ROCm home directory and hipify each configuration.

    Returns an empty string when args.use_rocm is falsy or when
    args.rocm_home is not set; otherwise returns args.rocm_home. When
    args.use_rocm is set, amd_hipify() is called on the build directory of
    every configuration in configs (derived from args.build_dir).

    Raises:
        BuildError: if args.rocm_home is set but the path does not exist.
    """
    if not args.use_rocm:
        return ''

    print("rocm_home = {}".format(args.rocm_home))
    # Normalise "not configured" (None or empty string) to None.
    rocm_home = args.rocm_home or None

    # A missing home is tolerated (existing behaviour); only a configured
    # path that does not exist is rejected.
    rocm_home_valid = rocm_home is None or os.path.exists(rocm_home)
    if not rocm_home_valid:
        # Report the real validity. The previous code formatted the
        # "not valid" flag under the "valid=" label, so an invalid path
        # was reported as valid=True.
        raise BuildError("rocm_home paths must be specified and valid.",
                         "rocm_home='{}' valid={}."
                         .format(rocm_home, rocm_home_valid))

    for config in configs:
        amd_hipify(get_config_build_dir(args.build_dir, config))

    return rocm_home or ''
def adb_push(src, dest, **kwargs):
    """Run "adb push src dest" through run_subprocess and return its result.

    Extra keyword arguments are forwarded to run_subprocess unchanged.
    """
    push_cmd = ['adb', 'push', src, dest]
    return run_subprocess(push_cmd, **kwargs)
2020-04-19 03:48:30 +00:00
def adb_shell(*args, **kwargs):
    """Run "adb shell" with the given arguments and return run_subprocess's result.

    Positional arguments are appended to the command; keyword arguments are
    forwarded to run_subprocess unchanged.
    """
    shell_cmd = ['adb', 'shell'] + list(args)
    return run_subprocess(shell_cmd, **kwargs)
def run_android_tests(args, source_dir, config, cwd):
    """Run the Android tests, or the binary size check, for one build config.

    For the 'x86_64' ABI the emulator start script is run, the test data and
    test binaries are pushed to /data/local/tmp and executed through adb.
    For the 'arm64-v8a' ABI nothing is executed; only the size of the minimal
    build libonnxruntime.so is recorded and compared against a threshold.
    Any other ABI is a no-op.

    Args:
        args: Parsed build arguments. Reads android_abi, code_coverage,
            use_nnapi, build_shared_lib, minimal_build and test_binary_size.
        source_dir: Root of the source tree.
        config: Build configuration name, e.g. 'MinSizeRel'.
        cwd: Build output directory for this configuration.

    Raises:
        BuildError: If the arm64 libonnxruntime.so is larger than the threshold.
    """

    def run_adb_shell(cmd):
        # GCOV_PREFIX_STRIP specifies the depth of the directory hierarchy to strip and
        # GCOV_PREFIX specifies the root directory
        # for creating the runtime code coverage files.
        if args.code_coverage:
            # Adjacent literals are used instead of a backslash continuation inside
            # the literal, which would embed the source indentation in the command.
            adb_shell(
                'cd /data/local/tmp && GCOV_PREFIX=/data/local/tmp '
                'GCOV_PREFIX_STRIP={} {}'.format(cwd.count(os.sep) + 1, cmd))
        else:
            adb_shell('cd /data/local/tmp && ' + cmd)

    if args.android_abi == 'x86_64':
        run_subprocess([os.path.join(
            source_dir, 'tools', 'ci_build', 'github', 'android',
            'start_android_emulator.sh')])
        adb_push('testdata', '/data/local/tmp/', cwd=cwd)
        adb_push(
            os.path.join(source_dir, 'cmake', 'external', 'onnx', 'onnx', 'backend', 'test'),
            '/data/local/tmp/', cwd=cwd)
        adb_push('onnxruntime_test_all', '/data/local/tmp/', cwd=cwd)
        adb_push('onnx_test_runner', '/data/local/tmp/', cwd=cwd)
        run_adb_shell('/data/local/tmp/onnxruntime_test_all')
        # onnx_test_runner goes through adb_shell directly, i.e. without the
        # code coverage environment variables.
        if args.use_nnapi:
            adb_shell('cd /data/local/tmp && /data/local/tmp/onnx_test_runner -e nnapi /data/local/tmp/test')
        else:
            adb_shell('cd /data/local/tmp && /data/local/tmp/onnx_test_runner /data/local/tmp/test')
        # run shared_lib_test if necessary
        if args.build_shared_lib:
            adb_push('libonnxruntime.so', '/data/local/tmp/', cwd=cwd)
            adb_push('onnxruntime_shared_lib_test', '/data/local/tmp/', cwd=cwd)
            run_adb_shell(
                'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp && ' +
                '/data/local/tmp/onnxruntime_shared_lib_test')
    elif args.android_abi == 'arm64-v8a':
        # For the Android arm64 ABI we only verify the size of the binary generated by the
        # minimal build config. The build fails if the shared library exceeds the threshold.
        # run_onnxruntime_tests treats the string 'off' as "not a minimal build", so 'off'
        # is excluded here as well instead of relying on plain truthiness.
        is_minimal_build = bool(args.minimal_build) and args.minimal_build != 'off'
        if is_minimal_build and config == 'MinSizeRel' and args.build_shared_lib and args.test_binary_size:
            # Current size limit is 1165KB, which is 110KB larger than the 1.5.2 release.
            bin_size_threshold = 1165000
            bin_actual_size = os.path.getsize(os.path.join(cwd, 'libonnxruntime.so'))
            log.info('Android arm64 minsizerel libonnxruntime.so size [' + str(bin_actual_size) + 'B]')
            # Write the binary size to a file for uploading later
            with open(os.path.join(cwd, 'binary_size_data.txt'), 'w') as size_file:
                size_file.writelines([
                    'os,arch,build_config,size\n',
                    'android,arm64-v8a,minimal-baseline,' + str(bin_actual_size) + '\n'
                ])
            if bin_actual_size > bin_size_threshold:
                raise BuildError('Android arm64 minsizerel libonnxruntime.so size [' + str(bin_actual_size) +
                                 'B] is bigger than threshold [' + str(bin_size_threshold) + 'B]')
def run_ios_tests(args, source_dir, config, cwd):
    """Run the iOS simulator test schemes with xcodebuild for one configuration.

    The shared library scheme is only started when the first scheme finished
    with return code 0; the return code of the last xcodebuild invocation is
    then checked. ``args`` and ``source_dir`` are accepted but not used here.
    """
    simulator = "platform=iOS Simulator,OS=latest,name=iPhone SE (2nd generation)"

    def xcodebuild_test(scheme):
        # One xcodebuild test invocation for the given scheme.
        return run_subprocess(
            ["xcodebuild", "test", "-project", "./onnxruntime.xcodeproj",
             "-configuration", config,
             "-scheme", scheme, "-destination", simulator],
            cwd=cwd)

    result = xcodebuild_test("onnxruntime_test_all_xc")
    if result.returncode == 0:
        result = xcodebuild_test("onnxruntime_shared_lib_test_xc")
    result.check_returncode()
def run_orttraining_test_orttrainer_frontend_separately(cwd):
    """Run each test of orttraining_test_orttrainer_frontend.py in its own process.

    The test names are first collected in-process with ``pytest --collect-only``
    (parametrization suffixes such as ``[case0]`` are dropped), then every
    distinct name is run through a separate ``python -m pytest -k <name>``
    subprocess started in ``cwd``.

    Args:
        cwd: Directory that contains orttraining_test_orttrainer_frontend.py.
    """
    class TestNameCollecterPlugin:
        """pytest plugin that records the base names of all collected tests."""

        def __init__(self):
            self.collected = set()

        def pytest_collection_modifyitems(self, items):
            for item in items:
                print('item.name: ', item.name)
                test_name = item.name
                # Strip the parametrization id so each test function is listed once.
                start = test_name.find('[')
                if start > 0:
                    test_name = test_name[:start]
                self.collected.add(test_name)

    import pytest

    plugin = TestNameCollecterPlugin()
    test_script_filename = os.path.join(cwd, "orttraining_test_orttrainer_frontend.py")
    pytest.main(['--collect-only', test_script_filename], plugins=[plugin])

    # Sort the names: set iteration order for strings differs between interpreter
    # runs, which made the order of the test subprocesses non-reproducible.
    for test_name in sorted(plugin.collected):
        run_subprocess([
            sys.executable, '-m', 'pytest',
            'orttraining_test_orttrainer_frontend.py', '-v', '-k', test_name], cwd=cwd)
def run_training_python_frontend_tests(cwd):
    """Run the basic training python frontend test scripts from ``cwd``.

    Every script is started with the current interpreter through
    ``run_subprocess``; the scripts run one after another in a fixed order.
    """
    def run_python(*script_args):
        # Start the current interpreter with the given arguments in cwd.
        run_subprocess([sys.executable, *script_args], cwd=cwd)

    run_python('onnxruntime_test_ort_trainer.py')
    run_python('onnxruntime_test_training_unit_tests.py')

    for bert_test_case in (
            'BertModelTest.test_for_pretraining_full_precision_list_input',
            'BertModelTest.test_for_pretraining_full_precision_dict_input',
            'BertModelTest.test_for_pretraining_full_precision_list_and_dict_input'):
        run_python('orttraining_test_transformers.py', bert_test_case)

    # TODO: use run_orttraining_test_orttrainer_frontend_separately to work around a sporadic segfault.
    # shall revert to run_subprocess call once the segfault issue is resolved.
    run_orttraining_test_orttrainer_frontend_separately(cwd)
    # run_subprocess([sys.executable, '-m', 'pytest', '-sv', 'orttraining_test_orttrainer_frontend.py'], cwd=cwd)

    for pytest_script in (
            'orttraining_test_orttrainer_bert_toy_onnx.py',
            'orttraining_test_checkpoint_storage.py',
            'orttraining_test_orttrainer_checkpoint_functions.py'):
        run_python('-m', 'pytest', '-sv', pytest_script)
def run_training_python_frontend_e2e_tests(cwd):
    """Run the python frontend end-to-end training tests from ``cwd``.

    The batch size test and the fine-tune tests run with CUDA_VISIBLE_DEVICES
    set to '0'. When torch reports more than one GPU, the BERT pretrain
    convergence test and orttraining_run_glue.py are additionally started
    through mpirun with one process per GPU.

    Args:
        cwd: Directory that contains the test scripts.
    """
    # frontend tests are to be added here:
    log.info("Running python frontend e2e tests.")

    def run_on_single_gpu(script_args):
        # A fresh env dict per call so nothing is shared between invocations.
        run_subprocess(
            [sys.executable] + script_args,
            cwd=cwd, env={'CUDA_VISIBLE_DEVICES': '0'})

    def run_with_mpirun(script_args):
        # Log the exact argv that is executed so the message cannot drift from
        # the real command.
        mpirun_command = [
            'mpirun', '-n', str(ngpus), '-x', 'NCCL_DEBUG=INFO', sys.executable] + script_args
        log.debug('RUN: %s', ' '.join(mpirun_command))
        run_subprocess(mpirun_command, cwd=cwd)

    run_on_single_gpu(['orttraining_run_frontend_batch_size_test.py', '-v'])

    import torch
    ngpus = torch.cuda.device_count()
    if ngpus > 1:
        # TODO: this test will be replaced with convergence test ported from backend
        run_with_mpirun([
            'orttraining_run_bert_pretrain.py', 'ORTBertPretrainTest.test_pretrain_convergence'])
        run_with_mpirun(['orttraining_run_glue.py'])

    # with orttraining_run_glue.py.
    # 1. we like to force to use single GPU (with CUDA_VISIBLE_DEVICES)
    #   for fine-tune tests.
    # 2. need to run test separately (not to mix between fp16
    #   and full precision runs. this need to be investigated).
    for script, test_case in (
            ('orttraining_run_glue.py', 'ORTGlueTest.test_bert_with_mrpc'),
            ('orttraining_run_glue.py', 'ORTGlueTest.test_bert_fp16_with_mrpc'),
            ('orttraining_run_glue.py', 'ORTGlueTest.test_roberta_with_mrpc'),
            ('orttraining_run_glue.py', 'ORTGlueTest.test_roberta_fp16_with_mrpc'),
            ('orttraining_run_multiple_choice.py', 'ORTMultipleChoiceTest.test_bert_fp16_with_swag')):
        run_on_single_gpu([script, test_case, '-v'])

    run_subprocess([sys.executable, 'onnxruntime_test_ort_trainer_with_mixed_precision.py'], cwd=cwd)

    run_subprocess([
        sys.executable, 'orttraining_test_transformers.py',
        'BertModelTest.test_for_pretraining_mixed_precision'], cwd=cwd)

    # this test is not stable. it occasionally causes segfault due to its session creation/release pattern.
    # need to skip to unblock release
    # run_subprocess([
    #     sys.executable, 'orttraining_test_transformers.py',
    #     'BertModelTest.test_for_pretraining_mixed_precision_with_gradient_accumulation'], cwd=cwd)
Liqun/e2e transformer test (#3540) * initial change to transformer.py * prepare e2e transformer tests * refactor transformer tests * put test python files in a flat folder * fix typo pip install transform(s) * python 3.6 * python version to 3.6 in install_ubuntu.sh * remove argparser * to use opset ver 12 * workaround loss_scale naming patch in case of loss_fn_ * assign self.loss_fn_ so it can be checked * skip a few un-needed post-process steps * fix loss_scale_input_name, clean up post process steps * skip non-frontend tests * move cpu/cuda related files to coresponding cpu/cuda folder (#3668) Co-authored-by: Weixing Zhang <wezhan@microsoft.com> * type cast for ratio is not necessary for dropout (#3682) Co-authored-by: Weixing Zhang <wezhan@microsoft.com> * thrustallocator is not needed since cub is used directly for gather now. (#3683) Co-authored-by: Weixing Zhang <wezhan@microsoft.com> * GatherND-12 Implementation (#3645) * Renamed, UT passing * Move GatherND CUDA Kerenl into onnxruntime * Merge GatherNDOpTest * Refactor Test code * Merge CPU Kernel Impl * Handle Negative Indice, Fix UT * Improve CUDA kernel to handle negative index * Minor Fixes * Preserve GatherND-1 Cuda kernel * Fix Mac build * fix UT * Fix Build * fix GatherNDOpTest.double > CUDA error cudaErrorInvalidDeviceFunction:invalid device function Co-authored-by: Sherlock Huang <bahuang@OrtTrainingDev3.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net> Co-authored-by: Peng Wang (pengwa) <pengwa@microsoft.com> * update with reviewers' comments * testBertTrainingGradientAccumulation was not using rtol and may fail occasionally with small (e-06) difference * fix merge mistakes Co-authored-by: liqun <liqun@OrtTrainingDev4.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net> Co-authored-by: Weixing Zhang <weixingzhang@users.noreply.github.com> Co-authored-by: Weixing Zhang <wezhan@microsoft.com> Co-authored-by: Sherlock <baihan.huang@gmail.com> Co-authored-by: Sherlock Huang 
<bahuang@OrtTrainingDev3.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net> Co-authored-by: Peng Wang (pengwa) <pengwa@microsoft.com>
2020-04-30 19:26:38 +00:00
def run_training_pipeline_e2e_tests(cwd):
    """Run the distributed BERT pipeline-parallel training tests from ``cwd``.

    Nothing is executed unless torch reports exactly four GPUs. With four
    GPUs, onnxruntime_training_bert is started twice through mpirun: once as
    a 4-way pipeline and once as 2-way data parallel + 2-way pipeline.
    """
    # pipeline tests are to be added here:
    log.info("Running pipeline e2e tests.")

    import torch
    ngpus = torch.cuda.device_count()

    # TODO: currently the CI machine only has 4 GPUs for parallel tests.
    # Fill in more pipeline partition options when the machine has different GPUs counts.
    if ngpus != 4:
        return

    model_name = (
        '/bert_ort/bert_models/nv/bert-large/'
        'bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12')
    bert_command = [
        './onnxruntime_training_bert',
        '--ort_log_severity', '1',
        '--optimizer=Lamb',
        '--learning_rate=3e-3',
        '--max_seq_length=128',
        '--max_predictions_per_seq=20',
        '--warmup_ratio=0.2843',
        '--warmup_mode=Poly',
        '--model_name', model_name,
        '--train_data_dir', '/bert_data/128/books_wiki_en_corpus/train',
        '--test_data_dir', '/bert_data/128/books_wiki_en_corpus/test',
        '--display_loss_steps', '1',
        '--use_nccl',
        '--use_mixed_precision',
        '--allreduce_in_fp16',
        '--gradient_accumulation_steps', '48',
        '--num_train_steps', '96',
        '--train_batch_size', '50',
    ]
    mpirun_prefix = ['mpirun', '-n', str(ngpus)]

    four_way_cut_info = ','.join([
        '1149:407-1219/1341/1463/1585/1707/1829',
        '1881:407-1951/2073/2195/2317/2439/2561',
        '2613:407-2683/2805/2927/3049/3171/3293',
    ])
    parallel_options = [
        # Test 4-way pipeline parallel
        ['--pipeline_parallel_size', '4', '--cut_group_info', four_way_cut_info],
        # Test 2-way data parallel + 2-way pipeline parallel
        ['--data_parallel_size', '2', '--pipeline_parallel_size', '2', '--cut_group_info',
         '1881:407-1951/2073/2195/2317/2439/2561/2683/2805/2927/3049/3171/3293'],
    ]

    for options in parallel_options:
        full_command = mpirun_prefix + bert_command + options
        log.debug('RUN: ' + ', '.join(full_command))
        run_subprocess(full_command, cwd=cwd)
def _collect_test_dll_path(args, build_dir, config):
    """Return the extra library directories for the enabled providers.

    The directories are joined with os.pathsep; None is returned when no
    provider contributes a directory.
    """
    dll_dirs = []
    if args.use_nuphar:
        dll_dirs.append(os.path.join(
            build_dir, config, "external", "tvm", config))
    if args.use_tensorrt:
        dll_dirs.append(os.path.join(args.tensorrt_home, 'lib'))
    if args.use_mklml:
        dll_dirs.append(os.path.join(build_dir, config, "mklml", "src", "project_mklml", "lib"))
    return os.pathsep.join(dll_dirs) if dll_dirs else None


def _run_native_tests_for_config(args, source_dir, ctest_path, config, cwd, dll_path):
    """Run the native tests of one config with ctest, or vstest if ctest_path is None."""
    if ctest_path is not None:
        ctest_cmd = [ctest_path, "--build-config", config, "--verbose", "--timeout", "3600"]
        run_subprocess(ctest_cmd, cwd=cwd, dll_path=dll_path)
        return

    # Get the "Google Test Adapter" for vstest.
    if not os.path.exists(os.path.join(cwd, 'googletestadapter.0.17.1')):
        run_subprocess(
            ['nuget.exe', 'restore',
             os.path.join(source_dir, 'packages.config'),
             '-ConfigFile', os.path.join(source_dir, 'NuGet.config'),
             '-PackagesDirectory', cwd])
    cwd2 = os.path.join(cwd, config)
    executables = ['onnxruntime_test_all.exe']
    if args.build_shared_lib:
        executables.append('onnxruntime_shared_lib_test.exe')
        executables.append('onnxruntime_global_thread_pools_test.exe')
    run_subprocess(
        ['vstest.console.exe', '--parallel',
         '--TestAdapterPath:..\\googletestadapter.0.17.1\\build\\_common',  # noqa
         '/Logger:trx', '/Enablecodecoverage', '/Platform:x64',
         "/Settings:%s" % os.path.join(
             source_dir, 'cmake\\codeconv.runsettings')] + executables,
        cwd=cwd2, dll_path=dll_path)


def _run_python_tests_for_config(args, source_dir, config, cwd, dll_path):
    """Run the python binding tests of one config; no-op for unsupported builds."""
    # Disable python tests for TensorRT because many tests are
    # not supported yet.
    if args.use_tensorrt:
        return

    # Disable python tests in a reduced build as we don't know which ops have been included and which
    # models can run
    if args.include_ops_by_model or args.include_ops_by_config or args.minimal_build != 'off':
        return

    if is_windows():
        cwd = os.path.join(cwd, config)

    run_subprocess([sys.executable, 'onnxruntime_test_python.py'], cwd=cwd, dll_path=dll_path)

    if args.enable_symbolic_shape_infer_tests:
        run_subprocess([sys.executable, 'onnxruntime_test_python_symbolic_shape_infer.py'],
                       cwd=cwd, dll_path=dll_path)

    # For CUDA enabled builds test IOBinding feature
    if args.use_cuda:
        # We need to have Torch installed to test the IOBinding feature
        # which currently uses Torch's allocator to allocate GPU memory for testing
        log.info("Testing IOBinding feature")
        run_subprocess([sys.executable, 'onnxruntime_test_python_iobinding.py'], cwd=cwd, dll_path=dll_path)

    if not args.disable_ml_ops:
        run_subprocess([sys.executable, 'onnxruntime_test_python_mlops.py'], cwd=cwd, dll_path=dll_path)

    if args.enable_training and args.use_cuda:
        # run basic frontend tests
        run_training_python_frontend_tests(cwd=cwd)

    try:
        import onnx  # noqa
        onnx_test = True
    except ImportError as error:
        log.exception(error)
        log.warning("onnx is not installed. The ONNX tests will be skipped.")
        onnx_test = False

    if onnx_test:
        run_subprocess([sys.executable, 'onnxruntime_test_python_backend.py'], cwd=cwd, dll_path=dll_path)
        if not args.disable_ml_ops:
            run_subprocess([sys.executable, 'onnxruntime_test_python_backend_mlops.py'],
                           cwd=cwd, dll_path=dll_path)

        run_subprocess([sys.executable,
                        os.path.join(source_dir, 'onnxruntime', 'test', 'onnx', 'gen_test_models.py'),
                        '--output_dir', 'test_models'], cwd=cwd)

        if not args.skip_onnx_tests:
            run_subprocess([os.path.join(cwd, 'onnx_test_runner'), 'test_models'], cwd=cwd)
            if config != 'Debug':
                run_subprocess([sys.executable, 'onnx_backend_test_series.py'], cwd=cwd, dll_path=dll_path)

    if not args.skip_keras_test:
        try:
            import onnxmltools  # noqa
            import keras  # noqa
            onnxml_test = True
        except ImportError:
            log.warning(
                "onnxmltools and keras are not installed. "
                "The keras tests will be skipped.")
            onnxml_test = False
        if onnxml_test:
            run_subprocess(
                [sys.executable, 'onnxruntime_test_python_keras.py'],
                cwd=cwd, dll_path=dll_path)


def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
    """Run the onnxruntime tests for every requested build configuration.

    Android and iOS builds are delegated to run_android_tests / run_ios_tests.
    Otherwise the native tests run first (ctest, or vstest when ctest_path is
    None), followed by the python binding tests when args.enable_pybind is set.

    Skipping the python tests (TensorRT or reduced builds) only affects the
    current configuration; the remaining configurations are still tested.

    Args:
        args: Parsed build arguments.
        source_dir: Root of the source tree.
        ctest_path: Path of the ctest executable, or None to use vstest.
        build_dir: Root build directory.
        configs: Iterable of build configuration names.
    """
    for config in configs:
        log.info("Running tests for %s configuration", config)
        cwd = os.path.abspath(get_config_build_dir(build_dir, config))

        # TODO: temporarily disable this test to restore pipeline health. This test fails due to
        # an OOM regression. Investigation undergoing.
        # if args.enable_training and args.use_cuda and args.enable_training_pipeline_e2e_tests:
        #     # run distributed pipeline test on 4-GPU CI machine.
        #     run_training_pipeline_e2e_tests(cwd=cwd)
        #     continue

        if args.android:
            run_android_tests(args, source_dir, config, cwd)
            continue
        elif args.ios:
            run_ios_tests(args, source_dir, config, cwd)
            continue

        dll_path = _collect_test_dll_path(args, build_dir, config)
        _run_native_tests_for_config(args, source_dir, ctest_path, config, cwd, dll_path)

        if args.enable_pybind:
            _run_python_tests_for_config(args, source_dir, config, cwd, dll_path)
2018-11-20 00:48:22 +00:00
2020-04-19 03:48:30 +00:00
2019-11-22 20:14:03 +00:00
def nuphar_run_python_tests(build_dir, configs):
    """Run the Nuphar python tests for every configuration except 'Debug'.

    Temporary function: the Nuphar tests run separately because they
    require ONNX 1.5.0, which is pip-installed before each test run.
    """
    for build_config in configs:
        if build_config != 'Debug':
            test_dir = get_config_build_dir(build_dir, build_config)
            if is_windows():
                test_dir = os.path.join(test_dir, build_config)
            tvm_dll_dir = os.path.join(build_dir, build_config, "external", "tvm", build_config)
            # install onnx for shape inference in testing Nuphar scripts
            # this needs to happen after onnx_test_data preparation which
            # uses onnx 1.3.0
            pip_install_onnx = [sys.executable, '-m', 'pip', 'install', '--user', 'onnx==1.5.0']
            run_subprocess(pip_install_onnx)
            nuphar_test = [sys.executable, 'onnxruntime_test_python_nuphar.py']
            run_subprocess(nuphar_test, cwd=test_dir, dll_path=tvm_dll_dir)
def run_nodejs_tests(nodejs_binding_dir):
    """Run ``npm test -- --timeout=2000`` in the Node.js binding directory.

    On Windows the command is wrapped in ``cmd /c``.
    """
    npm_test_command = ['npm', 'test', '--', '--timeout=2000']
    shell_prefix = ['cmd', '/c'] if is_windows() else []
    run_subprocess(shell_prefix + npm_test_command, cwd=nodejs_binding_dir)
def build_python_wheel(
        source_dir, build_dir, configs, use_cuda, use_dnnl,
        use_tensorrt, use_openvino, use_nuphar, use_vitisai, use_acl, use_armnn, use_dml,
        wheel_name_suffix, enable_training, nightly_build=False, featurizers_build=False, use_ninja=False):
    """Run ``setup.py bdist_wheel`` once for each build configuration.

    Args:
        source_dir: Repository root containing ``setup.py``.
        build_dir: Root build directory; the wheel is built from the
            per-configuration directory below it.
        configs: Iterable of build configuration names.
        use_cuda, use_dnnl, use_tensorrt, use_openvino, use_nuphar,
        use_vitisai, use_acl, use_armnn, use_dml: Execution provider
            switches. At most one ``--use_*`` flag is forwarded to setup.py;
            see the precedence table below.
        wheel_name_suffix: Optional suffix forwarded as ``--wheel_name_suffix``.
        enable_training: Forward ``--enable_training`` when truthy.
        nightly_build: Forward ``--nightly_build`` when truthy.
        featurizers_build: Forward ``--use_featurizers`` when truthy.
        use_ninja: When truthy, the Windows per-config subdirectory is not
            appended to the working directory.
    """
    # The execution provider flags are mutually exclusive. The table is in
    # precedence order: only the first enabled entry is passed to setup.py.
    provider_precedence = (
        (use_tensorrt, '--use_tensorrt'),
        (use_cuda, '--use_cuda'),
        (use_openvino, '--use_openvino'),
        (use_dnnl, '--use_dnnl'),
        (use_nuphar, '--use_nuphar'),
        (use_vitisai, '--use_vitisai'),
        (use_acl, '--use_acl'),
        (use_armnn, '--use_armnn'),
        (use_dml, '--use_dml'),
    )

    for config in configs:
        wheel_cwd = get_config_build_dir(build_dir, config)
        if is_windows() and not use_ninja:
            wheel_cwd = os.path.join(wheel_cwd, config)

        setup_cmd = [sys.executable, os.path.join(source_dir, 'setup.py'), 'bdist_wheel']

        # The platform tag of the generated wheel is overridden explicitly so
        # that the wheel can be installed on Mac OS X versions 10.12+.
        # Without the override, building on MacOS 10.14 produces:
        #   [WARNING] MACOSX_DEPLOYMENT_TARGET is set to a lower value (10.12)
        #   than the version on which the Python interpreter was compiled
        #   (10.14) and will be ignored.
        # See PR #3626 for more details.
        if is_macOS():
            setup_cmd.extend(['-p', 'macosx_10_12_x86_64'])

        # These options are independent; any combination may be present.
        if nightly_build:
            setup_cmd.append('--nightly_build')
        if featurizers_build:
            setup_cmd.append("--use_featurizers")
        if wheel_name_suffix:
            setup_cmd.append('--wheel_name_suffix={}'.format(wheel_name_suffix))
        if enable_training:
            setup_cmd.append("--enable_training")

        provider_flag = next((flag for enabled, flag in provider_precedence if enabled), None)
        if provider_flag is not None:
            setup_cmd.append(provider_flag)

        run_subprocess(setup_cmd, cwd=wheel_cwd)
2018-11-20 00:48:22 +00:00
2020-04-19 03:48:30 +00:00
def derive_linux_build_property():
    """Return the ``/p:IsLinuxBuild`` property argument for the current host.

    Returns:
        ``/p:IsLinuxBuild="false"`` on Windows, ``/p:IsLinuxBuild="true"``
        on every other platform.
    """
    return '/p:IsLinuxBuild="false"' if is_windows() else '/p:IsLinuxBuild="true"'
def build_nuget_package(source_dir, build_dir, configs, use_cuda, use_openvino, use_tensorrt, use_dnnl, use_mklml):
    """Build the C# bindings and create a NuGet package for each configuration.

    Args:
        source_dir: Repository root; the C# solution lives in ``csharp`` below it.
        build_dir: Native build directory, joined onto ``source_dir``.
        configs: Iterable of build configuration names.
        use_cuda, use_openvino, use_tensorrt, use_dnnl, use_mklml: Build
            switches that select the package id (and, for OpenVINO, TensorRT
            and DNNL, the execution provider property). The first truthy one
            in the order openvino, tensorrt, dnnl, cuda, mklml wins.

    Raises:
        BuildError: When the host is neither Windows nor Linux.
    """
    if not is_windows() and not is_linux():
        raise BuildError(
            'Currently csharp builds and nuget package creation is only supportted '
            'on Windows and Linux platforms.')

    csharp_build_dir = os.path.join(source_dir, 'csharp')
    is_linux_build = derive_linux_build_property()

    # Defaults describe the plain package; the build switches override them.
    execution_provider = '/p:ExecutionProvider="None"'
    package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime"'
    if use_openvino:
        execution_provider = '/p:ExecutionProvider="openvino"'
        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.OpenVino"'
    elif use_tensorrt:
        execution_provider = '/p:ExecutionProvider="tensorrt"'
        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.TensorRT"'
    elif use_dnnl:
        execution_provider = '/p:ExecutionProvider="dnnl"'
        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.DNNL"'
    elif use_cuda:
        # Only the package id changes here; the provider property stays "None".
        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu"'
    elif use_mklml:
        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.MKLML"'

    # The native build directory is derived from the build_dir argument.
    native_dir = os.path.normpath(os.path.join(source_dir, build_dir))
    ort_build_dir = '/p:OnnxRuntimeBuildDirectory="' + native_dir + '"'

    # dotnet restore
    restore_cmd = ["dotnet", "restore", "OnnxRuntime.CSharp.sln", "--configfile", "Nuget.CSharp.config"]
    run_subprocess(restore_cmd, cwd=csharp_build_dir)

    # Build the bindings and create the package once per configuration.
    for config in configs:
        if is_linux():
            # Stage the native build output with ``make install`` first.
            run_subprocess(
                ["make", "install", "DESTDIR=.//nuget-staging"],
                cwd=os.path.join(native_dir, config))

        configuration = '/p:Configuration="' + config + '"'

        build_cmd = [
            "dotnet", "msbuild", "OnnxRuntime.CSharp.sln",
            configuration, package_name, is_linux_build, ort_build_dir]
        run_subprocess(build_cmd, cwd=csharp_build_dir)

        pack_cmd = [
            "dotnet", "msbuild", "OnnxRuntime.CSharp.proj", "/t:CreatePackage",
            package_name, configuration, execution_provider, is_linux_build, ort_build_dir]
        run_subprocess(pack_cmd, cwd=csharp_build_dir)
def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt, use_dnnl):
    """Run the C# unit tests with ``dotnet test``; does nothing off Windows.

    Args:
        source_dir: Repository root; the tests run from ``csharp`` below it.
        build_dir: Native build directory, joined onto ``source_dir``.
        use_cuda, use_openvino, use_tensorrt, use_dnnl: Each truthy switch
            adds the matching ``USE_*`` macro to ``/p:DefineConstants``.
    """
    # Tests are currently only run on Windows.
    if not is_windows():
        return

    csharp_source_dir = os.path.join(source_dir, 'csharp')
    is_linux_build = derive_linux_build_property()

    # Collect the macros implied by the build switches, in a fixed order.
    macro_switches = (
        (use_openvino, "USE_OPENVINO;"),
        (use_tensorrt, "USE_TENSORRT;"),
        (use_dnnl, "USE_DNNL;"),
        (use_cuda, "USE_CUDA;"),
    )
    macros = "".join(macro for enabled, macro in macro_switches if enabled)

    # When no macro is enabled an empty string is still passed as an argument.
    define_constants = ""
    if macros != "":
        define_constants = '/p:DefineConstants="' + macros + '"'

    # The native build directory is derived from the build_dir argument.
    native_build_dir = os.path.normpath(os.path.join(source_dir, build_dir))
    ort_build_dir = '/p:OnnxRuntimeBuildDirectory="' + native_build_dir + '"'

    # The pretrained-models test is filtered out; only unit tests run as part
    # of the build. Add "--verbosity", "detailed" to this command if required.
    test_cmd = [
        "dotnet", "test",
        "test\\Microsoft.ML.OnnxRuntime.Tests\\Microsoft.ML.OnnxRuntime.Tests.csproj",
        "--filter", "FullyQualifiedName!=Microsoft.ML.OnnxRuntime.Tests.InferenceTest.TestPreTrainedModels",
        is_linux_build, define_constants, ort_build_dir,
    ]
    run_subprocess(test_cmd, cwd=csharp_source_dir)
def is_cross_compiling_on_apple(args):
    """Tell whether this is a cross-compiling build running on a macOS host.

    Args:
        args: Parsed build arguments; ``ios`` and ``osx_arch`` are read.

    Returns:
        True on macOS when targeting iOS or when ``args.osx_arch`` differs
        from ``platform.machine()``; False otherwise, including on every
        non-macOS host.
    """
    if is_macOS():
        return bool(args.ios or args.osx_arch != platform.machine())
    return False
def build_protoc_for_host(cmake_path, source_dir, build_dir, args):
    """Build protoc for the host machine and return the path to the binary.

    The binary is built under ``<cwd>/<build_dir>/host_protoc`` so it can be
    used by a cross-compiled build.

    Args:
        cmake_path: CMake executable to invoke.
        source_dir: Repository root containing ``cmake/external/protobuf``.
        build_dir: Build directory, joined onto the current working directory.
        args: Parsed build arguments; ``arm``, ``arm64``,
            ``enable_windows_store``, ``cmake_generator`` and ``use_xcode``
            are read.

    Returns:
        Path of the protoc executable that was built.

    Raises:
        BuildError: For an ARM/ARM64/Store build on a host that is neither
            Windows nor a cross-compiling Apple host, or when the expected
            protoc binary does not exist after the build.
    """
    targets_arm_or_store = args.arm or args.arm64 or args.enable_windows_store
    if targets_arm_or_store and not (is_windows() or is_cross_compiling_on_apple(args)):
        raise BuildError(
            'Currently only support building protoc for Windows host while '
            'cross-compiling for ARM/ARM64/Store and linux cross-compiling iOS')

    log.info(
        "Building protoc for host to be used in cross-compiled build process")

    protoc_build_dir = os.path.join(os.getcwd(), build_dir, 'host_protoc')
    os.makedirs(protoc_build_dir, exist_ok=True)

    # Generate step
    generate_cmd = [
        cmake_path,
        os.path.join(source_dir, 'cmake', 'external', 'protobuf', 'cmake'),
        '-Dprotobuf_BUILD_TESTS=OFF',
        '-Dprotobuf_WITH_ZLIB_DEFAULT=OFF',
        '-Dprotobuf_BUILD_SHARED_LIBS=OFF'
    ]

    is_ninja = args.cmake_generator == 'Ninja'
    # The requested generator is forwarded unless Xcode is forced on macOS.
    if args.cmake_generator is not None and (not is_macOS() or not args.use_xcode):
        generate_cmd.extend(['-G', args.cmake_generator])
    if is_windows():
        if not is_ninja:
            generate_cmd.extend(['-T', 'host=x64'])
    elif is_macOS():
        if args.use_xcode:
            generate_cmd.extend(['-G', 'Xcode'])
            # CMake < 3.18 has a bug setting system arch to arm64 (if not
            # specified) for Xcode 12. protoc for the host should be built
            # using the host architecture, so CMAKE_OSX_ARCHITECTURES is
            # specified explicitly.
            host_arch = 'arm64' if platform.machine() == 'arm64' else 'x86_64'
            generate_cmd.append("-DCMAKE_OSX_ARCHITECTURES={}".format(host_arch))

    run_subprocess(generate_cmd, cwd=protoc_build_dir)

    # Build step
    build_cmd = [
        cmake_path,
        "--build", protoc_build_dir,
        "--config", "Release",
        "--target", "protoc",
    ]
    run_subprocess(build_cmd)

    # Absolute protoc path is needed for cmake. The binary sits in a
    # 'Release' subdirectory for non-Ninja Windows builds and Xcode builds.
    config_dir = ''
    if (is_windows() and not is_ninja) or (is_macOS() and args.use_xcode):
        config_dir = 'Release'
    suffix = '.exe' if is_windows() else ''

    expected_protoc_path = os.path.join(protoc_build_dir, config_dir, 'protoc' + suffix)
    if os.path.exists(expected_protoc_path):
        return expected_protoc_path
    raise BuildError("Couldn't find {}. Host build of protoc failed.".format(expected_protoc_path))
2020-04-19 03:48:30 +00:00
def _git_diff_as_text(path):
    """Return the output of ``git diff <path>`` decoded to text.

    Args:
        path: File path handed to ``git diff``.

    Returns:
        The diff as a ``str``; an empty string when there is no difference
        or when git exits with a non-zero status.
    """
    try:
        raw_diff = subprocess.check_output(['git', 'diff', path])
    except subprocess.CalledProcessError:
        # Best effort: a failing git invocation is reported and then treated
        # the same as "no diff" rather than aborting the build.
        log.warning('git diff returned non-zero error code')
        return ''
    # check_output returns bytes. Decode explicitly (instead of str(bytes),
    # which yields a "b'...'" repr with escaped newlines) and independently of
    # the locale, so the diff stays readable in logs and error messages.
    return raw_diff.decode('utf-8', errors='replace')


def generate_documentation(source_dir, build_dir, configs):
    """Run the doc generator scripts and check the results against git.

    For every build config, gen_contrib_doc.py and gen_opkernel_doc.py are
    copied from <source_dir>/tools/python into <build_dir>/<config> and run
    from there with the current Python interpreter. They are given
    docs/ContribOperators.md and docs/OperatorKernels.md (under source_dir)
    as their ``--output_path``.

    Afterwards ``git diff`` is used on both documents:
      * a difference in OperatorKernels.md only produces a warning;
      * a difference in ContribOperators.md raises BuildError.

    Args:
        source_dir: Root of the source tree.
        build_dir: Root build directory containing one sub-directory per
            config.
        configs: Iterable of build configuration names.

    Raises:
        BuildError: if ContribOperators.md differs from the checked-in
            version.
    """
    operator_doc_path = os.path.join(source_dir, 'docs', 'ContribOperators.md')
    opkernel_doc_path = os.path.join(source_dir, 'docs', 'OperatorKernels.md')
    scripts_dir = os.path.join(source_dir, 'tools', 'python')

    for config in configs:
        config_build_dir = os.path.join(build_dir, config)

        # Copy the generator scripts next to the build output; they are run
        # with the config's build directory as working directory.
        shutil.copy(
            os.path.join(scripts_dir, 'gen_contrib_doc.py'),
            config_build_dir)
        shutil.copy(
            os.path.join(scripts_dir, 'gen_opkernel_doc.py'),
            config_build_dir)

        run_subprocess(
            [sys.executable,
             'gen_contrib_doc.py',
             '--output_path', operator_doc_path],
            cwd=config_build_dir)
        run_subprocess(
            [sys.executable,
             'gen_opkernel_doc.py',
             '--output_path', opkernel_doc_path],
            cwd=config_build_dir)

    # NOTE(review): the diff checks run once, after all configs have been
    # processed -- confirm this matches the intended nesting.
    opkernel_diff = _git_diff_as_text(opkernel_doc_path)
    if opkernel_diff:
        # Show warning instead of throwing exception, because it is
        # dependent on build configuration for including
        # execution providers
        log.warning(
            'The updated opkernel document file ' + str(opkernel_doc_path) +
            ' is different from the checked in version. Consider '
            'regenerating the file with CPU, DNNL and CUDA providers enabled.')
        log.debug('diff:\n' + opkernel_diff)

    operator_diff = _git_diff_as_text(operator_doc_path)
    if operator_diff:
        raise BuildError(
            'The updated operator document file ' +
            str(operator_doc_path) + ' must be checked in.\n diff:\n' +
            operator_diff)
2018-11-20 00:48:22 +00:00
def _validate_build_options(args, cross_compiling):
    """Reject option combinations that the build script does not support.

    Args:
        args: Parsed command-line namespace.
        cross_compiling: Truthy when args.arm, args.arm64 or args.android
            is set.

    Raises:
        BuildError: On the first unsupported combination encountered.
    """
    if args.build_nuget and cross_compiling:
        raise BuildError('Currently nuget package creation is not supported while cross-compiling')
    if args.enable_pybind and args.disable_exceptions:
        raise BuildError('Python bindings require exceptions to be enabled.')
    if args.minimal_build and args.disable_ort_format_load:
        raise BuildError('Minimal build requires loading ORT format models.')
    if args.nnapi_min_api:
        if not args.use_nnapi:
            raise BuildError("Using --nnapi_min_api requires --use_nnapi")
        if args.nnapi_min_api < 27:
            raise BuildError("--nnapi_min_api should be 27+")
    if args.code_coverage and not args.android:
        raise BuildError("Using --code_coverage requires --android")


def _windows_x64_toolset(args):
    """Return the CMake '-T' toolset string for a native Windows x64 build.

    Args:
        args: Parsed command-line namespace; reads msvc_toolset,
            cmake_generator and cuda_version.

    Returns:
        A comma-separated toolset specification that always contains
        'host=x64'.
    """
    if (args.msvc_toolset == '14.16' and
            args.cmake_generator == 'Visual Studio 16 2019'):
        # CUDA 10.0 requires _MSC_VER >= 1700 and _MSC_VER < 1920, aka
        # Visual Studio version in [2012, 2019). In VS2019, we have to use
        # side-by-side minor version MSVC toolsets from Visual Studio 2017:
        # 14.16 is the MSVC version, 141 is the MSVC toolset version.
        # Cuda VS extension should be installed to
        # C:\Program Files (x86)\Microsoft Visual
        # Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations # noqa
        toolset = 'v141,host=x64,version=' + args.msvc_toolset
    elif args.msvc_toolset:
        toolset = 'host=x64,version=' + args.msvc_toolset
    else:
        toolset = 'host=x64'
    if args.cuda_version:
        toolset += ',cuda=' + args.cuda_version
    return toolset


def main():
    """Run the update, clean, build and test phases selected by the arguments.

    When none of update/clean/build/test is requested, update and build are
    switched on; test is switched on as well for native builds, and for
    cross-compiled builds only when args.android_abi is 'x86_64' or
    'arm64-v8a'.

    Raises:
        BuildError: If the requested options are inconsistent or unsupported
            on this host.
    """
    args = parse_arguments()
    cmake_extra_defines = args.cmake_extra_defines or []
    cross_compiling = args.arm or args.arm64 or args.android

    # If there was no explicit argument saying what to do, default
    # to update, build and test (for native builds).
    if not (args.update or args.clean or args.build or args.test):
        log.debug(
            "Defaulting to running update, build "
            "[and test for native builds].")
        args.update = True
        args.build = True
        if cross_compiling:
            args.test = args.android_abi == 'x86_64' or args.android_abi == 'arm64-v8a'
        else:
            args.test = True

    if args.skip_tests:
        args.test = False

    if args.include_ops_by_model or args.include_ops_by_config:
        from exclude_unused_ops import exclude_unused_ops
        models_path = args.include_ops_by_model or ''
        config_path = args.include_ops_by_config or ''
        exclude_unused_ops(models_path, config_path, use_cuda=args.use_cuda)

    # Options that imply other options.
    if args.use_tensorrt:
        args.use_cuda = True

    if args.build_wheel or args.gen_doc:
        args.enable_pybind = True

    if args.build_csharp or args.build_nuget or args.build_java or args.build_nodejs:
        args.build_shared_lib = True

    # Must run after the implications above: enable_pybind may just have
    # been switched on by build_wheel/gen_doc.
    _validate_build_options(args, cross_compiling)

    # Disabling unit tests for VAD-F as FPGA only supports
    # models with NCHW layout
    if args.use_openvino == "VAD-F_FP32":
        args.test = False

    configs = set(args.config)

    # setup paths and directories
    cmake_path = resolve_executable_path(args.cmake_path)
    ctest_path = None if args.use_vstest else resolve_executable_path(
        args.ctest_path)
    build_dir = args.build_dir
    script_dir = os.path.realpath(os.path.dirname(__file__))
    source_dir = os.path.normpath(os.path.join(script_dir, "..", ".."))

    # if using cuda, setup cuda paths and env vars
    cuda_home, cudnn_home = setup_cuda_vars(args)

    mpi_home = args.mpi_home
    nccl_home = args.nccl_home

    acl_home = args.acl_home
    acl_libs = args.acl_libs

    armnn_home = args.armnn_home
    armnn_libs = args.armnn_libs

    # if using tensorrt, setup tensorrt paths
    tensorrt_home = setup_tensorrt_vars(args)

    # if using migraphx, setup migraphx paths
    migraphx_home = setup_migraphx_vars(args)

    # if using rocm, setup rocm paths
    rocm_home = setup_rocm_build(args, configs)

    os.makedirs(build_dir, exist_ok=True)

    log.info("Build started")
    if args.update:
        cmake_extra_args = []
        path_to_protoc_exe = args.path_to_protoc_exe
        if not args.skip_submodule_sync:
            update_submodules(source_dir)
        if is_windows():
            if args.cmake_generator == 'Ninja':
                if args.x86 or args.arm or args.arm64:
                    raise BuildError(
                        "To cross-compile with Ninja, load the toolset "
                        "environment for the target processor (e.g. Cross "
                        "Tools Command Prompt for VS)")
                cmake_extra_args = ['-G', args.cmake_generator]
            elif args.x86:
                cmake_extra_args = [
                    '-A', 'Win32', '-T', 'host=x64', '-G', args.cmake_generator
                ]
            elif args.arm or args.arm64:
                # Cross-compiling for ARM(64) architecture
                # First build protoc for host to use during cross-compilation
                if path_to_protoc_exe is None:
                    path_to_protoc_exe = build_protoc_for_host(
                        cmake_path, source_dir, build_dir, args)
                if args.arm:
                    cmake_extra_args = ['-A', 'ARM']
                else:
                    cmake_extra_args = ['-A', 'ARM64']
                cmake_extra_args += ['-G', args.cmake_generator]
                # Cannot test on host build machine for cross-compiled
                # builds (Override any user-defined behaviour for test if any)
                if args.test:
                    log.warning(
                        "Cannot test on host build machine for cross-compiled "
                        "ARM(64) builds. Will skip test running after build.")
                    args.test = False
            else:
                cmake_extra_args = [
                    '-A', 'x64', '-T', _windows_x64_toolset(args), '-G', args.cmake_generator
                ]
            if args.enable_windows_store:
                cmake_extra_args.append(
                    '-DCMAKE_TOOLCHAIN_FILE=' + os.path.join(
                        source_dir, 'cmake', 'store_toolchain.cmake'))
            if args.enable_wcos:
                cmake_extra_args.append('-DCMAKE_USER_MAKE_RULES_OVERRIDE=wcos_rules_override.cmake')
        elif args.cmake_generator is not None and not (is_macOS() and args.use_xcode):
            cmake_extra_args += ['-G', args.cmake_generator]
        elif is_macOS():
            if args.use_xcode:
                cmake_extra_args += ['-G', 'Xcode']
            if not args.ios and not args.android and \
                    args.osx_arch == 'arm64' and platform.machine() == 'x86_64':
                if args.test:
                    log.warning(
                        "Cannot test ARM64 build on X86_64. Will skip test running after build.")
                    args.test = False

        # Cross-compiling for Android, iOS, Windows Store or another Apple
        # architecture needs a protoc that runs on the host. Check the local
        # variable rather than args.path_to_protoc_exe so that a protoc
        # already built above for Windows ARM(64) is not built a second time.
        if (args.android or args.ios or args.enable_windows_store
                or is_cross_compiling_on_apple(args)) and path_to_protoc_exe is None:
            path_to_protoc_exe = build_protoc_for_host(
                cmake_path, source_dir, build_dir, args)

        if is_ubuntu_1604():
            if (args.arm or args.arm64):
                raise BuildError(
                    "Only Windows ARM(64) cross-compiled builds supported "
                    "currently through this script")
            install_ubuntu_deps(args)
            if not is_docker() and not args.use_acl and not args.use_armnn:
                install_python_deps()
        if args.enable_pybind and is_windows():
            install_python_deps(args.numpy_version)
        if args.enable_onnx_tests:
            setup_test_data(build_dir, configs)
        generate_build_tree(
            cmake_path, source_dir, build_dir, cuda_home, cudnn_home, rocm_home, mpi_home, nccl_home,
            tensorrt_home, migraphx_home, acl_home, acl_libs, armnn_home, armnn_libs,
            path_to_protoc_exe, configs, cmake_extra_defines, args, cmake_extra_args)

    if args.clean:
        clean_targets(cmake_path, build_dir, configs)

    # if using DML, perform initial nuget package restore
    setup_dml_build(args, cmake_path, build_dir, configs)

    if args.build:
        if args.parallel < 0:
            raise BuildError("Invalid parallel job count: {}".format(args.parallel))
        # A job count of 0 means "use every CPU"; os.cpu_count() may return
        # None when the count cannot be determined, so fall back to one job.
        num_parallel_jobs = (os.cpu_count() or 1) if args.parallel == 0 else args.parallel
        build_targets(args, cmake_path, build_dir, configs, num_parallel_jobs, args.target)

    if args.test:
        run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs)

        # run nuphar python tests last, as it installs ONNX 1.5.0
        if args.enable_pybind and not args.skip_onnx_tests and args.use_nuphar:
            nuphar_run_python_tests(build_dir, configs)

        # run node.js binding tests
        if args.build_nodejs and not args.skip_nodejs_tests:
            nodejs_binding_dir = os.path.normpath(os.path.join(source_dir, "nodejs"))
            run_nodejs_tests(nodejs_binding_dir)

    # Packaging happens after the tests have run.
    if args.build:
        if args.build_wheel:
            nightly_build = os.getenv('NIGHTLY_BUILD') == '1'
            build_python_wheel(
                source_dir,
                build_dir,
                configs,
                args.use_cuda,
                args.use_dnnl,
                args.use_tensorrt,
                args.use_openvino,
                args.use_nuphar,
                args.use_vitisai,
                args.use_acl,
                args.use_armnn,
                args.use_dml,
                args.wheel_name_suffix,
                args.enable_training,
                nightly_build=nightly_build,
                featurizers_build=args.use_featurizers,
                use_ninja=(args.cmake_generator == 'Ninja')
            )
        if args.build_nuget:
            build_nuget_package(
                source_dir,
                build_dir,
                configs,
                args.use_cuda,
                args.use_openvino,
                args.use_tensorrt,
                args.use_dnnl,
                args.use_mklml
            )

    if args.test and args.build_nuget:
        run_csharp_tests(
            source_dir,
            build_dir,
            args.use_cuda,
            args.use_openvino,
            args.use_tensorrt,
            args.use_dnnl)

    if args.gen_doc and (args.build or args.test):
        generate_documentation(source_dir, build_dir, configs)

    log.info("Build complete")
2020-04-19 03:48:30 +00:00
2018-11-20 00:48:22 +00:00
if __name__ == "__main__":
    # Script entry point: a BaseError raised by the build is logged and
    # turned into exit status 1; otherwise main()'s return value is used
    # as the exit status.
    try:
        exit_status = main()
    except BaseError as build_error:
        log.error(str(build_error))
        exit_status = 1
    sys.exit(exit_status)