diff --git a/BUILD.md b/BUILD.md index ee7c76c828..7da408cf6c 100644 --- a/BUILD.md +++ b/BUILD.md @@ -20,7 +20,7 @@ python3 setup.py bdist_wheel pip3 install --upgrade dist/*.whl ``` -5. Run `build.bat --config RelWithDebInfo --build_shared_lib --parallel`. +5. Run `build.bat --config RelWithDebInfo --build_shared_lib --parallel`. *Note: The default Windows CMake Generator is Visual Studio 2017, but you can also use the newer Visual Studio 2019 by passing `--cmake_generator "Visual Studio 16 2019"` to build.bat.* @@ -132,7 +132,7 @@ Then run it ## Build ONNX Runtime Server on Linux Read more about ONNX Runtime Server [here](https://github.com/microsoft/onnxruntime/blob/master/docs/ONNX_Runtime_Server_Usage.md) -1. ONNX Runtime server (and only the server) requires you to have Go installed to build, due to building BoringSSL. +1. ONNX Runtime server (and only the server) requires you to have Go installed to build, due to building BoringSSL. See https://golang.org/doc/install for installation instructions. 2. In the ONNX Runtime root folder, run `./build.sh --config RelWithDebInfo --build_server --use_openmp --parallel` 3. ONNX Runtime Server supports sending log to [rsyslog](https://www.rsyslog.com/) daemon. To enable it, please build with an additional parameter: `--cmake_extra_defines onnxruntime_USE_SYSLOG=1`. The build command will look like this: `./build.sh --config RelWithDebInfo --build_server --use_openmp --parallel --cmake_extra_defines onnxruntime_USE_SYSLOG=1` @@ -146,7 +146,7 @@ For Linux, please use [this Dockerfile](https://github.com/microsoft/onnxruntime ONNX Runtime supports CUDA builds. You will need to download and install [CUDA](https://developer.nvidia.com/cuda-toolkit) and [cuDNN](https://developer.nvidia.com/cudnn). -ONNX Runtime is built and tested with CUDA 10.0 and cuDNN 7.3 using the Visual Studio 2017 14.11 toolset (i.e. Visual Studio 2017 v15.3). 
+ONNX Runtime is built and tested with CUDA 10.0 and cuDNN 7.3 using the Visual Studio 2017 14.11 toolset (i.e. Visual Studio 2017 v15.3). CUDA versions from 9.1 up to 10.1, and cuDNN versions from 7.1 up to 7.4 should also work with Visual Studio 2017. - The path to the CUDA installation must be provided via the CUDA_PATH environment variable, or the `--cuda_home parameter`. @@ -246,6 +246,8 @@ The OpenVINO Execution Provider can be built using the following commands: - To configure Intel® Vision Accelerator Design based on 8 MovidiusTM MyriadX VPUs, please follow the configuration guide from (https://docs.openvinotoolkit.org/2019_R1.1/_docs_install_guides_installing_openvino_linux.html#install-VPU (Linux)) (https://docs.openvinotoolkit.org/2019_R1.1/_docs_install_guides_installing_openvino_windows.html#hddl-myriad (Windows)) +- To configure Intel® Vision Accelerator Design with an Intel® Arria® 10 FPGA, please follow the configuration guide from (https://docs.openvinotoolkit.org/2019_R1.1/_docs_install_guides_VisionAcceleratorFPGA_Configure_2019R1.html) + - Build ONNX Runtime using the below command. @@ -270,9 +272,10 @@ The OpenVINO Execution Provider can be built using the following commands: | GPU_FP16 | Intel® Integrated Graphics with FP16 quantization of models | | MYRIAD_FP16 | Intel® MovidiusTM USB sticks |  | VAD-M_FP16 | Intel® Vision Accelerator Design based on 8 MovidiusTM MyriadX VPUs | +| VAD-F_FP32 | Intel® Vision Accelerator Design with an Intel® Arria® 10 FPGA | For more information on OpenVINO Execution Provider's ONNX Layer support, Topology support, and Intel hardware enabled, please refer to the document OpenVINO-ExecutionProvider.md in $onnxruntime_root/docs/execution_providers - + --- ### Android @@ -350,7 +353,7 @@ Once you have the OpenBLAS binaries, build ONNX Runtime with `./build.bat --use_ For Linux (e.g. 
Ubuntu 16.04), install libopenblas-dev package `sudo apt-get install libopenblas-dev` and build with `./build.sh --use_openblas` ---- +--- ## Architectures ### x86 diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index e95c794182..0064976240 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -102,7 +102,7 @@ if(onnxruntime_USE_OPENMP) endif() else() set(onnxruntime_USE_OPENMP OFF) - endif() + endif() endif() if(onnxruntime_ENABLE_LTO) #TODO: figure out why nsync doesn't work @@ -473,7 +473,7 @@ if (onnxruntime_USE_JEMALLOC) endif() include_directories( - ${ONNXRUNTIME_INCLUDE_DIR} + ${ONNXRUNTIME_INCLUDE_DIR} ${REPO_ROOT}/include/onnxruntime/core/session ) @@ -549,6 +549,10 @@ if(onnxruntime_USE_OPENVINO) if(onnxruntime_USE_OPENVINO_VAD_M) add_definitions(-DOPENVINO_CONFIG_VAD_M=1) + endif() + + if(onnxruntime_USE_OPENVINO_VAD_F) + add_definitions(-DOPENVINO_CONFIG_VAD_F=1) endif() endif() diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a20af22f27..c08d1ea2bd 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -463,6 +463,10 @@ std::vector> OpenVINOExecutionProvider::GetCa device_id = "HDDL"; #endif +#ifdef OPENVINO_CONFIG_VAD_F + device_id = "FPGA"; +#endif + int counter = 0; std::unique_ptr sub_graph = std::make_unique(); diff --git a/onnxruntime/core/providers/openvino/openvino_graph.cc b/onnxruntime/core/providers/openvino/openvino_graph.cc index bddece2b0e..0688523935 100644 --- a/onnxruntime/core/providers/openvino/openvino_graph.cc +++ b/onnxruntime/core/providers/openvino/openvino_graph.cc @@ -63,6 +63,13 @@ OpenVINOGraph::OpenVINOGraph(const onnxruntime::Node* fused_node) { precision_str = "FP16"; #endif +#ifdef OPENVINO_CONFIG_VAD_F + device_id_ = "HETERO:FPGA,CPU"; + precision_ = InferenceEngine::Precision::FP32; + 
precision_str = "FP32"; +#endif + + // Infer Request class represents OpenVINO's logical hardware instance. These logical // instances are bound to physical hardware instances at runtime depending // on the physical hardware availability. If multiple Infer Requests are mapped to @@ -100,15 +107,27 @@ OpenVINOGraph::OpenVINOGraph(const onnxruntime::Node* fused_node) { input_indexes_.push_back(index); } + class FPGA_ErrorListener : public InferenceEngine::IErrorListener{ + + void onError(const char *msg) noexcept override { + LOGS_DEFAULT(INFO) << log_tag << msg; + } + }; + + FPGA_ErrorListener err_listener; + // Create hardware agnostic OpenVINO network representation openvino_network_ = BuildOpenVINONetworkWithMO(); // Create hardware specific OpenVINO network representation GetExecutableHandle(openvino_network_); - - plugin_ = InferenceEngine::PluginDispatcher().getPluginByDevice(device_id_); + + plugin_ = InferenceEngine::PluginDispatcher().getPluginByDevice(device_id_); + + static_cast(plugin_)->SetLogCallback(err_listener); + //Loading model to the plugin InferenceEngine::ExecutableNetwork exeNetwork = plugin_.LoadNetwork(*openvino_network_, {}); @@ -123,19 +142,6 @@ OpenVINOGraph::OpenVINOGraph(const onnxruntime::Node* fused_node) { LOGS_DEFAULT(INFO) << log_tag << "Infer requests created: " << num_inf_reqs_; } -std::vector OpenVINOGraph::GetEnvLdLibraryPath() const { - std::string plugin_path = std::getenv("LD_LIBRARY_PATH"); - std::vector paths; - std::string token; - std::istringstream tokenStream(plugin_path); - char delimiter = ':'; - - while (std::getline(tokenStream, token, delimiter)) { - paths.push_back(token); - } - return paths; -} - void OpenVINOGraph::ConvertONNXModelToOpenVINOIR(const std::string& onnx_model, std::string& openvino_xml, std::string& openvino_bin, bool precision_fp32) { Py_Initialize(); diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index a1e5f492f5..839e45619d 100755 --- a/tools/ci_build/build.py +++ 
b/tools/ci_build/build.py @@ -132,7 +132,7 @@ Use the individual flags to only run the specified stages. parser.add_argument("--use_automl", action='store_true', help="Build with AutoML support.") parser.add_argument("--use_ngraph", action='store_true', help="Build with nGraph.") parser.add_argument("--use_openvino", nargs="?", const="CPU_FP32", - choices=["CPU_FP32","GPU_FP32","GPU_FP16","VAD-M_FP16","MYRIAD_FP16"], help="Build with OpenVINO for specific hardware.") + choices=["CPU_FP32","GPU_FP32","GPU_FP16","VAD-M_FP16","MYRIAD_FP16","VAD-F_FP32"], help="Build with OpenVINO for specific hardware.") parser.add_argument("--use_dnnlibrary", action='store_true', help="Build with DNNLibrary.") parser.add_argument("--use_nsync", action='store_true', help="Build with NSYNC.") parser.add_argument("--use_preinstalled_eigen", action='store_true', help="Use pre-installed eigen.") @@ -156,7 +156,7 @@ Use the individual flags to only run the specified stages. parser.add_argument("--skip_onnx_tests", action='store_true', help="Explicitly disable all onnx related tests") parser.add_argument("--enable_msvc_static_runtime", action='store_true', help="Enable static linking of MSVC runtimes.") parser.add_argument("--enable_language_interop_ops", action='store_true', help="Enable operator implemented in language other than cpp") - parser.add_argument("--cmake_generator", choices=['Visual Studio 15 2017', 'Visual Studio 16 2019'], + parser.add_argument("--cmake_generator", choices=['Visual Studio 15 2017', 'Visual Studio 16 2019'], default='Visual Studio 15 2017', help="Specify the generator that CMake invokes. 
This is only supported on Windows") return parser.parse_args() @@ -329,7 +329,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home "-Donnxruntime_USE_CUDA=" + ("ON" if args.use_cuda else "OFF"), "-Donnxruntime_USE_NSYNC=" + ("OFF" if is_windows() or not args.use_nsync else "ON"), "-Donnxruntime_CUDNN_HOME=" + (cudnn_home if args.use_cuda else ""), - "-Donnxruntime_USE_AUTOML=" + ("ON" if args.use_automl else "OFF"), + "-Donnxruntime_USE_AUTOML=" + ("ON" if args.use_automl else "OFF"), "-Donnxruntime_CUDA_HOME=" + (cuda_home if args.use_cuda else ""), "-Donnxruntime_USE_JEMALLOC=" + ("ON" if args.use_jemalloc else "OFF"), "-Donnxruntime_USE_MIMALLOC=" + ("ON" if args.use_mimalloc else "OFF"), @@ -350,6 +350,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"), "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"), "-Donnxruntime_USE_OPENVINO_VAD_M=" + ("ON" if args.use_openvino == "VAD-M_FP16" else "OFF"), + "-Donnxruntime_USE_OPENVINO_VAD_F=" + ("ON" if args.use_openvino == "VAD-F_FP32" else "OFF"), "-Donnxruntime_USE_NNAPI=" + ("ON" if args.use_dnnlibrary else "OFF"), "-Donnxruntime_USE_OPENMP=" + ("ON" if args.use_openmp and not args.use_dnnlibrary and not args.use_mklml and not args.use_ngraph else "OFF"), "-Donnxruntime_USE_TVM=" + ("ON" if args.use_tvm else "OFF"), @@ -514,7 +515,7 @@ def setup_cuda_vars(args): "Current version is {}. 
CUDA 9.2 requires version 14.11.*".format(vc_ver_str), "If necessary manually install the 14.11 toolset using the Visual Studio 2017 updater.", "See 'Windows CUDA Build' in build.md in the root directory of this repository.") - + # TODO: check if cuda_version >=10.1, when cuda is enabled and VS version >=2019 return cuda_home, cudnn_home @@ -836,7 +837,7 @@ def generate_documentation(source_dir, build_dir, configs): except subprocess.CalledProcessError: print('git diff returned non-zero error code') if len(docdiff) > 0: - # Show warning instead of throwing exception, because it is dependent on build configuration for including execution propviders + # Show warning instead of throwing exception, because it is dependent on build configuration for including execution providers log.warning('The updated opkernel document file '+str(opkernel_doc_path)+' is different from the checked in version. Consider regenrating the file with CPU, MKLDNN and CUDA providers enabled.') log.debug('diff:\n'+str(docdiff)) @@ -875,6 +876,10 @@ def main(): if args.build_csharp: args.build_shared_lib = True + # Disabling unit tests for VAD-F as FPGA only supports models with NCHW layout + if args.use_openvino == "VAD-F_FP32": + args.test = False + configs = set(args.config) # setup paths and directories