Cherry pick outstanding changes into release branch (round 2) (#7921)

* [OpenVINO-EP] Adding OpenVINO-EP samples to Msft Repo (#7826) * Added ONNX_OV_EP samples ->Added cpp, python and csharp samples using OpenVINO Execution Provider. Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * [js/web] update README.md (#7894) * Add API_IMPL_* blocks around shared provider methods as they are C APIs (#7908) * Missing logic for cuda nuget package (#7911) Co-authored-by: Maajid khan <n.maajidkhan@gmail.com> Co-authored-by: Yulong Wang <yulongw@microsoft.com> Co-authored-by: Ryan Hill <38674843+RyanUnderhill@users.noreply.github.com>
2026-06-24 02:47:54 +00:00 · 2021-06-02 10:24:11 -07:00 · 2021-06-02 10:24:11 -07:00 · d4106deeb6
commit d4106deeb6
parent a54716e5a9
9 changed files with 1894 additions and 5 deletions
--- a/js/web/README.md
+++ b/js/web/README.md
@ -10,9 +10,9 @@ The [Open Neural Network Exchange](http://onnx.ai/) (ONNX) is an open standard f

 ### Why ONNX Runtime Web

-With ONNX Runtime Web, web developers can score pre-trained ONNX models directly on browsers with various benefits of reducing server-client communication and protecting user privacy, as well as offering install-free and cross-platform in-browser ML experience.
+With ONNX Runtime Web, web developers can score models directly on browsers with various benefits including reducing server-client communication and protecting user privacy, as well as offering an install-free and cross-platform in-browser ML experience.

-ONNX Runtime Web can run on both CPU and GPU. For running on CPU, [WebAssembly](https://developer.mozilla.org/en-US/docs/WebAssembly) is adopted to execute the model at near-native speed. Furthermore, ONNX Runtime Web utilizes [Web Workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers) to provide a "multi-threaded" environment to parallelize data processing. Empirical evaluation shows very promising performance gains on CPU by taking full advantage of WebAssembly and Web Workers. For running on GPUs, a popular standard for accessing GPU capabilities - WebGL is adopted. ONNX Runtime Web has further adopted several novel optimization techniques for reducing data transfer between CPU and GPU, as well as some techniques to reduce GPU processing cycles to further push the performance to the maximum.
+ONNX Runtime Web can run on both CPU and GPU. On the CPU side, [WebAssembly](https://developer.mozilla.org/en-US/docs/WebAssembly) is adopted to execute the model at near-native speed. ONNX Runtime Web compiles the native ONNX Runtime CPU engine into a WebAssembly backend by using Emscripten, so it supports most functionalities native ONNX Runtime offers, including full ONNX operator coverage, multi-threading, [ONNX Runtime Quantization](https://www.onnxruntime.ai/docs/how-to/quantization.html) as well as [ONNX Runtime Mobile](http://www.onnxruntime.ai/docs/how-to/deploy-on-mobile.html). For performance acceleration with GPUs, ONNX Runtime Web leverages WebGL, a popular standard for accessing GPU capabilities. We keep improving op coverage and optimizing performance in the WebGL backend.

 See [Compatibility](#Compatibility) and [Operators Supported](#Operators) for a list of platforms and operators ONNX Runtime Web currently supports.

--- a/onnxruntime/core/framework/provider_bridge_ort.cc
+++ b/onnxruntime/core/framework/provider_bridge_ort.cc
@ -7,6 +7,7 @@
 #include "core/framework/compute_capability.h"
 #include "core/framework/data_types.h"
 #include "core/framework/data_transfer_manager.h"
+#include "core/framework/error_code_helper.h"
 #include "core/framework/execution_provider.h"
 #include "core/framework/kernel_registry.h"
 #include "core/framework/provider_bridge_ort.h"
@ -1084,6 +1085,7 @@ INcclService& INcclService::GetInstance() {
 }  // namespace onnxruntime

 ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena) {
+  API_IMPL_BEGIN
  auto factory = onnxruntime::CreateExecutionProviderFactory_Dnnl(use_arena);
  if (!factory) {
    return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Dnnl: Failed to load shared library");
@ -1091,9 +1093,11 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessi

  options->provider_factories.push_back(factory);
  return nullptr;
+  API_IMPL_END
 }

 ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id) {
+  API_IMPL_BEGIN
  auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(device_id);
  if (!factory) {
    return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library");
@ -1101,9 +1105,11 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtS

  options->provider_factories.push_back(factory);
  return nullptr;
+  API_IMPL_END
 }

 ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) {
+  API_IMPL_BEGIN
  auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(tensorrt_options);
  if (!factory) {
    return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library");
@ -1111,9 +1117,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In

  options->provider_factories.push_back(factory);
  return nullptr;
+  API_IMPL_END
 }

 ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const OrtOpenVINOProviderOptions* provider_options) {
+  API_IMPL_BEGIN
  auto factory = onnxruntime::CreateExecutionProviderFactory_OpenVINO(provider_options);
  if (!factory) {
    return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_OpenVINO: Failed to load shared library");
@ -1121,10 +1129,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In

  options->provider_factories.push_back(factory);
  return nullptr;
+  API_IMPL_END
 }

 ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const char* device_type) {
-  OrtOpenVINOProviderOptions provider_options;
+  OrtOpenVINOProviderOptions provider_options{};
  provider_options.device_type = device_type;
  return OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO(options, &provider_options);
 }
@ -1137,18 +1146,23 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_CUDA, _In_ OrtSessi
 }

 ORT_API_STATUS_IMPL(OrtApis::SetCurrentGpuDeviceId, _In_ int device_id) {
+  API_IMPL_BEGIN
  if (auto* info = onnxruntime::GetProviderInfo_CUDA())
    return info->SetCurrentGpuDeviceId(device_id);
  return CreateStatus(ORT_FAIL, "CUDA execution provider is not enabled.");
+  API_IMPL_END
 }

 ORT_API_STATUS_IMPL(OrtApis::GetCurrentGpuDeviceId, _In_ int* device_id) {
+  API_IMPL_BEGIN
  if (auto* info = onnxruntime::GetProviderInfo_CUDA())
    return info->GetCurrentGpuDeviceId(device_id);
  return CreateStatus(ORT_FAIL, "CUDA execution provider is not enabled.");
+  API_IMPL_END
 }

 ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_CUDA, _In_ OrtSessionOptions* options, _In_ const OrtCUDAProviderOptions* cuda_options) {
+  API_IMPL_BEGIN
  auto factory = onnxruntime::CreateExecutionProviderFactory_Cuda(cuda_options);
  if (!factory) {
    return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Cuda: Failed to load shared library");
@ -1156,4 +1170,5 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_CUDA, _In_ Or

  options->provider_factories.push_back(factory);
  return nullptr;
+  API_IMPL_END
 }
--- a/samples/c_cxx/OpenVINO_EP/squeezenet_classification/squeezenet_cpp_app.cpp
+++ b/samples/c_cxx/OpenVINO_EP/squeezenet_classification/squeezenet_cpp_app.cpp
@ -0,0 +1,384 @@
+/*
+Copyright (C) 2021, Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+
+Portions of this software are copyright of their respective authors and released under the MIT license:
+- ONNX-Runtime-Inference, Copyright 2020 Lei Mao. For licensing see https://github.com/leimao/ONNX-Runtime-Inference/blob/main/LICENSE.md
+*/
+
+#include <onnxruntime_cxx_api.h>
+#include <opencv2/dnn/dnn.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <chrono>
+#include <cmath>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <numeric>
+#include <string>
+#include <vector>
+#include <stdexcept> // To use runtime_error
+
+/**
+ * @brief Multiplies all elements of a vector together.
+ * @tparam T Element type; the running product is kept in T as well.
+ * @param v Values to multiply.
+ * @return The product of all elements, or T{1} when v is empty.
+ */
+template <typename T>
+T vectorProduct(const std::vector<T>& v)
+{
+    // Keep the running product in T (not int) so that int64_t shape
+    // products are not truncated to the width of an int.
+    T product{1};
+    for (const T& value : v)
+    {
+        product *= value;
+    }
+    return product;
+}
+
+/**
+ * @brief Streams a vector as a bracketed, comma-separated list such as "[1, 2, 3]".
+ * @tparam T Element type; must itself be streamable to std::ostream.
+ * @param os Destination stream.
+ * @param v Vector whose elements are written in order.
+ * @return std::ostream& The stream that was passed in, to allow chaining.
+ */
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
+{
+    os << "[";
+    for (size_t index = 0; index < v.size(); ++index)
+    {
+        // Every element except the first is preceded by the separator.
+        if (index > 0)
+        {
+            os << ", ";
+        }
+        os << v[index];
+    }
+    os << "]";
+    return os;
+}
+
+// Checks whether the text after the last '.' in str is one of the accepted
+// image extensions (all-lowercase or all-uppercase spelling only).
+// Throws std::runtime_error when str is empty; returns false when str
+// contains no '.' or the extension is not in the accepted list.
+bool imageFileExtension(std::string str)
+{
+  if (str.empty())
+    throw std::runtime_error("[ ERROR ] The image File path is empty");
+
+  const size_t dotIndex = str.rfind('.');
+  if (dotIndex == std::string::npos)
+    return false;
+
+  const std::string suffix = str.substr(dotIndex + 1);
+  const char* const accepted[] = {"jpg", "jpeg", "gif", "png", "jfif",
+                                  "JPG", "JPEG", "GIF", "PNG", "JFIF"};
+  for (const char* candidate : accepted) {
+    if (suffix == candidate)
+      return true;
+  }
+
+  return false;
+}
+
+// Reads the label file at labelFilepath and returns its lines in file order,
+// one label per line.
+// Throws std::runtime_error when the file cannot be opened, so that a wrong
+// path is reported here instead of silently producing an empty label set.
+std::vector<std::string> readLabels(const std::string& labelFilepath)
+{
+    std::ifstream fp(labelFilepath);
+    if (!fp.is_open())
+    {
+        throw std::runtime_error("[ ERROR ] The label file could not be opened: " + labelFilepath);
+    }
+
+    std::vector<std::string> labels;
+    std::string line;
+    while (std::getline(fp, line))
+    {
+        labels.push_back(line);
+    }
+    return labels;
+}
+
+// Returns true only when the text after the last '.' in filename is exactly
+// "onnx"; returns false when there is no '.' or the extension differs.
+// Throws std::runtime_error when filename is empty.
+bool checkModelExtension(const std::string& filename)
+{
+    if (filename.empty())
+    {
+        throw std::runtime_error("[ ERROR ] The Model file path is empty");
+    }
+
+    const size_t dotIndex = filename.rfind('.');
+    const bool hasExtension = (dotIndex != std::string::npos);
+    return hasExtension && filename.substr(dotIndex + 1) == "onnx";
+}
+
+// Returns true only when the text after the last '.' in filename is exactly
+// "txt"; returns false when there is no '.' or the extension differs.
+// Throws std::runtime_error when filename is empty.
+bool checkLabelFileExtension(const std::string& filename)
+{
+    if (filename.empty())
+    {
+        throw std::runtime_error("[ ERROR ] The Label file path is empty");
+    }
+
+    const size_t dotIndex = filename.rfind('.');
+    if (dotIndex == std::string::npos)
+    {
+        return false;
+    }
+    return filename.substr(dotIndex + 1) == "txt";
+}
+
+// Returns num / den, throwing std::runtime_error instead of dividing when
+// den compares equal to zero.
+float division(float num, float den){
+   const bool denominatorIsZero = (den == 0);
+   if (!denominatorIsZero) {
+      return (num / den);
+   }
+   throw std::runtime_error("[ ERROR ] Math error: Attempted to divide by Zero\n");
+}
+
+// Prints the command-line usage and two example invocations (OpenVINO EP and
+// default CPU) to standard output.
+void printHelp() {
+    std::cout << "To run the model, use the following command:\n"
+              << "Example: ./run_squeezenet --use_openvino <path_to_the_model> <path_to_the_image> <path_to_the_classes_file>" << std::endl
+              << "\n To Run using OpenVINO EP.\nExample: ./run_squeezenet --use_openvino squeezenet1.1-7.onnx demo.jpeg synset.txt \n" << std::endl
+              << "\n To Run on Default CPU.\n Example: ./run_squeezenet --use_cpu squeezenet1.1-7.onnx demo.jpeg synset.txt \n" << std::endl;
+}
+
+// Parses the command line, classifies one image with the given ONNX model and
+// prints the predicted label, its uncalibrated confidence and the mean latency
+// of repeated inference runs.
+//
+// Expected arguments: <--use_openvino|--use_cpu> <model.onnx> <image> <labels.txt>
+// Returns 0 on success or when help was requested, 1 on a usage error.
+// Throws std::runtime_error when an argument fails validation, the image
+// cannot be decoded, or the label count does not match the model output size.
+static int runSample(int argc, char* argv[])
+{
+    const std::string useOPENVINOFlag{"--use_openvino"};
+    const std::string useCPUFlag{"--use_cpu"};
+
+    if (argc == 2) {
+        const std::string option = argv[1];
+        if (option == "--help" || option == "-help" || option == "--h" || option == "-h") {
+            printHelp();
+            return 0;
+        }
+    }
+    if (argc != 5) {
+        std::cout << "[ ERROR ] you have used the wrong command to run your program." << std::endl;
+        printHelp();
+        return 1;
+    }
+
+    // Reject unknown provider flags instead of silently falling back to OpenVINO.
+    bool useOPENVINO{true};
+    const std::string providerFlag = argv[1];
+    if (providerFlag == useOPENVINOFlag) {
+        useOPENVINO = true;
+    } else if (providerFlag == useCPUFlag) {
+        useOPENVINO = false;
+    } else {
+        std::cout << "[ ERROR ] unknown execution provider option: " << providerFlag << std::endl;
+        printHelp();
+        return 1;
+    }
+
+    if (useOPENVINO)
+    {
+        std::cout << "Inference Execution Provider: OPENVINO" << std::endl;
+    }
+    else
+    {
+        std::cout << "Inference Execution Provider: CPU" << std::endl;
+    }
+
+    const std::string instanceName{"image-classification-inference"};
+
+    // Validate ModelFilePath
+    const std::string modelFilepath = argv[2]; // .onnx file
+    if (!checkModelExtension(modelFilepath)) {
+        throw std::runtime_error("[ ERROR ] The ModelFilepath is not correct. Make sure you are setting the path to an onnx model file (.onnx)");
+    }
+
+    // Validate ImageFilePath
+    const std::string imageFilepath = argv[3];
+    if (!imageFileExtension(imageFilepath)) {
+        throw std::runtime_error("[ ERROR ] The imageFilepath doesn't have correct image extension. Choose from jpeg, jpg, gif, png, PNG, jfif");
+    }
+    std::ifstream f(imageFilepath.c_str());
+    if (!f.good()) {
+        throw std::runtime_error("[ ERROR ] The imageFilepath is not set correctly or doesn't exist");
+    }
+
+    // Validate LabelFilePath
+    std::string labelFilepath = argv[4];
+    if (!checkLabelFileExtension(labelFilepath)) {
+        throw std::runtime_error("[ ERROR ] The LabelFilepath is not set correctly and the labels file should end with extension .txt");
+    }
+
+    std::vector<std::string> labels = readLabels(labelFilepath);
+
+    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
+                 instanceName.c_str());
+    Ort::SessionOptions sessionOptions;
+    sessionOptions.SetIntraOpNumThreads(1);
+
+    //Appending OpenVINO Execution Provider API
+    if (useOPENVINO) {
+        // Using OPENVINO backend.
+        // Value-initialize so every field that is not set explicitly below is
+        // zero/null rather than indeterminate.
+        OrtOpenVINOProviderOptions options{};
+        options.device_type = "CPU_FP32"; //Other options are: GPU_FP32, GPU_FP16, MYRIAD_FP16
+        std::cout << "OpenVINO device type is set to: " << options.device_type << std::endl;
+        sessionOptions.AppendExecutionProvider_OpenVINO(options);
+    }
+
+    // Sets graph optimization level
+    // Available levels are
+    // ORT_DISABLE_ALL -> To disable all optimizations
+    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
+    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
+    // (Includes level 1 + more complex optimizations like node fusions)
+    // ORT_ENABLE_ALL -> To Enable All possible optimizations
+    sessionOptions.SetGraphOptimizationLevel(
+        GraphOptimizationLevel::ORT_DISABLE_ALL);
+
+    //Creation: The Ort::Session is created here
+    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);
+
+    Ort::AllocatorWithDefaultOptions allocator;
+
+    const size_t numInputNodes = session.GetInputCount();
+    const size_t numOutputNodes = session.GetOutputCount();
+
+    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
+    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;
+
+    const char* inputName = session.GetInputName(0, allocator);
+    std::cout << "Input Name: " << inputName << std::endl;
+
+    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
+    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
+
+    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
+    std::cout << "Input Type: " << inputType << std::endl;
+
+    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
+    std::cout << "Input Dimensions: " << inputDims << std::endl;
+
+    const char* outputName = session.GetOutputName(0, allocator);
+    std::cout << "Output Name: " << outputName << std::endl;
+
+    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
+    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
+
+    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
+    std::cout << "Output Type: " << outputType << std::endl;
+
+    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
+    std::cout << "Output Dimensions: " << outputDims << std::endl;
+
+    //pre-processing the Image
+    // step 1: Read an image in HWC BGR UINT8 format.
+    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
+    if (imageBGR.empty()) {
+        // The file exists (checked above) but could not be decoded as an image.
+        throw std::runtime_error("[ ERROR ] The image could not be read or decoded: " + imageFilepath);
+    }
+
+    // step 2: Resize the image.
+    // cv::Size takes (width, height). The input is fed to the model as an
+    // N,C,H,W blob (see step 8), so height is inputDims[2] and width is inputDims[3].
+    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
+    cv::resize(imageBGR, resizedImageBGR,
+               cv::Size(static_cast<int>(inputDims.at(3)),
+                        static_cast<int>(inputDims.at(2))),
+               cv::InterpolationFlags::INTER_CUBIC);
+
+    // step 3: Convert the image to HWC RGB UINT8 format.
+    cv::cvtColor(resizedImageBGR, resizedImageRGB,
+                 cv::ColorConversionCodes::COLOR_BGR2RGB);
+    // step 4: Convert the image to HWC RGB float format by dividing each pixel by 255.
+    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
+
+    // step 5: Split the RGB channels from the image.
+    cv::Mat channels[3];
+    cv::split(resizedImage, channels);
+
+    //step 6: Normalize each channel.
+    // Normalization per channel
+    // Normalization parameters obtained from
+    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
+    channels[0] = (channels[0] - 0.485) / 0.229;
+    channels[1] = (channels[1] - 0.456) / 0.224;
+    channels[2] = (channels[2] - 0.406) / 0.225;
+
+    //step 7: Merge the RGB channels back to the image.
+    cv::merge(channels, 3, resizedImage);
+
+    // step 8: Convert the image to CHW RGB float format.
+    // HWC to CHW
+    cv::dnn::blobFromImage(resizedImage, preprocessedImage);
+
+
+    //Run Inference
+
+    /* To run inference using ONNX Runtime, the user is responsible for creating and managing the
+    input and output buffers. These buffers could be created and managed via std::vector.
+    The linear-format input data should be copied to the buffer for ONNX Runtime inference. */
+
+    const size_t inputTensorSize = vectorProduct(inputDims);
+    std::vector<float> inputTensorValues(inputTensorSize);
+    inputTensorValues.assign(preprocessedImage.begin<float>(),
+                             preprocessedImage.end<float>());
+
+    const size_t outputTensorSize = vectorProduct(outputDims);
+    // Checked at run time (not with assert) so the guard also holds in builds
+    // that define NDEBUG.
+    if (labels.size() != outputTensorSize) {
+        throw std::runtime_error("[ ERROR ] Output tensor size should equal to the label set size.");
+    }
+    std::vector<float> outputTensorValues(outputTensorSize);
+
+
+    /* Once the buffers were created, they would be used for creating instances of Ort::Value
+    which is the tensor format for ONNX Runtime. There could be multiple inputs for a neural network,
+    so we have to prepare an array of Ort::Value instances for inputs and outputs respectively even if
+    we only have one input and one output. */
+
+    std::vector<const char*> inputNames{inputName};
+    std::vector<const char*> outputNames{outputName};
+    std::vector<Ort::Value> inputTensors;
+    std::vector<Ort::Value> outputTensors;
+
+    /* Setup inputs & outputs: The input & output tensors are created here,
+    wrapping the buffers allocated above. */
+
+    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
+        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
+    inputTensors.push_back(Ort::Value::CreateTensor<float>(
+        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
+        inputDims.size()));
+    outputTensors.push_back(Ort::Value::CreateTensor<float>(
+        memoryInfo, outputTensorValues.data(), outputTensorSize,
+        outputDims.data(), outputDims.size()));
+
+    /* To run inference, we provide the run options, an array of input names corresponding to the
+    inputs in the input tensor, an array of input tensor, number of inputs, an array of output names
+    corresponding to the outputs in the output tensor, an array of output tensor, number of outputs. */
+
+    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
+                inputTensors.data(), 1, outputNames.data(),
+                outputTensors.data(), 1);
+
+    size_t predId = 0;
+    float maxActivation = std::numeric_limits<float>::lowest();
+    float expSum = 0;
+    /* The inference result could be found in the buffer for the output tensors,
+    which are usually the buffer from std::vector instances. */
+    for (size_t i = 0; i < labels.size(); i++) {
+        const float activation = outputTensorValues.at(i);
+        expSum += std::exp(activation);
+        if (activation > maxActivation)
+        {
+            predId = i;
+            maxActivation = activation;
+        }
+    }
+    std::cout << "Predicted Label ID: " << predId << std::endl;
+    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
+    try {
+      const float result = division(std::exp(maxActivation), expSum);
+      std::cout << "Uncalibrated Confidence: " << result << std::endl;
+    }
+    catch (const std::runtime_error& e) {
+      std::cout << "Exception occurred" << std::endl << e.what();
+    }
+
+    // Measure latency
+    const int numTests{100};
+    const std::chrono::steady_clock::time_point begin =
+        std::chrono::steady_clock::now();
+
+    //Run: Running the session is done in the Run() method:
+    for (int i = 0; i < numTests; i++) {
+        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
+                    inputTensors.data(), 1, outputNames.data(),
+                    outputTensors.data(), 1);
+    }
+    const std::chrono::steady_clock::time_point end =
+        std::chrono::steady_clock::now();
+    // Use a floating-point duration so sub-millisecond time is not truncated
+    // before averaging; the value printed is the mean over numTests runs.
+    const float totalMs =
+        std::chrono::duration<float, std::milli>(end - begin).count();
+    std::cout << "Average Inference Latency: "
+              << totalMs / static_cast<float>(numTests)
+              << " ms" << std::endl;
+    return 0;
+}
+
+// Program entry point: runs the sample and turns any escaping exception into
+// an error message on stderr and a non-zero exit status.
+int main(int argc, char* argv[])
+{
+    try {
+        return runSample(argc, argv);
+    }
+    catch (const std::exception& e) {
+        std::cerr << e.what() << std::endl;
+        return 1;
+    }
+}
--- a/samples/c_cxx/OpenVINO_EP/squeezenet_classification/synset.txt
+++ b/samples/c_cxx/OpenVINO_EP/squeezenet_classification/synset.txt
--- a/samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Label.cs
+++ b/samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Label.cs
@ -0,0 +1,91 @@
+/*
+Copyright (C) 2021, Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+*/
+
+namespace yolov3
+{
+    /// <summary>
+    /// Holds the fixed table of class names used by the yolov3 sample.
+    /// </summary>
+    public class LabelMap
+    {
+        /// <summary>
+        /// The 80 class names, in declaration order.
+        /// NOTE(review): presumably indexed by the class id the model emits, so
+        /// the order must not change - confirm against the caller.
+        /// </summary>
+        public static readonly string[] Labels = new[] {"person",
+                                                        "bicycle",
+                                                        "car",
+                                                        "motorcycle",
+                                                        "airplane",
+                                                        "bus",
+                                                        "train",
+                                                        "truck",
+                                                        "boat",
+                                                        "traffic light",
+                                                        "fire hydrant",
+                                                        "stop sign",
+                                                        "parking meter",
+                                                        "bench",
+                                                        "bird",
+                                                        "cat",
+                                                        "dog",
+                                                        "horse",
+                                                        "sheep",
+                                                        "cow",
+                                                        "elephant",
+                                                        "bear",
+                                                        "zebra",
+                                                        "giraffe",
+                                                        "backpack",
+                                                        "umbrella",
+                                                        "handbag",
+                                                        "tie",
+                                                        "suitcase",
+                                                        "frisbee",
+                                                        "skis",
+                                                        "snowboard",
+                                                        "sports ball",
+                                                        "kite",
+                                                        "baseball bat",
+                                                        "baseball glove",
+                                                        "skateboard",
+                                                        "surfboard",
+                                                        "tennis racket",
+                                                        "bottle",
+                                                        "wine glass",
+                                                        "cup",
+                                                        "fork",
+                                                        "knife",
+                                                        "spoon",
+                                                        "bowl",
+                                                        "banana",
+                                                        "apple",
+                                                        "sandwich",
+                                                        "orange",
+                                                        "broccoli",
+                                                        "carrot",
+                                                        "hot dog",
+                                                        "pizza",
+                                                        "donut",
+                                                        "cake",
+                                                        "chair",
+                                                        "couch",
+                                                        "potted plant",
+                                                        "bed",
+                                                        "dining table",
+                                                        "toilet",
+                                                        "tv",
+                                                        "laptop",
+                                                        "mouse",
+                                                        "remote",
+                                                        "keyboard",
+                                                        "cell phone",
+                                                        "microwave",
+                                                        "oven",
+                                                        "toaster",
+                                                        "sink",
+                                                        "refrigerator",
+                                                        "book",
+                                                        "clock",
+                                                        "vase",
+                                                        "scissors",
+                                                        "teddy bear",
+                                                        "hair drier",
+                                                        "toothbrush"};
+    }
+}
--- a/samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Prediction.cs
+++ b/samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Prediction.cs
@ -0,0 +1,31 @@
+/*
+Copyright (C) 2021, Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+*/
+
+namespace yolov3
+{
+    /// <summary>
+    /// One prediction: a box, a class name and a score.
+    /// </summary>
+    public class Prediction
+    {
+        /// <summary>Box associated with this prediction.</summary>
+        public Box Box { get; set; }
+
+        /// <summary>Class name associated with this prediction.</summary>
+        public string Class { get; set; }
+
+        /// <summary>Score associated with this prediction.</summary>
+        public float Score { get; set; }
+    }
+
+    /// <summary>
+    /// Rectangle described by its minimum and maximum X and Y values.
+    /// </summary>
+    public class Box
+    {
+        public float Xmin { get; set; }
+        public float Ymin { get; set; }
+        public float Xmax { get; set; }
+        public float Ymax { get; set; }
+
+        /// <summary>
+        /// Creates a box from its four bounds; each argument is stored
+        /// unchanged in the property of the same name.
+        /// </summary>
+        public Box(float xmin, float ymin, float xmax, float ymax)
+        {
+            (Xmin, Ymin, Xmax, Ymax) = (xmin, ymin, xmax, ymax);
+        }
+    }
+}
--- a/samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Program.cs
+++ b/samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Program.cs
@ -0,0 +1,173 @@
+/*
+Copyright (C) 2021, Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+*/
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using Microsoft.ML.OnnxRuntime;
+using SixLabors.ImageSharp;
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Processing;
+using SixLabors.ImageSharp.Formats;
+using SixLabors.ImageSharp.Drawing.Processing;
+using SixLabors.Fonts;
+
+namespace yolov3
+{
+    /// <summary>
+    /// Console sample that runs a YOLOv3 ONNX model through ONNX Runtime with the
+    /// OpenVINO execution provider and saves a copy of the input image with the
+    /// detected boxes, labels and scores drawn on it.
+    /// </summary>
+    class Program
+    {
+        /// <summary>
+        /// Entry point. Letterboxes the input image to 416x416, runs inference and
+        /// writes the annotated original image to the output path.
+        /// </summary>
+        /// <param name="args">
+        /// Three paths, in order: the ONNX model, the input image, the output image.
+        /// </param>
+        static void Main(string[] args)
+        {
+            // All three paths are required.
+            if (args == null || args.Length < 3)
+            {
+                Console.WriteLine("Usage information: dotnet run model.onnx input.jpg output.jpg");
+                return;
+            }
+            if (!File.Exists(args[0]))
+            {
+                Console.WriteLine("Model Path does not exist");
+                return;
+            }
+            if (!File.Exists(args[1]))
+            {
+                Console.WriteLine("Input Path does not exist");
+                return;
+            }
+
+            // Read paths
+            string modelFilePath = args[0];
+            string imageFilePath = args[1];
+            string outImageFilePath = args[2];
+
+            using Image imageOrg = Image.Load(imageFilePath, out IImageFormat format);
+
+            // Letterbox image: scale to fit inside w x h while keeping the aspect
+            // ratio, then centre it on a gray canvas.
+            var iw = imageOrg.Width;
+            var ih = imageOrg.Height;
+            var w = 416;
+            var h = 416;
+
+            if ((iw == 0) || (ih == 0))
+            {
+                Console.WriteLine("Math error: Attempted to divide by Zero");
+                return;
+            }
+
+            float width = (float)w / iw;
+            float height = (float)h / ih;
+
+            float scale = Math.Min(width, height);
+
+            var nw = (int)(iw * scale);
+            var nh = (int)(ih * scale);
+
+            var pad_dims_w = (w - nw) / 2;
+            var pad_dims_h = (h - nh) / 2;
+
+            // Resize image using default bicubic sampler.
+            // Both intermediate images own pixel buffers, so dispose them when done.
+            using var image = imageOrg.Clone(x => x.Resize(nw, nh));
+
+            using var clone = new Image<Rgb24>(w, h);
+            clone.Mutate(i => i.Fill(Color.Gray));
+            clone.Mutate(o => o.DrawImage(image, new Point(pad_dims_w, pad_dims_h), 1f));
+
+            // Preprocessing image: copy the pixels into a 1 x 3 x h x w tensor,
+            // scaled to the range [0, 1].
+            // NOTE(review): channels are written in B, G, R order - confirm this is
+            // the channel order the model expects.
+            Tensor<float> input = new DenseTensor<float>(new[] { 1, 3, h, w });
+            for (int y = 0; y < clone.Height; y++)
+            {
+                Span<Rgb24> pixelSpan = clone.GetPixelRowSpan(y);
+                for (int x = 0; x < clone.Width; x++)
+                {
+                    input[0, 0, y, x] = pixelSpan[x].B / 255f;
+                    input[0, 1, y, x] = pixelSpan[x].G / 255f;
+                    input[0, 2, y, x] = pixelSpan[x].R / 255f;
+                }
+            }
+
+            // Second model input: the original image size as (height, width).
+            var image_shape = new DenseTensor<float>(new[] { 1, 2 });
+            image_shape[0, 0] = ih;
+            image_shape[0, 1] = iw;
+
+            // Setup inputs and outputs
+            var container = new List<NamedOnnxValue>
+            {
+                NamedOnnxValue.CreateFromTensor("input_1", input),
+                NamedOnnxValue.CreateFromTensor("image_shape", image_shape)
+            };
+
+            // Session Options (disposable, so released at the end of Main)
+            using SessionOptions options = new SessionOptions();
+            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
+            options.AppendExecutionProvider_OpenVINO(@"MYRIAD_FP16");
+            options.AppendExecutionProvider_CPU(1);
+
+            // Run inference
+            using var session = new InferenceSession(modelFilePath, options);
+
+            using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = session.Run(container);
+
+            Console.WriteLine("Inference done");
+
+            // Post Processing Steps
+            var resultsArray = results.ToArray();
+            Tensor<float> boxes = resultsArray[0].AsTensor<float>();
+            Tensor<float> scores = resultsArray[1].AsTensor<float>();
+            int[] indices = resultsArray[2].AsTensor<int>().ToArray();
+
+            // The index output is consumed as consecutive triples of
+            // (batch index, class index, box index). The loop bound ignores an
+            // incomplete trailing triple instead of reading past the array.
+            var predictions = new List<Prediction>();
+            for (int i = 0; i + 2 < indices.Length; i += 3)
+            {
+                int batchIndex = indices[i];
+                int classIndex = indices[i + 1];
+                int boxIndex = indices[i + 2];
+
+                predictions.Add(new Prediction
+                {
+                    // The last axis of the box tensor is read as
+                    // [ymin, xmin, ymax, xmax]; Box takes (xmin, ymin, xmax, ymax).
+                    Box = new Box(boxes[batchIndex, boxIndex, 1],
+                                  boxes[batchIndex, boxIndex, 0],
+                                  boxes[batchIndex, boxIndex, 3],
+                                  boxes[batchIndex, boxIndex, 2]),
+                    Class = LabelMap.Labels[classIndex],
+                    Score = scores[batchIndex, classIndex, boxIndex]
+                });
+            }
+
+            // Put boxes, labels and confidence on image and save for viewing
+            Font font = SystemFonts.CreateFont("Arial", 16);
+            foreach (var p in predictions)
+            {
+                imageOrg.Mutate(x =>
+                {
+                    x.DrawLines(Color.Red, 2f, new PointF[] {
+
+                        new PointF(p.Box.Xmin, p.Box.Ymin),
+                        new PointF(p.Box.Xmax, p.Box.Ymin),
+
+                        new PointF(p.Box.Xmax, p.Box.Ymin),
+                        new PointF(p.Box.Xmax, p.Box.Ymax),
+
+                        new PointF(p.Box.Xmax, p.Box.Ymax),
+                        new PointF(p.Box.Xmin, p.Box.Ymax),
+
+                        new PointF(p.Box.Xmin, p.Box.Ymax),
+                        new PointF(p.Box.Xmin, p.Box.Ymin)
+                    });
+                    x.DrawText($"{p.Class}, {p.Score:0.00}", font, Color.White, new PointF(p.Box.Xmin, p.Box.Ymin));
+                });
+            }
+
+            // File.Create truncates an existing file. File.OpenWrite does not, so an
+            // older, larger output file would keep stale bytes after the new image.
+            // The file is created only once drawing has succeeded.
+            using var outputImage = File.Create(outImageFilePath);
+            imageOrg.Save(outputImage, format);
+        }
+    }
+}
--- a/samples/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
+++ b/samples/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
@ -0,0 +1,195 @@
+'''
+Copyright (C) 2021, Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+'''
+
+import numpy as np
+import onnxruntime as rt
+import cv2
+import time
+import os
+
+def sigmoid(x, derivative=False):
+  """Logistic sigmoid of x, or x*(1-x) when derivative is True.
+
+  With derivative=True the argument is used as-is in x*(1-x), which is the
+  sigmoid's derivative expressed in terms of a sigmoid output.
+  """
+  if derivative:
+    return x*(1-x)
+  return 1/(1+np.exp(-x))
+
+def softmax(x):
+  """Softmax of x along axis 0.
+
+  The maximum along axis 0 is subtracted before exponentiating. This leaves
+  the result mathematically unchanged but keeps np.exp from overflowing to
+  inf (and the division from producing NaN) for large scores.
+  """
+  scores = np.asarray(x)
+  if scores.size == 0:
+    # No maximum exists for empty input; exp of an empty array is empty.
+    return np.exp(scores)
+  scoreMatExp = np.exp(scores - scores.max(axis=0))
+  return scoreMatExp / scoreMatExp.sum(0)
+
+def checkModelExtension(fp):
+  """Validate that fp names an existing file with a .onnx extension.
+
+  Raises:
+    Exception: if the extension is not .onnx (case-insensitive), or if the
+      path does not exist.
+  """
+  # Split the extension from the path and normalise it to lowercase.
+  ext = os.path.splitext(fp)[-1].lower()
+
+  if ext != ".onnx":
+    # Build one message string; passing fp as a separate Exception argument
+    # makes the error print as a tuple.
+    raise Exception("{} is an unknown file format. Use the model ending with .onnx format".format(fp))
+
+  if not os.path.exists(fp):
+    raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
+
+def checkVideoFileExtension(fp):
+  """Validate that fp names an existing .mp4, .avi or .mov file.
+
+  Raises:
+    Exception: if the extension (case-insensitive) is not one of the three
+      accepted ones, or if the path does not exist.
+  """
+  # Split the extension from the path and normalise it to lowercase.
+  ext = os.path.splitext(fp)[-1].lower()
+
+  if ext not in (".mp4", ".avi", ".mov"):
+    # Build one message string; passing fp as a separate Exception argument
+    # makes the error print as a tuple.
+    raise Exception("{} is an unknown file format. Use the video file ending with .mp4 or .avi or .mov formats".format(fp))
+
+  if not os.path.exists(fp):
+    raise Exception("[ ERROR ] Path of the video file is Invalid")
+
+# Colour look-up table for the object detection sample: entry k is the colour
+# used to draw detections of class index k.
+clut = [
+    (0, 0, 0), (255, 0, 0), (255, 0, 255), (0, 0, 255), (0, 255, 0),
+    (0, 255, 128), (128, 255, 0), (128, 128, 0), (0, 128, 255), (128, 0, 128),
+    (255, 0, 128), (128, 0, 255), (255, 128, 128), (128, 255, 128), (255, 255, 0),
+    (255, 128, 128), (128, 128, 255), (255, 128, 128), (128, 255, 128), (128, 255, 128),
+]
+
+# The 20 class labels the tiny-yolov2 model can detect, ordered by class index.
+label = [
+    "aeroplane", "bicycle", "bird", "boat", "bottle",
+    "bus", "car", "cat", "chair", "cow",
+    "diningtable", "dog", "horse", "motorbike", "person",
+    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
+]
+
+# Path to the input onnx model.
+# TODO: You need to modify the path to the input onnx model based on where it is located on your device after downloading it from ONNX Model zoo.
+model_file_path = "tiny_yolo_v2_zoo_model.onnx"
+
+# Validate model file path
+checkModelExtension(model_file_path)
+
+# Load the model
+sess = rt.InferenceSession(model_file_path)
+
+# Get the input name of the model
+input_name = sess.get_inputs()[0].name
+
+# Set OpenVINO as the Execution provider to infer this model.
+# Other 'device_type' options are (any hardware target can be assigned if you
+# have access to it):
+#   'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16', 'VAD-F_FP32',
+#   'HETERO:MYRIAD,CPU',  'MULTI:MYRIAD,GPU,CPU'
+device = 'CPU_FP32'
+sess.set_providers(['OpenVINOExecutionProvider'], [{'device_type' : device}])
+
+# Path to video file has to be provided
+# TODO: You need to specify the path to your own sample video based on where it is located on your device.
+video_file_path = "sample_demo_video.mp4"
+
+# Validate video file input path
+checkVideoFileExtension(video_file_path)
+
+# Open the video file
+cap = cv2.VideoCapture(video_file_path)
+
+# Capturing different metrics of the image from the video
+fps = cap.get(cv2.CAP_PROP_FPS)
+width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+# In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416),
+# so these factors map 416x416 network coordinates back to frame coordinates.
+x_scale = float(width)/416.0
+y_scale = float(height)/416.0
+
+# Writing the inferencing output as a video to the local disk.
+# The writer's frame size must match the frames passed to write(), so use the
+# size reported by the capture. Fall back to 640x360 only when the capture
+# reports no size.
+fourcc = cv2.VideoWriter_fourcc(*'XVID')
+output_video_name = device + "_output.avi"
+frame_size = (width, height) if width > 0 and height > 0 else (640, 360)
+output_video = cv2.VideoWriter(output_video_name, fourcc, float(17.0), frame_size)
+
+# Decoding constants. They do not change between frames, so define them once.
+numClasses = 20
+# Used in pairs: anchors[2*b] scales the width and anchors[2*b+1] the height
+# of box b.
+anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
+
+# Capturing one frame at a time from the video feed and performing the inference
+i = 0
+try:
+    while cap.isOpened():
+        l_start = time.time()
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Preprocessing the input frame and reshaping it.
+        # In the document of tiny-yolo-v2, input shape of this network is
+        # (1,3,416,416), so we resize the frame to that size.
+        in_frame = cv2.resize(frame, (416, 416))
+        X = np.asarray(in_frame)
+        X = X.astype(np.float32)
+        X = X.transpose(2,0,1)
+        # Reshaping the input array to align with the input shape of the model
+        X = X.reshape(1,3,416,416)
+
+        start = time.time()
+        # Running the session by passing in the input data of the model
+        out = sess.run(None, {input_name: X})
+        end = time.time()
+        inference_time = end - start
+        out = out[0][0]
+
+        # Centres of the labels already drawn in this frame, per class. Used
+        # to avoid drawing near-duplicate boxes for the same object.
+        existingLabels = {l: [] for l in label}
+
+        # Inside this loop we compute the bounding box b for grid cell (cy, cx)
+        for cy in range(0,13):
+            for cx in range(0,13):
+                for b in range(0,5):
+                    # First we read the tx, ty, width(tw), and height(th) for the
+                    # bounding box from the out array, as well as the confidence score
+                    channel = b*(numClasses+5)
+                    tx = out[channel  ][cy][cx]
+                    ty = out[channel+1][cy][cx]
+                    tw = out[channel+2][cy][cx]
+                    th = out[channel+3][cy][cx]
+                    tc = out[channel+4][cy][cx]
+
+                    # Each of the 13x13 grid cells covers 32x32 pixels of the
+                    # 416x416 network input.
+                    x = (float(cx) + sigmoid(tx))*32
+                    y = (float(cy) + sigmoid(ty))*32
+
+                    w = np.exp(tw) * 32 * anchors[2*b  ]
+                    h = np.exp(th) * 32 * anchors[2*b+1]
+
+                    # The confidence value for the bounding box is given by tc
+                    confidence = sigmoid(tc)
+
+                    # Class scores for this box, as float64 (same dtype as a
+                    # np.zeros buffer filled element by element).
+                    classes = np.asarray(out[channel+5:channel+5+numClasses, cy, cx], dtype=np.float64)
+                    # We take the softmax to turn the array into a probability
+                    # distribution, then pick the class with the largest score.
+                    classes = softmax(classes)
+                    detectedClass = classes.argmax()
+
+                    # Keep only boxes whose combined score is over the threshold
+                    if 0.45 < classes[detectedClass]*confidence:
+                        color = clut[detectedClass]
+                        # Convert centre/size in network coordinates to a
+                        # top-left corner and size in frame coordinates.
+                        x = (x - w/2)*x_scale
+                        y = (y - h/2)*y_scale
+                        w *= x_scale
+                        h *= y_scale
+
+                        labelX = int((x+x+w)/2)
+                        labelY = int((y+y+h)/2)
+                        addLabel = True
+                        labThreshold = 40
+                        for point in existingLabels[label[detectedClass]]:
+                            if labelX < point[0] + labThreshold and labelX > point[0] - labThreshold and \
+                               labelY < point[1] + labThreshold and labelY > point[1] - labThreshold:
+                                addLabel = False
+                        # Adding class labels to the output of the frame and also drawing
+                        # a rectangular bounding box around the object detected.
+                        if addLabel:
+                            cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
+                            cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detectedClass]),int(y)),color,-1)
+                            cv2.putText(frame,label[detectedClass],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
+                            existingLabels[label[detectedClass]].append((labelX,labelY))
+                        print('{} detected in frame {}'.format(label[detectedClass],i))
+
+        # The frame is written before the device/FPS overlay is drawn, so the
+        # overlay appears only in the preview window.
+        output_video.write(frame)
+        cv2.putText(frame,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
+        # Guard against a zero elapsed time from a coarse timer.
+        inference_fps = 1.0/inference_time if inference_time > 0 else float('inf')
+        cv2.putText(frame,'FPS: {}'.format(inference_fps),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
+        cv2.imshow('frame',frame)
+
+        # Press 'q' to quit the process
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+        print('Processed Frame {}'.format(i))
+        i += 1
+        l_end = time.time()
+        print('Loop Time = {}'.format(l_end - l_start))
+finally:
+    # Release the capture, the writer and the preview window even when the
+    # loop exits through an exception.
+    cap.release()
+    output_video.release()
+    cv2.destroyAllWindows()
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@ -25,7 +25,7 @@ def parse_arguments():
    parser.add_argument("--is_release_build", required=False, default=None, type=str,
                        help="Flag indicating if the build is a release build. Accepted values: true/false.")
    parser.add_argument("--execution_provider", required=False, default='None', type=str,
-                        choices=['dnnl', 'openvino', 'tensorrt', 'None'],
+                        choices=['cuda', 'dnnl', 'openvino', 'tensorrt', 'None'],
                        help="The selected execution provider for this build.")

    return parser.parse_args()
@ -359,7 +359,7 @@ def generate_files(list, args):
                          nuget_dependencies['openvino_ep_shared_lib']) +
                          runtimes_target + args.target_architecture + '\\native" />')

-    if args.execution_provider == "cuda":
+    if args.execution_provider == "cuda" or is_cuda_gpu_package:
        files_list.append('<file src=' + '"' + os.path.join(args.native_build_path,
                          nuget_dependencies['providers_shared_lib']) +
                          runtimes_target + args.target_architecture + '\\native" />')