Cherry pick outstanding changes into release branch (round 2) (#7921)

* [OpenVINO-EP] Adding OpenVINO-EP samples to Msft Repo (#7826)

* Added ONNX_OV_EP samples

->Added cpp, python and csharp samples
using OpenVINO Execution Provider.

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* [js/web] update README.md (#7894)

* Add API_IMPL_* blocks around shared provider methods as they are C APIs (#7908)

* Missing logic for cuda nuget package (#7911)

Co-authored-by: Maajid khan <n.maajidkhan@gmail.com>
Co-authored-by: Yulong Wang <yulongw@microsoft.com>
Co-authored-by: Ryan Hill <38674843+RyanUnderhill@users.noreply.github.com>
This commit is contained in:
Xueyun Zhu 2021-06-02 10:24:11 -07:00 committed by GitHub
parent a54716e5a9
commit d4106deeb6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 1894 additions and 5 deletions

View file

@ -10,9 +10,9 @@ The [Open Neural Network Exchange](http://onnx.ai/) (ONNX) is an open standard f
### Why ONNX Runtime Web
With ONNX Runtime Web, web developers can score pre-trained ONNX models directly on browsers with various benefits of reducing server-client communication and protecting user privacy, as well as offering install-free and cross-platform in-browser ML experience.
With ONNX Runtime Web, web developers can score models directly on browsers with various benefits including reducing server-client communication and protecting user privacy, as well as offering install-free and cross-platform in-browser ML experience.
ONNX Runtime Web can run on both CPU and GPU. For running on CPU, [WebAssembly](https://developer.mozilla.org/en-US/docs/WebAssembly) is adopted to execute the model at near-native speed. Furthermore, ONNX Runtime Web utilizes [Web Workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers) to provide a "multi-threaded" environment to parallelize data processing. Empirical evaluation shows very promising performance gains on CPU by taking full advantage of WebAssembly and Web Workers. For running on GPUs, a popular standard for accessing GPU capabilities - WebGL is adopted. ONNX Runtime Web has further adopted several novel optimization techniques for reducing data transfer between CPU and GPU, as well as some techniques to reduce GPU processing cycles to further push the performance to the maximum.
ONNX Runtime Web can run on both CPU and GPU. On the CPU side, [WebAssembly](https://developer.mozilla.org/en-US/docs/WebAssembly) is adopted to execute the model at near-native speed. ONNX Runtime Web compiles the native ONNX Runtime CPU engine into a WebAssembly backend by using Emscripten, so it supports most functionalities native ONNX Runtime offers, including full ONNX operator coverage, multi-threading, [ONNX Runtime Quantization](https://www.onnxruntime.ai/docs/how-to/quantization.html) as well as [ONNX Runtime Mobile](http://www.onnxruntime.ai/docs/how-to/deploy-on-mobile.html). For performance acceleration with GPUs, ONNX Runtime Web leverages WebGL, a popular standard for accessing GPU capabilities. We keep improving op coverage and optimizing performance in the WebGL backend.
See [Compatibility](#Compatibility) and [Operators Supported](#Operators) for a list of platforms and operators ONNX Runtime Web currently supports.

View file

@ -7,6 +7,7 @@
#include "core/framework/compute_capability.h"
#include "core/framework/data_types.h"
#include "core/framework/data_transfer_manager.h"
#include "core/framework/error_code_helper.h"
#include "core/framework/execution_provider.h"
#include "core/framework/kernel_registry.h"
#include "core/framework/provider_bridge_ort.h"
@ -1084,6 +1085,7 @@ INcclService& INcclService::GetInstance() {
} // namespace onnxruntime
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena) {
API_IMPL_BEGIN
auto factory = onnxruntime::CreateExecutionProviderFactory_Dnnl(use_arena);
if (!factory) {
return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Dnnl: Failed to load shared library");
@ -1091,9 +1093,11 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessi
options->provider_factories.push_back(factory);
return nullptr;
API_IMPL_END
}
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id) {
API_IMPL_BEGIN
auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(device_id);
if (!factory) {
return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library");
@ -1101,9 +1105,11 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtS
options->provider_factories.push_back(factory);
return nullptr;
API_IMPL_END
}
ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) {
API_IMPL_BEGIN
auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(tensorrt_options);
if (!factory) {
return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library");
@ -1111,9 +1117,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In
options->provider_factories.push_back(factory);
return nullptr;
API_IMPL_END
}
ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const OrtOpenVINOProviderOptions* provider_options) {
API_IMPL_BEGIN
auto factory = onnxruntime::CreateExecutionProviderFactory_OpenVINO(provider_options);
if (!factory) {
return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_OpenVINO: Failed to load shared library");
@ -1121,10 +1129,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In
options->provider_factories.push_back(factory);
return nullptr;
API_IMPL_END
}
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const char* device_type) {
OrtOpenVINOProviderOptions provider_options;
OrtOpenVINOProviderOptions provider_options{};
provider_options.device_type = device_type;
return OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO(options, &provider_options);
}
@ -1137,18 +1146,23 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_CUDA, _In_ OrtSessi
}
// C API entry point: forwards device_id to the CUDA provider info object's
// SetCurrentGpuDeviceId() and returns its status.
// If GetProviderInfo_CUDA() yields a null pointer, an ORT_FAIL status with the
// message "CUDA execution provider is not enabled." is returned instead.
// The body sits between API_IMPL_BEGIN / API_IMPL_END because this is a C API
// (presumably so C++ exceptions do not cross the C boundary; confirm against
// the macro definitions).
ORT_API_STATUS_IMPL(OrtApis::SetCurrentGpuDeviceId, _In_ int device_id) {
API_IMPL_BEGIN
if (auto* info = onnxruntime::GetProviderInfo_CUDA())
return info->SetCurrentGpuDeviceId(device_id);
return CreateStatus(ORT_FAIL, "CUDA execution provider is not enabled.");
API_IMPL_END
}
// C API entry point: asks the CUDA provider info object to store the current
// GPU device id through the device_id pointer and returns its status.
// If GetProviderInfo_CUDA() yields a null pointer, an ORT_FAIL status with the
// message "CUDA execution provider is not enabled." is returned instead.
// The body sits between API_IMPL_BEGIN / API_IMPL_END because this is a C API
// (presumably so C++ exceptions do not cross the C boundary; confirm against
// the macro definitions).
ORT_API_STATUS_IMPL(OrtApis::GetCurrentGpuDeviceId, _In_ int* device_id) {
API_IMPL_BEGIN
if (auto* info = onnxruntime::GetProviderInfo_CUDA())
return info->GetCurrentGpuDeviceId(device_id);
return CreateStatus(ORT_FAIL, "CUDA execution provider is not enabled.");
API_IMPL_END
}
ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_CUDA, _In_ OrtSessionOptions* options, _In_ const OrtCUDAProviderOptions* cuda_options) {
API_IMPL_BEGIN
auto factory = onnxruntime::CreateExecutionProviderFactory_Cuda(cuda_options);
if (!factory) {
return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Cuda: Failed to load shared library");
@ -1156,4 +1170,5 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_CUDA, _In_ Or
options->provider_factories.push_back(factory);
return nullptr;
API_IMPL_END
}

View file

@ -0,0 +1,384 @@
/*
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
Portions of this software are copyright of their respective authors and released under the MIT license:
- ONNX-Runtime-Inference, Copyright 2020 Lei Mao. For licensing see https://github.com/leimao/ONNX-Runtime-Inference/blob/main/LICENSE.md
*/
#include <onnxruntime_cxx_api.h>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <stdexcept> // To use runtime_error
#include <string>
#include <vector>
/**
 * @brief Multiplies all elements of a vector together.
 * @tparam T element type; the product is accumulated and returned as T
 * @param v values to multiply
 * @return the product of every element of v, or T{1} when v is empty
 */
template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    // The seed must be T{1}, not the int literal 1: std::accumulate keeps its
    // running value in the type of the seed, so an int seed would truncate
    // int64_t and floating-point products after every multiplication.
    return std::accumulate(v.begin(), v.end(), T{1}, std::multiplies<T>());
}
/**
 * @brief Stream-insertion operator that prints a vector as "[e0, e1, ...]".
 * @tparam T element type; must itself be streamable
 * @param os destination stream
 * @param v vector whose elements are written
 * @return std::ostream& the same stream, to allow chaining
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    bool isFirst = true;
    for (const T& element : v)
    {
        // Separator goes in front of every element except the first one.
        if (!isFirst)
        {
            os << ", ";
        }
        os << element;
        isFirst = false;
    }
    os << "]";
    return os;
}
// Checks whether the image path ends in one of the accepted extensions.
// Accepted (all-lowercase or all-uppercase only): jpg, jpeg, gif, png, jfif.
// Returns false when the path has no '.' or the extension is not accepted.
// Throws std::runtime_error when the path is empty.
bool imageFileExtension(std::string str)
{
    if (str.empty())
        throw std::runtime_error("[ ERROR ] The image File path is empty");

    const size_t dot = str.rfind('.');
    if (dot == std::string::npos)
        return false;

    static const char* const kAccepted[] = {"jpg", "jpeg", "gif", "png", "jfif",
                                            "JPG", "JPEG", "GIF", "PNG", "JFIF"};
    const std::string suffix = str.substr(dot + 1);
    for (const char* candidate : kAccepted)
    {
        if (suffix == candidate)
            return true;
    }
    return false;
}
// Loads the label file line by line; each line becomes one entry, in file order.
// A file that cannot be opened yields an empty vector.
std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::ifstream labelStream(labelFilepath);
    std::vector<std::string> result;
    for (std::string entry; std::getline(labelStream, entry);)
    {
        result.push_back(entry);
    }
    return result;
}
// Checks whether the model path ends in the ".onnx" extension (case-sensitive).
// Returns false when the path has no '.' or a different extension.
// Throws std::runtime_error when the path is empty.
bool checkModelExtension(const std::string& filename)
{
    if (filename.empty())
        throw std::runtime_error("[ ERROR ] The Model file path is empty");

    const size_t dot = filename.rfind('.');
    // Compare everything after the last '.' against "onnx" without copying it.
    return dot != std::string::npos &&
           filename.compare(dot + 1, std::string::npos, "onnx") == 0;
}
// Checks whether the label path ends in the ".txt" extension (case-sensitive).
// Returns false when the path has no '.' or a different extension.
// Throws std::runtime_error when the path is empty.
bool checkLabelFileExtension(const std::string& filename)
{
    if (filename.empty())
        throw std::runtime_error("[ ERROR ] The Label file path is empty");

    const size_t dot = filename.rfind('.');
    if (dot == std::string::npos)
        return false;

    return filename.substr(dot + 1) == "txt";
}
// Divides num by den.
// Throws std::runtime_error when den compares equal to zero.
float division(float num, float den){
    if (den != 0) {
        return (num / den);
    }
    throw std::runtime_error("[ ERROR ] Math error: Attempted to divide by Zero\n");
}
// Prints the command-line usage text, with one OpenVINO and one CPU example,
// to standard output.
void printHelp() {
    std::cout << "To run the model, use the following command:\n"
              << "Example: ./run_squeezenet --use_openvino <path_to_the_model> <path_to_the_image> <path_to_the_classes_file>"
              << std::endl
              << "\n To Run using OpenVINO EP.\nExample: ./run_squeezenet --use_openvino squeezenet1.1-7.onnx demo.jpeg synset.txt \n"
              << std::endl
              << "\n To Run on Default CPU.\n Example: ./run_squeezenet --use_cpu squeezenet1.1-7.onnx demo.jpeg synset.txt \n"
              << std::endl;
}
int main(int argc, char* argv[])
{
bool useOPENVINO{true};
const char* useOPENVINOFlag = "--use_openvino";
const char* useCPUFlag = "--use_cpu";
if(argc == 2) {
std::string option = argv[1];
if (option == "--help" || option == "-help" || option == "--h" || option == "-h") {
printHelp();
}
return 0;
} else if(argc != 5) {
std::cout << "[ ERROR ] you have used the wrong command to run your program." << std::endl;
printHelp();
return 0;
} else if (strcmp(argv[1], useOPENVINOFlag) == 0) {
useOPENVINO = true;
} else if (strcmp(argv[1], useCPUFlag) == 0) {
useOPENVINO = false;
}
if (useOPENVINO)
{
std::cout << "Inference Execution Provider: OPENVINO" << std::endl;
}
else
{
std::cout << "Inference Execution Provider: CPU" << std::endl;
}
std::string instanceName{"image-classification-inference"};
std::string modelFilepath = argv[2]; // .onnx file
//validate ModelFilePath
checkModelExtension(modelFilepath);
if(!checkModelExtension(modelFilepath)) {
throw std::runtime_error("[ ERROR ] The ModelFilepath is not correct. Make sure you are setting the path to an onnx model file (.onnx)");
}
std::string imageFilepath = argv[3];
// Validate ImageFilePath
imageFileExtension(imageFilepath);
if(!imageFileExtension(imageFilepath)) {
throw std::runtime_error("[ ERROR ] The imageFilepath doesn't have correct image extension. Choose from jpeg, jpg, gif, png, PNG, jfif");
}
std::ifstream f(imageFilepath.c_str());
if(!f.good()) {
throw std::runtime_error("[ ERROR ] The imageFilepath is not set correctly or doesn't exist");
}
// Validate LabelFilePath
std::string labelFilepath = argv[4];
if(!checkLabelFileExtension(labelFilepath)) {
throw std::runtime_error("[ ERROR ] The LabelFilepath is not set correctly and the labels file should end with extension .txt");
}
std::vector<std::string> labels{readLabels(labelFilepath)};
Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
instanceName.c_str());
Ort::SessionOptions sessionOptions;
sessionOptions.SetIntraOpNumThreads(1);
//Appending OpenVINO Execution Provider API
if (useOPENVINO) {
// Using OPENVINO backend
OrtOpenVINOProviderOptions options;
options.device_type = "CPU_FP32"; //Other options are: GPU_FP32, GPU_FP16, MYRIAD_FP16
std::cout << "OpenVINO device type is set to: " << options.device_type << std::endl;
sessionOptions.AppendExecutionProvider_OpenVINO(options);
}
// Sets graph optimization level
// Available levels are
// ORT_DISABLE_ALL -> To disable all optimizations
// ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
// removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
// (Includes level 1 + more complex optimizations like node fusions)
// ORT_ENABLE_ALL -> To Enable All possible optimizations
sessionOptions.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_DISABLE_ALL);
//Creation: The Ort::Session is created here
Ort::Session session(env, modelFilepath.c_str(), sessionOptions);
Ort::AllocatorWithDefaultOptions allocator;
size_t numInputNodes = session.GetInputCount();
size_t numOutputNodes = session.GetOutputCount();
std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;
const char* inputName = session.GetInputName(0, allocator);
std::cout << "Input Name: " << inputName << std::endl;
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
std::cout << "Input Type: " << inputType << std::endl;
std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
std::cout << "Input Dimensions: " << inputDims << std::endl;
const char* outputName = session.GetOutputName(0, allocator);
std::cout << "Output Name: " << outputName << std::endl;
Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
std::cout << "Output Type: " << outputType << std::endl;
std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
std::cout << "Output Dimensions: " << outputDims << std::endl;
//pre-processing the Image
// step 1: Read an image in HWC BGR UINT8 format.
cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
// step 2: Resize the image.
cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
cv::resize(imageBGR, resizedImageBGR,
cv::Size(inputDims.at(2), inputDims.at(3)),
cv::InterpolationFlags::INTER_CUBIC);
// step 3: Convert the image to HWC RGB UINT8 format.
cv::cvtColor(resizedImageBGR, resizedImageRGB,
cv::ColorConversionCodes::COLOR_BGR2RGB);
// step 4: Convert the image to HWC RGB float format by dividing each pixel by 255.
resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
// step 5: Split the RGB channels from the image.
cv::Mat channels[3];
cv::split(resizedImage, channels);
//step 6: Normalize each channel.
// Normalization per channel
// Normalization parameters obtained from
// https://github.com/onnx/models/tree/master/vision/classification/squeezenet
channels[0] = (channels[0] - 0.485) / 0.229;
channels[1] = (channels[1] - 0.456) / 0.224;
channels[2] = (channels[2] - 0.406) / 0.225;
//step 7: Merge the RGB channels back to the image.
cv::merge(channels, 3, resizedImage);
// step 8: Convert the image to CHW RGB float format.
// HWC to CHW
cv::dnn::blobFromImage(resizedImage, preprocessedImage);
//Run Inference
/* To run inference using ONNX Runtime, the user is responsible for creating and managing the
input and output buffers. These buffers could be created and managed via std::vector.
The linear-format input data should be copied to the buffer for ONNX Runtime inference. */
size_t inputTensorSize = vectorProduct(inputDims);
std::vector<float> inputTensorValues(inputTensorSize);
inputTensorValues.assign(preprocessedImage.begin<float>(),
preprocessedImage.end<float>());
size_t outputTensorSize = vectorProduct(outputDims);
assert(("Output tensor size should equal to the label set size.",
labels.size() == outputTensorSize));
std::vector<float> outputTensorValues(outputTensorSize);
/* Once the buffers were created, they would be used for creating instances of Ort::Value
which is the tensor format for ONNX Runtime. There could be multiple inputs for a neural network,
so we have to prepare an array of Ort::Value instances for inputs and outputs respectively even if
we only have one input and one output. */
std::vector<const char*> inputNames{inputName};
std::vector<const char*> outputNames{outputName};
std::vector<Ort::Value> inputTensors;
std::vector<Ort::Value> outputTensors;
/*
Creating ONNX Runtime inference sessions, querying input and output names,
dimensions, and types are trivial.
Setup inputs & outputs: The input & output tensors are created here. */
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
inputTensors.push_back(Ort::Value::CreateTensor<float>(
memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
inputDims.size()));
outputTensors.push_back(Ort::Value::CreateTensor<float>(
memoryInfo, outputTensorValues.data(), outputTensorSize,
outputDims.data(), outputDims.size()));
/* To run inference, we provide the run options, an array of input names corresponding to the
inputs in the input tensor, an array of input tensor, number of inputs, an array of output names
corresponding to the the outputs in the output tensor, an array of output tensor, number of outputs. */
session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
inputTensors.data(), 1, outputNames.data(),
outputTensors.data(), 1);
int predId = 0;
float activation = 0;
float maxActivation = std::numeric_limits<float>::lowest();
float expSum = 0;
/* The inference result could be found in the buffer for the output tensors,
which are usually the buffer from std::vector instances. */
for (int i = 0; i < labels.size(); i++) {
activation = outputTensorValues.at(i);
expSum += std::exp(activation);
if (activation > maxActivation)
{
predId = i;
maxActivation = activation;
}
}
std::cout << "Predicted Label ID: " << predId << std::endl;
std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
float result;
try {
result = division(std::exp(maxActivation), expSum);
std::cout << "Uncalibrated Confidence: " << result << std::endl;
}
catch (std::runtime_error& e) {
std::cout << "Exception occurred" << std::endl << e.what();
}
// Measure latency
int numTests{100};
std::chrono::steady_clock::time_point begin =
std::chrono::steady_clock::now();
//Run: Running the session is done in the Run() method:
for (int i = 0; i < numTests; i++) {
session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
inputTensors.data(), 1, outputNames.data(),
outputTensors.data(), 1);
}
std::chrono::steady_clock::time_point end =
std::chrono::steady_clock::now();
std::cout << "Minimum Inference Latency: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
<< " ms" << std::endl;
return 0;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
/*
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
*/
namespace yolov3
{
/// <summary>
/// Class-name lookup table used by the yolov3 sample.
/// </summary>
public class LabelMap
{
/// <summary>
/// The 80 class names. The position of a name in the array is significant:
/// callers look names up by integer index (presumably the class index
/// produced by the model; confirm against the model's label set).
/// </summary>
public static readonly string[] Labels = new[] {"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush"};
}
}

View file

@ -0,0 +1,31 @@
/*
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
*/
namespace yolov3
{
    /// <summary>
    /// Rectangle described by the coordinates of its minimum and maximum corners.
    /// </summary>
    public class Box
    {
        /// <summary>Creates a box from its four edge coordinates.</summary>
        public Box(float xmin, float ymin, float xmax, float ymax)
            => (Xmin, Ymin, Xmax, Ymax) = (xmin, ymin, xmax, ymax);

        public float Xmin { get; set; }
        public float Ymin { get; set; }
        public float Xmax { get; set; }
        public float Ymax { get; set; }
    }

    /// <summary>
    /// One detection: where it is, which class it is, and its score.
    /// </summary>
    public class Prediction
    {
        public Box Box { get; set; }
        public string Class { get; set; }
        public float Score { get; set; }
    }
}

View file

@ -0,0 +1,173 @@
/*
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.ML.OnnxRuntime;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Formats;
using SixLabors.ImageSharp.Drawing.Processing;
using SixLabors.Fonts;
namespace yolov3
{
    /// <summary>
    /// Console sample: runs a YOLOv3 ONNX model on one image through the OpenVINO
    /// execution provider and writes a copy of the image with the detections drawn on it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Expects three arguments: model path, input image path, output image path.
        /// Prints a message and returns without doing any work when the arguments are
        /// missing or the model / input files do not exist.
        /// </summary>
        /// <param name="args">model.onnx, input image, output image (in that order).</param>
        static void Main(string[] args)
        {
            if (args == null || args.Length < 3)
            {
                Console.WriteLine("Usage information: dotnet run model.onnx input.jpg output.jpg");
                return;
            }
            if (!File.Exists(args[0]))
            {
                Console.WriteLine("Model Path does not exist");
                return;
            }
            if (!File.Exists(args[1]))
            {
                Console.WriteLine("Input Path does not exist");
                return;
            }

            // Read paths
            string modelFilePath = args[0];
            string imageFilePath = args[1];
            string outImageFilePath = args[2];

            using Image imageOrg = Image.Load(imageFilePath, out IImageFormat format);

            // Letterbox image: scale to fit inside w x h keeping the aspect ratio,
            // then centre it on a gray canvas.
            var iw = imageOrg.Width;
            var ih = imageOrg.Height;
            var w = 416;
            var h = 416;

            if ((iw == 0) || (ih == 0))
            {
                Console.WriteLine("Math error: Attempted to divide by Zero");
                return;
            }

            float width = (float)w / iw;
            float height = (float)h / ih;
            float scale = Math.Min(width, height);

            var nw = (int)(iw * scale);
            var nh = (int)(ih * scale);
            var pad_dims_w = (w - nw) / 2;
            var pad_dims_h = (h - nh) / 2;

            // Resize image using default bicubic sampler.
            // Both intermediate images are disposable, so they are scoped with `using`.
            using var image = imageOrg.Clone(x => x.Resize(nw, nh));
            using var clone = new Image<Rgb24>(w, h);
            clone.Mutate(i => i.Fill(Color.Gray));
            clone.Mutate(o => o.DrawImage(image, new Point(pad_dims_w, pad_dims_h), 1f)); // draw the first one top left

            // Preprocessing image: scale every channel to [0, 1] and lay it out as 1 x 3 x h x w.
            // NOTE(review): the channels are stored in B, G, R order; confirm this is
            // the order the model expects.
            Tensor<float> input = new DenseTensor<float>(new[] { 1, 3, h, w });
            for (int y = 0; y < clone.Height; y++)
            {
                Span<Rgb24> pixelSpan = clone.GetPixelRowSpan(y);
                for (int x = 0; x < clone.Width; x++)
                {
                    input[0, 0, y, x] = pixelSpan[x].B / 255f;
                    input[0, 1, y, x] = pixelSpan[x].G / 255f;
                    input[0, 2, y, x] = pixelSpan[x].R / 255f;
                }
            }

            // Get the Image Shape (original height, original width)
            var image_shape = new DenseTensor<float>(new[] { 1, 2 });
            image_shape[0, 0] = ih;
            image_shape[0, 1] = iw;

            // Setup inputs and outputs
            var container = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor("input_1", input),
                NamedOnnxValue.CreateFromTensor("image_shape", image_shape)
            };

            // Session Options (disposable, hence `using`)
            using SessionOptions options = new SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.AppendExecutionProvider_OpenVINO(@"MYRIAD_FP16");
            options.AppendExecutionProvider_CPU(1);

            // Run inference
            using var session = new InferenceSession(modelFilePath, options);
            using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = session.Run(container);
            Console.WriteLine("Inference done");

            // Post Processing Steps
            var resultsArray = results.ToArray();
            Tensor<float> boxes = resultsArray[0].AsTensor<float>();
            Tensor<float> scores = resultsArray[1].AsTensor<float>();
            int[] indices = resultsArray[2].AsTensor<int>().ToArray();

            // `indices` is consumed three values at a time. The first and third values
            // select the box, the first/second/third select the score, and the second
            // selects the class label. Only complete triples are read.
            var predictions = new List<Prediction>(indices.Length / 3);
            for (int i = 0; i + 2 < indices.Length; i += 3)
            {
                int first = indices[i];
                int classIndex = indices[i + 1];
                int boxIndex = indices[i + 2];

                predictions.Add(new Prediction
                {
                    // The box tensor stores (ymin, xmin, ymax, xmax); Box takes x before y.
                    Box = new Box(boxes[first, boxIndex, 1],
                                  boxes[first, boxIndex, 0],
                                  boxes[first, boxIndex, 3],
                                  boxes[first, boxIndex, 2]),
                    Class = LabelMap.Labels[classIndex],
                    Score = scores[first, classIndex, boxIndex]
                });
            }

            // Put boxes, labels and confidence on image and save for viewing
            Font font = SystemFonts.CreateFont("Arial", 16);
            foreach (var p in predictions)
            {
                imageOrg.Mutate(x =>
                {
                    x.DrawLines(Color.Red, 2f, new PointF[] {

                        new PointF(p.Box.Xmin, p.Box.Ymin),
                        new PointF(p.Box.Xmax, p.Box.Ymin),

                        new PointF(p.Box.Xmax, p.Box.Ymin),
                        new PointF(p.Box.Xmax, p.Box.Ymax),

                        new PointF(p.Box.Xmax, p.Box.Ymax),
                        new PointF(p.Box.Xmin, p.Box.Ymax),

                        new PointF(p.Box.Xmin, p.Box.Ymax),
                        new PointF(p.Box.Xmin, p.Box.Ymin)
                    });
                    x.DrawText($"{p.Class}, {p.Score:0.00}", font, Color.White, new PointF(p.Box.Xmin, p.Box.Ymin));
                });
            }

            // File.Create truncates an existing file. File.OpenWrite would leave the tail
            // of a previous, larger file behind the newly written image data.
            using var outputImage = File.Create(outImageFilePath);
            imageOrg.Save(outputImage, format);
        }
    }
}

View file

@ -0,0 +1,195 @@
'''
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
'''
import numpy as np
import onnxruntime as rt
import cv2
import time
import os
def sigmoid(x, derivative=False):
    """Logistic sigmoid of ``x``, or ``x*(1-x)`` when ``derivative`` is true.

    With ``derivative=True`` the argument is used as-is in ``x*(1-x)``; no
    sigmoid is applied to it first.
    """
    if derivative:
        return x*(1-x)
    return 1/(1+np.exp(-x))
def softmax(x):
    """Softmax of ``x`` along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This does not
    change the result mathematically but keeps ``np.exp`` from overflowing to
    ``inf`` (and the quotient from becoming ``nan``) for large scores.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalise; max() is undefined for an empty array.
        return np.exp(scores)
    scoreMatExp = np.exp(scores - scores.max(axis=0))
    return scoreMatExp / scoreMatExp.sum(0)
def checkModelExtension(fp):
    """Validate that ``fp`` names an existing ``.onnx`` file.

    The extension check is case-insensitive and runs before the existence check.

    Raises:
        Exception: if the extension is not ``.onnx`` or the path does not exist.
    """
    # Split the extension from the path and normalise it to lowercase.
    ext = os.path.splitext(fp)[-1].lower()

    if ext != ".onnx":
        # Build one message string; passing the path and the text as separate
        # arguments makes the exception print as a tuple.
        raise Exception("{} is an unknown file format. Use the model ending with .onnx format".format(fp))

    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
def checkVideoFileExtension(fp):
    """Validate that ``fp`` names an existing ``.mp4``, ``.avi`` or ``.mov`` file.

    The extension check is case-insensitive and runs before the existence check.

    Raises:
        Exception: if the extension is not accepted or the path does not exist.
    """
    # Split the extension from the path and normalise it to lowercase.
    ext = os.path.splitext(fp)[-1].lower()

    if ext not in (".mp4", ".avi", ".mov"):
        # Build one message string; passing the path and the text as separate
        # arguments makes the exception print as a tuple.
        raise Exception("{} is an unknown file format. Use the video file ending with .mp4 or .avi or .mov formats".format(fp))

    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the video file is Invalid")
# color look up table for different classes for object detection sample
clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
        (128,255,0),(128,128,0),(0,128,255),(128,0,128),
        (255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
        (255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]

# 20 labels that the tiny-yolov2 model can do the object_detection on
label = ["aeroplane","bicycle","bird","boat","bottle",
         "bus","car","cat","chair","cow","diningtable",
         "dog","horse","motorbike","person","pottedplant",
         "sheep","sofa","train","tvmonitor"]

model_file_path = "tiny_yolo_v2_zoo_model.onnx"
# TODO: You need to modify the path to the input onnx model based on where it is located on your device after downloading it from ONNX Model zoo.

# Validate model file path
checkModelExtension(model_file_path)

# Load the model
sess = rt.InferenceSession(model_file_path)

# Get the input name of the model
input_name = sess.get_inputs()[0].name

device = 'CPU_FP32'
# Set OpenVINO as the Execution provider to infer this model
sess.set_providers(['OpenVINOExecutionProvider'], [{'device_type' : device}])
# other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
# 'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16', 'VAD-F_FP32',
# 'HETERO:MYRIAD,CPU', 'MULTI:MYRIAD,GPU,CPU'

# Path to video file has to be provided
video_file_path = "sample_demo_video.mp4"
# TODO: You need to specify the path to your own sample video based on where it is located on your device.

# validate video file input path
checkVideoFileExtension(video_file_path)

cap = cv2.VideoCapture(video_file_path)
if not cap.isOpened():
    raise Exception("[ ERROR ] Unable to open the video file: {}".format(video_file_path))

# capturing different metrics of the image from the video
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
x_scale = float(width)/416.0  # In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416).
y_scale = float(height)/416.0

# writing the inferencing output as a video to the local disk
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video_name = device + "_output.avi"
# The writer is opened with the source frame size, because the frames passed to
# write() below are the unscaled source frames.
output_video = cv2.VideoWriter(output_video_name, fourcc, float(17.0), (width, height))

# Model constants; they do not change from frame to frame.
numClasses = 20
anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
labThreshold = 40

# capturing one frame at a time from the video feed and performing the inference
i = 0
try:
    while cap.isOpened():
        l_start = time.time()
        ret, frame = cap.read()
        if not ret:
            break

        # preprocessing the input frame and reshaping it.
        # In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416). so we resize the model frame w.r.t that size.
        in_frame = cv2.resize(frame, (416, 416))
        X = np.asarray(in_frame)
        X = X.astype(np.float32)
        X = X.transpose(2,0,1)
        # Reshaping the input array to align with the input shape of the model
        X = X.reshape(1,3,416,416)

        start = time.time()
        # Running the session by passing in the input data of the model
        out = sess.run(None, {input_name: X})
        end = time.time()
        inference_time = end - start
        out = out[0][0]

        existingLabels = {l: [] for l in label}

        # Inside this loop we compute the bounding box b for grid cell (cy, cx)
        for cy in range(0,13):
            for cx in range(0,13):
                for b in range(0,5):
                    # First we read the tx, ty, width(tw), and height(th) for the bounding box from the out array, as well as the confidence score
                    channel = b*(numClasses+5)
                    tx = out[channel  ][cy][cx]
                    ty = out[channel+1][cy][cx]
                    tw = out[channel+2][cy][cx]
                    th = out[channel+3][cy][cx]
                    tc = out[channel+4][cy][cx]

                    x = (float(cx) + sigmoid(tx))*32
                    y = (float(cy) + sigmoid(ty))*32
                    w = np.exp(tw) * 32 * anchors[2*b  ]
                    h = np.exp(th) * 32 * anchors[2*b+1]

                    # calculating the confidence score
                    confidence = sigmoid(tc)  # The confidence value for the bounding box is given by tc

                    classes = np.zeros(numClasses)
                    for c in range(0,numClasses):
                        classes[c] = out[channel + 5 + c][cy][cx]
                    # we take the softmax to turn the array into a probability distribution. And then we pick the class with the largest score as the winner.
                    classes = softmax(classes)
                    detectedClass = classes.argmax()

                    # Now we can compute the final score for this bounding box and we only want to keep the ones whose combined score is over a certain threshold
                    if 0.45 < classes[detectedClass]*confidence:
                        color = clut[detectedClass]
                        x = (x - w/2)*x_scale
                        y = (y - h/2)*y_scale
                        w *= x_scale
                        h *= y_scale

                        labelX = int((x+x+w)/2)
                        labelY = int((y+y+h)/2)
                        # Skip the box when a label of the same class was already
                        # placed within labThreshold pixels in this frame.
                        addLabel = True
                        for point in existingLabels[label[detectedClass]]:
                            if labelX < point[0] + labThreshold and labelX > point[0] - labThreshold and \
                               labelY < point[1] + labThreshold and labelY > point[1] - labThreshold:
                                addLabel = False

                        # Adding class labels to the output of the frame and also drawing a rectangular bounding box around the object detected.
                        if addLabel:
                            cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
                            cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detectedClass]),int(y)),color,-1)
                            cv2.putText(frame,label[detectedClass],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
                            existingLabels[label[detectedClass]].append((labelX,labelY))
                            print('{} detected in frame {}'.format(label[detectedClass],i))

        # The frame is saved before the device/FPS overlay is drawn, so the
        # overlay only appears in the preview window.
        output_video.write(frame)
        cv2.putText(frame,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
        # Guard against a zero elapsed time on coarse clocks.
        inference_fps = 1.0/inference_time if inference_time > 0 else float('inf')
        cv2.putText(frame,'FPS: {}'.format(inference_fps),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
        cv2.imshow('frame',frame)

        # Press 'q' to quit the process
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        print('Processed Frame {}'.format(i))
        i += 1
        l_end = time.time()
        print('Loop Time = {}'.format(l_end - l_start))
finally:
    # Release the capture and the writer even when inference fails part-way.
    cap.release()
    output_video.release()
    cv2.destroyAllWindows()

View file

@ -25,7 +25,7 @@ def parse_arguments():
parser.add_argument("--is_release_build", required=False, default=None, type=str,
help="Flag indicating if the build is a release build. Accepted values: true/false.")
parser.add_argument("--execution_provider", required=False, default='None', type=str,
choices=['dnnl', 'openvino', 'tensorrt', 'None'],
choices=['cuda', 'dnnl', 'openvino', 'tensorrt', 'None'],
help="The selected execution provider for this build.")
return parser.parse_args()
@ -359,7 +359,7 @@ def generate_files(list, args):
nuget_dependencies['openvino_ep_shared_lib']) +
runtimes_target + args.target_architecture + '\\native" />')
if args.execution_provider == "cuda":
if args.execution_provider == "cuda" or is_cuda_gpu_package:
files_list.append('<file src=' + '"' + os.path.join(args.native_build_path,
nuget_dependencies['providers_shared_lib']) +
runtimes_target + args.target_architecture + '\\native" />')