mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
[OpenVINO-EP] Adding OpenVINO-EP samples to Msft Repo (#7826)
* Added ONNX_OV_EP samples ->Added cpp, python and csharp samples using OpenVINO Execution Provider. Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>
This commit is contained in:
parent
ab4b5055c7
commit
e41e042de6
6 changed files with 1874 additions and 0 deletions
|
|
@ -0,0 +1,384 @@
|
|||
/*
|
||||
Copyright (C) 2021, Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
Portions of this software are copyright of their respective authors and released under the MIT license:
|
||||
- ONNX-Runtime-Inference, Copyright 2020 Lei Mao. For licensing see https://github.com/leimao/ONNX-Runtime-Inference/blob/main/LICENSE.md
|
||||
*/
|
||||
|
||||
#include <onnxruntime_cxx_api.h>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <algorithm>
#include <cassert>
#include <cctype>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <exception>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <stdexcept>  // To use runtime_error
#include <string>
#include <vector>
|
||||
|
||||
/**
 * @brief Multiplies together every element of a vector.
 *
 * The running product is seeded with a value of type T, so the whole
 * computation happens in T. (Seeding with the int literal 1 would make
 * std::accumulate keep the product in an int and truncate 64-bit or
 * floating-point results.)
 *
 * @tparam T Element type; must be constructible from 1 and multipliable.
 * @param v Values to multiply.
 * @return The product of all elements, or T(1) when v is empty.
 */
template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), static_cast<T>(1), std::multiplies<T>());
}
|
||||
|
||||
/**
 * @brief Streams a vector as a bracketed, comma-separated list, e.g. "[1, 3, 224, 224]".
 *
 * An empty vector is printed as "[]".
 *
 * @tparam T Element type; must itself be streamable with operator<<.
 * @param os Destination stream.
 * @param v Vector to print.
 * @return std::ostream& The same stream, so calls can be chained.
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    // Index with size_t to match v.size(); the separator is emitted before
    // every element except the first, so no size() - 1 arithmetic is needed.
    for (std::size_t i = 0; i < v.size(); ++i)
    {
        if (i != 0)
        {
            os << ", ";
        }
        os << v[i];
    }
    os << "]";
    return os;
}
|
||||
|
||||
/**
 * @brief Validates the extension of the input image path.
 *
 * The text after the last '.' is compared case-insensitively against the
 * supported set (jpg, jpeg, gif, png, jfif), so "demo.Jpg" is accepted just
 * like "demo.jpg" and "demo.JPG".
 *
 * @param str Path of the image file.
 * @return true when the extension is supported; false when it is not or when
 *         the path contains no '.' at all.
 * @throws std::runtime_error when the path is empty.
 */
bool imageFileExtension(std::string str)
{
    if (str.empty())
        throw std::runtime_error("[ ERROR ] The image File path is empty");

    const std::size_t dot = str.rfind('.');
    if (dot == std::string::npos)
        return false;

    // Normalise to lower case once instead of enumerating spelling variants.
    std::string ext = str.substr(dot + 1);
    std::transform(ext.begin(), ext.end(), ext.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    static const char* const kImageExtensions[] = {"jpg", "jpeg", "gif", "png", "jfif"};
    return std::any_of(std::begin(kImageExtensions), std::end(kImageExtensions),
                       [&ext](const char* candidate) { return ext == candidate; });
}
|
||||
|
||||
/**
 * @brief Reads the class labels, one per line, from a text file.
 *
 * @param labelFilepath Path of the labels file.
 * @return The lines of the file in order. A trailing '\r' is removed from
 *         each line so files with CRLF line endings give clean labels.
 * @throws std::runtime_error when the file cannot be opened (previously this
 *         silently produced an empty label list).
 */
std::vector<std::string> readLabels(const std::string& labelFilepath)
{
    std::ifstream fp(labelFilepath);
    if (!fp.is_open())
    {
        throw std::runtime_error("[ ERROR ] Unable to open the label file: " + labelFilepath);
    }

    std::vector<std::string> labels;
    std::string line;
    while (std::getline(fp, line))
    {
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }
        labels.push_back(line);
    }
    return labels;
}
|
||||
|
||||
/**
 * @brief Tells whether a model path ends in the ".onnx" extension.
 *
 * The comparison is case-sensitive and looks only at the text following the
 * last '.' in the path.
 *
 * @param filename Path of the model file.
 * @return true when the extension is exactly "onnx", false otherwise
 *         (including when the path has no '.').
 * @throws std::runtime_error when the path is empty.
 */
bool checkModelExtension(const std::string& filename)
{
    if (filename.empty())
        throw std::runtime_error("[ ERROR ] The Model file path is empty");

    const std::string::size_type lastDot = filename.rfind('.');
    const bool hasDot = (lastDot != std::string::npos);
    return hasDot && filename.compare(lastDot + 1, std::string::npos, "onnx") == 0;
}
|
||||
|
||||
/**
 * @brief Tells whether a labels path ends in the ".txt" extension.
 *
 * The comparison is case-sensitive and looks only at the text following the
 * last '.' in the path.
 *
 * @param filename Path of the labels file.
 * @return true when the extension is exactly "txt", false otherwise
 *         (including when the path has no '.').
 * @throws std::runtime_error when the path is empty.
 */
bool checkLabelFileExtension(const std::string& filename)
{
    if (filename.empty())
        throw std::runtime_error("[ ERROR ] The Label file path is empty");

    const std::string::size_type lastDot = filename.rfind('.');
    if (lastDot == std::string::npos)
        return false;

    const std::string suffix = filename.substr(lastDot + 1);
    return suffix == "txt";
}
|
||||
|
||||
/**
 * @brief Divides num by den, rejecting a zero denominator.
 *
 * @param num Numerator.
 * @param den Denominator.
 * @return num / den.
 * @throws std::runtime_error when den compares equal to zero.
 */
float division(float num, float den) {
    const bool denominatorIsZero = (den == 0);
    if (!denominatorIsZero) {
        return num / den;
    }
    throw std::runtime_error("[ ERROR ] Math error: Attempted to divide by Zero\n");
}
|
||||
|
||||
/**
 * @brief Prints the command-line usage of the sample to standard output.
 *
 * Shows the general command form followed by one example for the OpenVINO
 * execution provider and one for the default CPU provider.
 */
void printHelp() {
    std::ostream& out = std::cout;
    out << "To run the model, use the following command:\n"
        << "Example: ./run_squeezenet --use_openvino <path_to_the_model> <path_to_the_image> <path_to_the_classes_file>"
        << std::endl
        << "\n To Run using OpenVINO EP.\nExample: ./run_squeezenet --use_openvino squeezenet1.1-7.onnx demo.jpeg synset.txt \n"
        << std::endl
        << "\n To Run on Default CPU.\n Example: ./run_squeezenet --use_cpu squeezenet1.1-7.onnx demo.jpeg synset.txt \n"
        << std::endl;
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
bool useOPENVINO{true};
|
||||
const char* useOPENVINOFlag = "--use_openvino";
|
||||
const char* useCPUFlag = "--use_cpu";
|
||||
|
||||
if(argc == 2) {
|
||||
std::string option = argv[1];
|
||||
if (option == "--help" || option == "-help" || option == "--h" || option == "-h") {
|
||||
printHelp();
|
||||
}
|
||||
return 0;
|
||||
} else if(argc != 5) {
|
||||
std::cout << "[ ERROR ] you have used the wrong command to run your program." << std::endl;
|
||||
printHelp();
|
||||
return 0;
|
||||
} else if (strcmp(argv[1], useOPENVINOFlag) == 0) {
|
||||
useOPENVINO = true;
|
||||
} else if (strcmp(argv[1], useCPUFlag) == 0) {
|
||||
useOPENVINO = false;
|
||||
}
|
||||
|
||||
if (useOPENVINO)
|
||||
{
|
||||
std::cout << "Inference Execution Provider: OPENVINO" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Inference Execution Provider: CPU" << std::endl;
|
||||
}
|
||||
|
||||
std::string instanceName{"image-classification-inference"};
|
||||
|
||||
std::string modelFilepath = argv[2]; // .onnx file
|
||||
|
||||
//validate ModelFilePath
|
||||
checkModelExtension(modelFilepath);
|
||||
if(!checkModelExtension(modelFilepath)) {
|
||||
throw std::runtime_error("[ ERROR ] The ModelFilepath is not correct. Make sure you are setting the path to an onnx model file (.onnx)");
|
||||
}
|
||||
std::string imageFilepath = argv[3];
|
||||
|
||||
// Validate ImageFilePath
|
||||
imageFileExtension(imageFilepath);
|
||||
if(!imageFileExtension(imageFilepath)) {
|
||||
throw std::runtime_error("[ ERROR ] The imageFilepath doesn't have correct image extension. Choose from jpeg, jpg, gif, png, PNG, jfif");
|
||||
}
|
||||
std::ifstream f(imageFilepath.c_str());
|
||||
if(!f.good()) {
|
||||
throw std::runtime_error("[ ERROR ] The imageFilepath is not set correctly or doesn't exist");
|
||||
}
|
||||
|
||||
// Validate LabelFilePath
|
||||
std::string labelFilepath = argv[4];
|
||||
if(!checkLabelFileExtension(labelFilepath)) {
|
||||
throw std::runtime_error("[ ERROR ] The LabelFilepath is not set correctly and the labels file should end with extension .txt");
|
||||
}
|
||||
|
||||
std::vector<std::string> labels{readLabels(labelFilepath)};
|
||||
|
||||
Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
|
||||
instanceName.c_str());
|
||||
Ort::SessionOptions sessionOptions;
|
||||
sessionOptions.SetIntraOpNumThreads(1);
|
||||
|
||||
//Appending OpenVINO Execution Provider API
|
||||
if (useOPENVINO) {
|
||||
// Using OPENVINO backend
|
||||
OrtOpenVINOProviderOptions options;
|
||||
options.device_type = "CPU_FP32"; //Other options are: GPU_FP32, GPU_FP16, MYRIAD_FP16
|
||||
std::cout << "OpenVINO device type is set to: " << options.device_type << std::endl;
|
||||
sessionOptions.AppendExecutionProvider_OpenVINO(options);
|
||||
}
|
||||
|
||||
// Sets graph optimization level
|
||||
// Available levels are
|
||||
// ORT_DISABLE_ALL -> To disable all optimizations
|
||||
// ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
|
||||
// removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
|
||||
// (Includes level 1 + more complex optimizations like node fusions)
|
||||
// ORT_ENABLE_ALL -> To Enable All possible optimizations
|
||||
sessionOptions.SetGraphOptimizationLevel(
|
||||
GraphOptimizationLevel::ORT_DISABLE_ALL);
|
||||
|
||||
//Creation: The Ort::Session is created here
|
||||
Ort::Session session(env, modelFilepath.c_str(), sessionOptions);
|
||||
|
||||
Ort::AllocatorWithDefaultOptions allocator;
|
||||
|
||||
size_t numInputNodes = session.GetInputCount();
|
||||
size_t numOutputNodes = session.GetOutputCount();
|
||||
|
||||
std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
|
||||
std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;
|
||||
|
||||
const char* inputName = session.GetInputName(0, allocator);
|
||||
std::cout << "Input Name: " << inputName << std::endl;
|
||||
|
||||
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
|
||||
auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
|
||||
|
||||
ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
|
||||
std::cout << "Input Type: " << inputType << std::endl;
|
||||
|
||||
std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
|
||||
std::cout << "Input Dimensions: " << inputDims << std::endl;
|
||||
|
||||
const char* outputName = session.GetOutputName(0, allocator);
|
||||
std::cout << "Output Name: " << outputName << std::endl;
|
||||
|
||||
Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
|
||||
auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
|
||||
|
||||
ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
|
||||
std::cout << "Output Type: " << outputType << std::endl;
|
||||
|
||||
std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
|
||||
std::cout << "Output Dimensions: " << outputDims << std::endl;
|
||||
//pre-processing the Image
|
||||
// step 1: Read an image in HWC BGR UINT8 format.
|
||||
cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
|
||||
|
||||
// step 2: Resize the image.
|
||||
cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
|
||||
cv::resize(imageBGR, resizedImageBGR,
|
||||
cv::Size(inputDims.at(2), inputDims.at(3)),
|
||||
cv::InterpolationFlags::INTER_CUBIC);
|
||||
|
||||
// step 3: Convert the image to HWC RGB UINT8 format.
|
||||
cv::cvtColor(resizedImageBGR, resizedImageRGB,
|
||||
cv::ColorConversionCodes::COLOR_BGR2RGB);
|
||||
// step 4: Convert the image to HWC RGB float format by dividing each pixel by 255.
|
||||
resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
|
||||
|
||||
// step 5: Split the RGB channels from the image.
|
||||
cv::Mat channels[3];
|
||||
cv::split(resizedImage, channels);
|
||||
|
||||
//step 6: Normalize each channel.
|
||||
// Normalization per channel
|
||||
// Normalization parameters obtained from
|
||||
// https://github.com/onnx/models/tree/master/vision/classification/squeezenet
|
||||
channels[0] = (channels[0] - 0.485) / 0.229;
|
||||
channels[1] = (channels[1] - 0.456) / 0.224;
|
||||
channels[2] = (channels[2] - 0.406) / 0.225;
|
||||
|
||||
//step 7: Merge the RGB channels back to the image.
|
||||
cv::merge(channels, 3, resizedImage);
|
||||
|
||||
// step 8: Convert the image to CHW RGB float format.
|
||||
// HWC to CHW
|
||||
cv::dnn::blobFromImage(resizedImage, preprocessedImage);
|
||||
|
||||
|
||||
//Run Inference
|
||||
|
||||
/* To run inference using ONNX Runtime, the user is responsible for creating and managing the
|
||||
input and output buffers. These buffers could be created and managed via std::vector.
|
||||
The linear-format input data should be copied to the buffer for ONNX Runtime inference. */
|
||||
|
||||
size_t inputTensorSize = vectorProduct(inputDims);
|
||||
std::vector<float> inputTensorValues(inputTensorSize);
|
||||
inputTensorValues.assign(preprocessedImage.begin<float>(),
|
||||
preprocessedImage.end<float>());
|
||||
|
||||
size_t outputTensorSize = vectorProduct(outputDims);
|
||||
assert(("Output tensor size should equal to the label set size.",
|
||||
labels.size() == outputTensorSize));
|
||||
std::vector<float> outputTensorValues(outputTensorSize);
|
||||
|
||||
|
||||
/* Once the buffers were created, they would be used for creating instances of Ort::Value
|
||||
which is the tensor format for ONNX Runtime. There could be multiple inputs for a neural network,
|
||||
so we have to prepare an array of Ort::Value instances for inputs and outputs respectively even if
|
||||
we only have one input and one output. */
|
||||
|
||||
std::vector<const char*> inputNames{inputName};
|
||||
std::vector<const char*> outputNames{outputName};
|
||||
std::vector<Ort::Value> inputTensors;
|
||||
std::vector<Ort::Value> outputTensors;
|
||||
|
||||
/*
|
||||
Creating ONNX Runtime inference sessions, querying input and output names,
|
||||
dimensions, and types are trivial.
|
||||
Setup inputs & outputs: The input & output tensors are created here. */
|
||||
|
||||
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
|
||||
OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<float>(
|
||||
memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
|
||||
inputDims.size()));
|
||||
outputTensors.push_back(Ort::Value::CreateTensor<float>(
|
||||
memoryInfo, outputTensorValues.data(), outputTensorSize,
|
||||
outputDims.data(), outputDims.size()));
|
||||
|
||||
/* To run inference, we provide the run options, an array of input names corresponding to the
|
||||
inputs in the input tensor, an array of input tensor, number of inputs, an array of output names
|
||||
corresponding to the the outputs in the output tensor, an array of output tensor, number of outputs. */
|
||||
|
||||
session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
|
||||
inputTensors.data(), 1, outputNames.data(),
|
||||
outputTensors.data(), 1);
|
||||
|
||||
int predId = 0;
|
||||
float activation = 0;
|
||||
float maxActivation = std::numeric_limits<float>::lowest();
|
||||
float expSum = 0;
|
||||
/* The inference result could be found in the buffer for the output tensors,
|
||||
which are usually the buffer from std::vector instances. */
|
||||
for (int i = 0; i < labels.size(); i++) {
|
||||
activation = outputTensorValues.at(i);
|
||||
expSum += std::exp(activation);
|
||||
if (activation > maxActivation)
|
||||
{
|
||||
predId = i;
|
||||
maxActivation = activation;
|
||||
}
|
||||
}
|
||||
std::cout << "Predicted Label ID: " << predId << std::endl;
|
||||
std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
|
||||
float result;
|
||||
try {
|
||||
result = division(std::exp(maxActivation), expSum);
|
||||
std::cout << "Uncalibrated Confidence: " << result << std::endl;
|
||||
}
|
||||
catch (std::runtime_error& e) {
|
||||
std::cout << "Exception occurred" << std::endl << e.what();
|
||||
}
|
||||
|
||||
// Measure latency
|
||||
int numTests{100};
|
||||
std::chrono::steady_clock::time_point begin =
|
||||
std::chrono::steady_clock::now();
|
||||
|
||||
//Run: Running the session is done in the Run() method:
|
||||
for (int i = 0; i < numTests; i++) {
|
||||
session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
|
||||
inputTensors.data(), 1, outputNames.data(),
|
||||
outputTensors.data(), 1);
|
||||
}
|
||||
std::chrono::steady_clock::time_point end =
|
||||
std::chrono::steady_clock::now();
|
||||
std::cout << "Minimum Inference Latency: "
|
||||
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
|
||||
<< " ms" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
1000
samples/c_cxx/OpenVINO_EP/squeezenet_classification/synset.txt
Normal file
1000
samples/c_cxx/OpenVINO_EP/squeezenet_classification/synset.txt
Normal file
File diff suppressed because it is too large
Load diff
91
samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Label.cs
Normal file
91
samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Label.cs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
Copyright (C) 2021, Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
namespace yolov3
{
    /// <summary>
    /// Holds the class names used to label detections.
    /// </summary>
    public class LabelMap
    {
        /// <summary>
        /// Class names; the position of a name in the array is the class index it stands for.
        /// </summary>
        public static readonly string[] Labels =
        {
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
            "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
            "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
            "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
            "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
            "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
            "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
            "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
            "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
            "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
            "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
            "toothbrush"
        };
    }
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
Copyright (C) 2021, Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
namespace yolov3
{
    /// <summary>
    /// One detection result: a bounding box, the name of the detected class and its score.
    /// </summary>
    public class Prediction
    {
        /// <summary>Bounding box of the detection.</summary>
        public Box Box { get; set; }

        /// <summary>Name of the detected class.</summary>
        public string Class { get; set; }

        /// <summary>Score attached to the detection.</summary>
        public float Score { get; set; }
    }

    /// <summary>
    /// Axis-aligned rectangle described by its minimum and maximum corner coordinates.
    /// </summary>
    public class Box
    {
        /// <summary>Creates a box from its two corners.</summary>
        /// <param name="xmin">Smallest x coordinate.</param>
        /// <param name="ymin">Smallest y coordinate.</param>
        /// <param name="xmax">Largest x coordinate.</param>
        /// <param name="ymax">Largest y coordinate.</param>
        public Box(float xmin, float ymin, float xmax, float ymax)
        {
            (Xmin, Ymin, Xmax, Ymax) = (xmin, ymin, xmax, ymax);
        }

        public float Xmin { get; set; }

        public float Ymin { get; set; }

        public float Xmax { get; set; }

        public float Ymax { get; set; }
    }
}
|
||||
173
samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Program.cs
Normal file
173
samples/c_sharp/OpenVINO_EP/yolov3_object_detection/Program.cs
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
Copyright (C) 2021, Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using SixLabors.ImageSharp;
|
||||
using SixLabors.ImageSharp.PixelFormats;
|
||||
using SixLabors.ImageSharp.Processing;
|
||||
using SixLabors.ImageSharp.Formats;
|
||||
using SixLabors.ImageSharp.Drawing.Processing;
|
||||
using SixLabors.Fonts;
|
||||
|
||||
namespace yolov3
{
    /// <summary>
    /// Console sample: runs a YOLOv3 ONNX model on one image through ONNX Runtime with the
    /// OpenVINO execution provider and saves a copy of the image with the detections drawn on it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Expects three arguments: model path, input image path, output image path.
        /// </summary>
        /// <param name="args">Command-line arguments: model.onnx input.jpg output.jpg.</param>
        static void Main(string[] args)
        {
            // All three paths are required.
            if (args == null || args.Length < 3)
            {
                Console.WriteLine("Usage information: dotnet run model.onnx input.jpg output.jpg");
                return;
            }

            // Read paths
            string modelFilePath = args[0];
            string imageFilePath = args[1];
            string outImageFilePath = args[2];

            if (!File.Exists(modelFilePath))
            {
                Console.WriteLine("Model Path does not exist");
                return;
            }
            if (!File.Exists(imageFilePath))
            {
                Console.WriteLine("Input Path does not exist");
                return;
            }

            using Image imageOrg = Image.Load(imageFilePath, out IImageFormat format);

            // Letterbox image: scale to fit inside w x h while keeping the aspect ratio,
            // then centre the result on a gray canvas.
            var iw = imageOrg.Width;
            var ih = imageOrg.Height;
            var w = 416;
            var h = 416;

            if ((iw == 0) || (ih == 0))
            {
                Console.WriteLine("Math error: Attempted to divide by Zero");
                return;
            }

            float width = (float)w / iw;
            float height = (float)h / ih;

            float scale = Math.Min(width, height);

            var nw = (int)(iw * scale);
            var nh = (int)(ih * scale);

            var pad_dims_w = (w - nw) / 2;
            var pad_dims_h = (h - nh) / 2;

            // Resize image using default bicubic sampler
            using var image = imageOrg.Clone(x => x.Resize((nw), (nh)));

            using var clone = new Image<Rgb24>(w, h);
            clone.Mutate(i => i.Fill(Color.Gray));
            clone.Mutate(o => o.DrawImage(image, new Point(pad_dims_w, pad_dims_h), 1f));

            // Preprocessing image: copy the pixels into a 1 x 3 x h x w tensor scaled to [0, 1].
            // NOTE(review): channel 0 receives B and channel 2 receives R - confirm this is the
            // channel order the model expects.
            Tensor<float> input = new DenseTensor<float>(new[] { 1, 3, h, w });
            for (int y = 0; y < clone.Height; y++)
            {
                Span<Rgb24> pixelSpan = clone.GetPixelRowSpan(y);
                for (int x = 0; x < clone.Width; x++)
                {
                    input[0, 0, y, x] = pixelSpan[x].B / 255f;
                    input[0, 1, y, x] = pixelSpan[x].G / 255f;
                    input[0, 2, y, x] = pixelSpan[x].R / 255f;
                }
            }

            // Get the Image Shape (height, width of the original image)
            var image_shape = new DenseTensor<float>(new[] { 1, 2 });
            image_shape[0, 0] = ih;
            image_shape[0, 1] = iw;

            // Setup inputs and outputs
            var container = new List<NamedOnnxValue>();
            container.Add(NamedOnnxValue.CreateFromTensor("input_1", input));
            container.Add(NamedOnnxValue.CreateFromTensor("image_shape", image_shape));

            // Session Options (disposed when Main returns)
            using SessionOptions options = new SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.AppendExecutionProvider_OpenVINO(@"MYRIAD_FP16");
            options.AppendExecutionProvider_CPU(1);

            // Run inference
            using var session = new InferenceSession(modelFilePath, options);

            using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = session.Run(container);

            Console.WriteLine("Inference done");

            // Post Processing Steps
            var resultsArray = results.ToArray();
            Tensor<float> boxes = resultsArray[0].AsTensor<float>();
            Tensor<float> scores = resultsArray[1].AsTensor<float>();
            int[] indices = resultsArray[2].AsTensor<int>().ToArray();

            // The indices are consumed three at a time; the code uses them as
            // (batch, class, box). Stop before an incomplete trailing group.
            var predictions = new List<Prediction>();
            for (int i = 0; i + 2 < indices.Length; i += 3)
            {
                int batchIndex = indices[i];
                int classIndex = indices[i + 1];
                int boxIndex = indices[i + 2];

                predictions.Add(new Prediction
                {
                    // Box takes (xmin, ymin, xmax, ymax); they are read from
                    // positions 1, 0, 3 and 2 of the last axis of the boxes tensor.
                    Box = new Box(boxes[batchIndex, boxIndex, 1],
                                  boxes[batchIndex, boxIndex, 0],
                                  boxes[batchIndex, boxIndex, 3],
                                  boxes[batchIndex, boxIndex, 2]),
                    Class = LabelMap.Labels[classIndex],
                    Score = scores[batchIndex, classIndex, boxIndex]
                });
            }

            // Put boxes, labels and confidence on image and save for viewing
            Font font = SystemFonts.CreateFont("Arial", 16);
            foreach (var p in predictions)
            {
                imageOrg.Mutate(x =>
                {
                    x.DrawLines(Color.Red, 2f, new PointF[] {

                        new PointF(p.Box.Xmin, p.Box.Ymin),
                        new PointF(p.Box.Xmax, p.Box.Ymin),

                        new PointF(p.Box.Xmax, p.Box.Ymin),
                        new PointF(p.Box.Xmax, p.Box.Ymax),

                        new PointF(p.Box.Xmax, p.Box.Ymax),
                        new PointF(p.Box.Xmin, p.Box.Ymax),

                        new PointF(p.Box.Xmin, p.Box.Ymax),
                        new PointF(p.Box.Xmin, p.Box.Ymin)
                    });
                    x.DrawText($"{p.Class}, {p.Score:0.00}", font, Color.White, new PointF(p.Box.Xmin, p.Box.Ymin));
                });
            }

            // File.Create truncates an existing file; File.OpenWrite would leave the tail of a
            // longer, older file in place after the new image data.
            using (var outputImage = File.Create(outImageFilePath))
            {
                imageOrg.Save(outputImage, format);
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,195 @@
|
|||
'''
|
||||
Copyright (C) 2021, Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime as rt
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
|
||||
def sigmoid(x, derivative=False):
    """Return the logistic function of x, or x*(1-x) when derivative is True.

    With derivative=False the result is 1 / (1 + exp(-x)).
    With derivative=True the result is x * (1 - x); presumably x is then
    expected to already be a sigmoid output (confirm with callers).
    """
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))
|
||||
|
||||
def softmax(x):
    """Return the softmax of x along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result unchanged mathematically but keeps np.exp from overflowing to
    inf (and the division from producing NaN) when the scores are large.

    x: array-like of scores.
    Returns an array of the same shape whose entries along axis 0 sum to 1.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalise; keep returning an empty array.
        return np.exp(scores)
    exps = np.exp(scores - scores.max(axis=0))
    return exps / exps.sum(0)
|
||||
|
||||
def checkModelExtension(fp):
    """Validate that fp names an existing .onnx file.

    The extension check is case-insensitive.
    Raises Exception when the extension is not .onnx or the path does not exist.
    """
    # Split the extension from the path and normalise it to lowercase.
    ext = os.path.splitext(fp)[-1].lower()

    # Build a single message string; passing fp and the text as two separate
    # arguments makes the exception print as a tuple.
    if ext != ".onnx":
        raise Exception("{} is an unknown file format. Use the model ending with .onnx format".format(fp))

    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
|
||||
|
||||
def checkVideoFileExtension(fp):
    """Validate that fp names an existing .mp4, .avi or .mov file.

    The extension check is case-insensitive.
    Raises Exception when the extension is not supported or the path does not exist.
    """
    # Split the extension from the path and normalise it to lowercase.
    ext = os.path.splitext(fp)[-1].lower()

    # Build a single message string; passing fp and the text as two separate
    # arguments makes the exception print as a tuple.
    if ext not in (".mp4", ".avi", ".mov"):
        raise Exception("{} is an unknown file format. Use the video file ending with .mp4 or .avi or .mov formats".format(fp))

    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the video file is Invalid")
|
||||
|
||||
# Color look-up table: one color per class index for the object detection sample.
clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
        (128,255,0),(128,128,0),(0,128,255),(128,0,128),
        (255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
        (255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]

# 20 labels that the tiny-yolov2 model can do the object_detection on
label = ["aeroplane","bicycle","bird","boat","bottle",
         "bus","car","cat","chair","cow","diningtable",
         "dog","horse","motorbike","person","pottedplant",
         "sheep","sofa","train","tvmonitor"]

model_file_path = "tiny_yolo_v2_zoo_model.onnx"
# TODO: You need to modify the path to the input onnx model based on where it is located on your device after downloading it from ONNX Model zoo.

# Validate model file path
checkModelExtension(model_file_path)

# Load the model
sess = rt.InferenceSession(model_file_path)

# Get the input name of the model
input_name = sess.get_inputs()[0].name

device = 'CPU_FP32'
# Set OpenVINO as the Execution provider to infer this model
sess.set_providers(['OpenVINOExecutionProvider'], [{'device_type' : device}])
# Other 'device_type' options are (any hardware target can be assigned if you have access to it):
#   'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16', 'VAD-F_FP32',
#   'HETERO:MYRIAD,CPU', 'MULTI:MYRIAD,GPU,CPU'

# Path to video file has to be provided
video_file_path = "sample_demo_video.mp4"
# TODO: You need to specify the path to your own sample video based on where it is located on your device.

# Validate video file input path
checkVideoFileExtension(video_file_path)

cap = cv2.VideoCapture(video_file_path)

# Capturing different metrics of the image from the video
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
x_scale = float(width)/416.0  # In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416).
y_scale = float(height)/416.0

# Writing the inferencing output as a video to the local disk.
# The writer is opened with the size of the frames that are actually written
# (the source frame size); frames of another size are not stored by VideoWriter.
# The source frame rate is used when the capture reports one, otherwise 17 fps.
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video_name = device + "_output.avi"
output_fps = float(fps) if fps and fps > 0 else float(17.0)
output_video = cv2.VideoWriter(output_video_name, fourcc, output_fps, (width, height))

numClasses = 20
anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
labThreshold = 40

# Capturing one frame at a time from the video feed and performing the inference
i = 0
try:
    while cap.isOpened():
        l_start = time.time()
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocessing the input frame and reshaping it.
        # In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416),
        # so we resize the frame w.r.t that size.
        in_frame = cv2.resize(frame, (416, 416))
        X = np.asarray(in_frame)
        X = X.astype(np.float32)
        X = X.transpose(2,0,1)
        # Reshaping the input array to align with the input shape of the model
        X = X.reshape(1,3,416,416)

        start = time.time()
        # Running the session by passing in the input data of the model
        out = sess.run(None, {input_name: X})
        end = time.time()
        inference_time = end - start
        out = out[0][0]

        # Label positions already drawn in this frame, per class name
        existingLabels = {l: [] for l in label}

        # Inside this loop we compute the bounding box b for grid cell (cy, cx)
        for cy in range(0,13):
            for cx in range(0,13):
                for b in range(0,5):
                    # First we read the tx, ty, width(tw), and height(th) for the bounding box
                    # from the out array, as well as the confidence score
                    channel = b*(numClasses+5)
                    tx = out[channel  ][cy][cx]
                    ty = out[channel+1][cy][cx]
                    tw = out[channel+2][cy][cx]
                    th = out[channel+3][cy][cx]
                    tc = out[channel+4][cy][cx]

                    x = (float(cx) + sigmoid(tx))*32
                    y = (float(cy) + sigmoid(ty))*32

                    w = np.exp(tw) * 32 * anchors[2*b  ]
                    h = np.exp(th) * 32 * anchors[2*b+1]

                    # Calculating the confidence score
                    confidence = sigmoid(tc)  # The confidence value for the bounding box is given by tc

                    classes = np.zeros(numClasses)
                    for c in range(0,numClasses):
                        classes[c] = out[channel + 5 + c][cy][cx]
                    # We take the softmax to turn the array into a probability distribution.
                    # And then we pick the class with the largest score as the winner.
                    classes = softmax(classes)
                    detectedClass = classes.argmax()

                    # Now we can compute the final score for this bounding box and we only want
                    # to keep the ones whose combined score is over a certain threshold
                    if 0.45 < classes[detectedClass]*confidence:
                        color = clut[detectedClass]
                        x = (x - w/2)*x_scale
                        y = (y - h/2)*y_scale
                        w *= x_scale
                        h *= y_scale

                        labelX = int((x+x+w)/2)
                        labelY = int((y+y+h)/2)
                        addLabel = True
                        # Skip a detection whose centre lies within labThreshold pixels of a
                        # label of the same class that was already drawn in this frame.
                        for point in existingLabels[label[detectedClass]]:
                            if labelX < point[0] + labThreshold and labelX > point[0] - labThreshold and \
                               labelY < point[1] + labThreshold and labelY > point[1] - labThreshold:
                                addLabel = False
                        # Adding class labels to the output of the frame and also drawing a
                        # rectangular bounding box around the object detected.
                        if addLabel:
                            cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
                            cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detectedClass]),int(y)),color,-1)
                            cv2.putText(frame,label[detectedClass],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
                            existingLabels[label[detectedClass]].append((labelX,labelY))
                            print('{} detected in frame {}'.format(label[detectedClass],i))

        output_video.write(frame)
        # The device name and FPS overlays are added after the frame is written,
        # so they appear only in the preview window.
        cv2.putText(frame,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
        # Guard against a zero elapsed time reported by a coarse clock.
        inference_fps = 1.0/inference_time if inference_time > 0 else float('inf')
        cv2.putText(frame,'FPS: {}'.format(inference_fps),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
        cv2.imshow('frame',frame)

        # Press 'q' to quit the process
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        print('Processed Frame {}'.format(i))
        i += 1
        l_end = time.time()
        print('Loop Time = {}'.format(l_end - l_start))
finally:
    # Release the capture and the writer even when the loop exits with an error.
    cap.release()
    output_video.release()
    cv2.destroyAllWindows()
|
||||
Loading…
Reference in a new issue